Parse html table

i have an html table to parse



how do i calculate the cell Column Index faster

What is the “cell column index”? And how do you calculate it using Go now?

i use goquery to parse html,but i have an problem
when i parsing row 1,column 1,it has vertical merge ,when i parsing “2c”,i donot know how to calculate it’s column index,maybe show my code

func parseTable(s *goquery.Selection) (rowCount, colCount int, tableCellMap map[string]*model.TableCell) {
	cells = make([]*model.TableCell, 0)
	tableCellMap = make(map[string]*model.TableCell)
	cellMap := make(map[string]string)
	tableRowSelection := s.Find("tbody tr")
	if tableRowSelection.Nodes != nil {
		rowCount = len(tableRowSelection.Nodes)
		colCount = 0
		tableRowSelection.Each(func(i int, selection *goquery.Selection) {
			cc := parseTableRow(i, selection, cellMap, tableCellMap)
			if cc > colCount {
				colCount = cc
			}
		})
	}
	return
}

func parseTableRow(rowIndex int, s *goquery.Selection, cellMap map[string]string, tableCellMap map[string]*model.TableCell) (colCount int) {
	tableColSeletion := s.Find("td")
	cellMergeCount := 0
	for colIndex, node := range tableColSeletion.Nodes {
		rowSpan := 0
		colSpan := 0
		for _, attr := range node.Attr {
			if attr.Key == "colspan" {
				col, err := strconv.Atoi(attr.Val)
				if err != nil {
					log.Fatalln(err)
				}
				if col == 1 {
					continue
				}
				colSpan = col
			} else if attr.Key == "rowspan" {
				row, err := strconv.Atoi(attr.Val)
				if err != nil {
					log.Fatalln(err)
				}
				if row == 1 {
					continue
				}
				rowSpan = row
			}
		}

		value := node.FirstChild.Data

		if rowSpan == 0 && colSpan == 0 {
			// 先要确定这个格子的索引
			for ci := 0; ci < math.MaxInt8; ci++ {
				cellKey := utils.GetCellKey(rowIndex, colIndex+ci)
				_, ok := cellMap[cellKey]
				if !ok {
					cellMap[cellKey] = value
					cell := &model.TableCell{RowIndex: rowIndex, ColIndex: colIndex + ci, Value: value}
					tableCellMap[cellKey] = cell
					cells = append(cells, cell)
					break
				}
			}
		}

		if rowSpan != 0 && colSpan == 0 {
			for ri := 0; ri < rowSpan; ri++ {
				cellKey := utils.GetCellKey(rowIndex+ri, colIndex+cellMergeCount)
				cellMap[cellKey] = value
				if rowIndex != rowIndex+rowSpan-1 {
					if !utils.IsCellInMergeCellScope(cellKey, tableCellMap) {
						cell := &model.TableCell{RowIndex: rowIndex + ri, ColIndex: colIndex + cellMergeCount, VMerge: rowSpan, Value: value}
						tableCellMap[cellKey] = cell
						cells = append(cells, cell)
					}
				}
			}
		} else if rowSpan == 0 && colSpan != 0 {
			for ci := 0; ci < colSpan; ci++ {
				cellKey := utils.GetCellKey(rowIndex, colIndex+ci+cellMergeCount)
				cellMap[cellKey] = value
				if colIndex != colSpan-1 {
					if !utils.IsCellInMergeCellScope(cellKey, tableCellMap) {
						cell := &model.TableCell{RowIndex: rowIndex, ColIndex: colIndex + ci + cellMergeCount, HMerge: colSpan, Value: value}
						tableCellMap[cellKey] = cell
						cells = append(cells, cell)
					}
				}
			}
			cellMergeCount += colSpan - 1
		} else if rowSpan != 0 && colSpan != 0 {
			// 计算每个格子的值
			for ri := 0; ri < rowSpan; ri++ {
				for ci := 0; ci < colSpan; ci++ {
					cellKey := utils.GetCellKey(rowIndex+ri, colIndex+ci+cellMergeCount)
					cellMap[cellKey] = value
					if !utils.IsCellInMergeCellScope(cellKey, tableCellMap) {
						cell := &model.TableCell{RowIndex: rowIndex + ri, ColIndex: colIndex + ci + cellMergeCount, VMerge: rowSpan, HMerge: colSpan, Value: value}
						tableCellMap[cellKey] = cell
						cells = append(cells, cell)
					}
				}
			}
			cellMergeCount += colSpan - 1
		}
	}
	colCount = cellMergeCount + len(tableColSeletion.Nodes)
	return
}

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.