i have an html table to parse
how do i calculate the cell Column Index faster
What is the “cell column index”? And how do you calculate it using Go now?
i use goquery to parse html,but i have an problem
when i parsing row 1,column 1,it has vertical merge ,when i parsing “2c”,i donot know how to calculate it’s column index,maybe show my code
func parseTable(s *goquery.Selection) (rowCount, colCount int, tableCellMap map[string]*model.TableCell) {
cells = make([]*model.TableCell, 0)
tableCellMap = make(map[string]*model.TableCell)
cellMap := make(map[string]string)
tableRowSelection := s.Find("tbody tr")
if tableRowSelection.Nodes != nil {
rowCount = len(tableRowSelection.Nodes)
colCount = 0
tableRowSelection.Each(func(i int, selection *goquery.Selection) {
cc := parseTableRow(i, selection, cellMap, tableCellMap)
if cc > colCount {
colCount = cc
}
})
}
return
}
func parseTableRow(rowIndex int, s *goquery.Selection, cellMap map[string]string, tableCellMap map[string]*model.TableCell) (colCount int) {
tableColSeletion := s.Find("td")
cellMergeCount := 0
for colIndex, node := range tableColSeletion.Nodes {
rowSpan := 0
colSpan := 0
for _, attr := range node.Attr {
if attr.Key == "colspan" {
col, err := strconv.Atoi(attr.Val)
if err != nil {
log.Fatalln(err)
}
if col == 1 {
continue
}
colSpan = col
} else if attr.Key == "rowspan" {
row, err := strconv.Atoi(attr.Val)
if err != nil {
log.Fatalln(err)
}
if row == 1 {
continue
}
rowSpan = row
}
}
value := node.FirstChild.Data
if rowSpan == 0 && colSpan == 0 {
// 先要确定这个格子的索引
for ci := 0; ci < math.MaxInt8; ci++ {
cellKey := utils.GetCellKey(rowIndex, colIndex+ci)
_, ok := cellMap[cellKey]
if !ok {
cellMap[cellKey] = value
cell := &model.TableCell{RowIndex: rowIndex, ColIndex: colIndex + ci, Value: value}
tableCellMap[cellKey] = cell
cells = append(cells, cell)
break
}
}
}
if rowSpan != 0 && colSpan == 0 {
for ri := 0; ri < rowSpan; ri++ {
cellKey := utils.GetCellKey(rowIndex+ri, colIndex+cellMergeCount)
cellMap[cellKey] = value
if rowIndex != rowIndex+rowSpan-1 {
if !utils.IsCellInMergeCellScope(cellKey, tableCellMap) {
cell := &model.TableCell{RowIndex: rowIndex + ri, ColIndex: colIndex + cellMergeCount, VMerge: rowSpan, Value: value}
tableCellMap[cellKey] = cell
cells = append(cells, cell)
}
}
}
} else if rowSpan == 0 && colSpan != 0 {
for ci := 0; ci < colSpan; ci++ {
cellKey := utils.GetCellKey(rowIndex, colIndex+ci+cellMergeCount)
cellMap[cellKey] = value
if colIndex != colSpan-1 {
if !utils.IsCellInMergeCellScope(cellKey, tableCellMap) {
cell := &model.TableCell{RowIndex: rowIndex, ColIndex: colIndex + ci + cellMergeCount, HMerge: colSpan, Value: value}
tableCellMap[cellKey] = cell
cells = append(cells, cell)
}
}
}
cellMergeCount += colSpan - 1
} else if rowSpan != 0 && colSpan != 0 {
// 计算每个格子的值
for ri := 0; ri < rowSpan; ri++ {
for ci := 0; ci < colSpan; ci++ {
cellKey := utils.GetCellKey(rowIndex+ri, colIndex+ci+cellMergeCount)
cellMap[cellKey] = value
if !utils.IsCellInMergeCellScope(cellKey, tableCellMap) {
cell := &model.TableCell{RowIndex: rowIndex + ri, ColIndex: colIndex + ci + cellMergeCount, VMerge: rowSpan, HMerge: colSpan, Value: value}
tableCellMap[cellKey] = cell
cells = append(cells, cell)
}
}
}
cellMergeCount += colSpan - 1
}
}
colCount = cellMergeCount + len(tableColSeletion.Nodes)
return
}
This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.