Breaking change: remove `TotalRows` from the row iterator, and optimize the row iterator's performance

Reduce memory allocation by 20% and GC times by 80% for the row iterator
pull/2/head
xuri 3 years ago
parent 50c4dedf8d
commit 4daa6ed0b4
No known key found for this signature in database
GPG Key ID: BA5E5BB1C948EDF7

@ -340,6 +340,14 @@ func TestGetCellType(t *testing.T) {
assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error()) assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error())
} }
func TestGetValueFrom(t *testing.T) {
f := NewFile()
c := xlsxC{T: "s"}
value, err := c.getValueFrom(f, f.sharedStringsReader(), false)
assert.NoError(t, err)
assert.Equal(t, "", value)
}
func TestGetCellFormula(t *testing.T) { func TestGetCellFormula(t *testing.T) {
// Test get cell formula on not exist worksheet. // Test get cell formula on not exist worksheet.
f := NewFile() f := NewFile()

@ -68,29 +68,49 @@ func (f *File) GetRows(sheet string, opts ...Options) ([][]string, error) {
// Rows defines an iterator to a sheet. // Rows defines an iterator to a sheet.
type Rows struct { type Rows struct {
err error err error
curRow, totalRows, stashRow int curRow, seekRow int
rawCellValue bool needClose, rawCellValue bool
sheet string sheet string
f *File f *File
tempFile *os.File tempFile *os.File
decoder *xml.Decoder sst *xlsxSST
decoder *xml.Decoder
token xml.Token
} }
// CurrentRow returns the row number that represents the current row. // CurrentRow returns the row number that represents the current row.
func (rows *Rows) CurrentRow() int { func (rows *Rows) CurrentRow() int {
return rows.curRow return rows.seekRow
}
// TotalRows returns the total rows count in the worksheet.
func (rows *Rows) TotalRows() int {
return rows.totalRows
} }
// Next will return true if find the next row element. // Next will return true if find the next row element.
func (rows *Rows) Next() bool { func (rows *Rows) Next() bool {
rows.curRow++ rows.seekRow++
return rows.curRow <= rows.totalRows if rows.curRow >= rows.seekRow {
return true
}
for {
token, _ := rows.decoder.Token()
if token == nil {
return false
}
switch xmlElement := token.(type) {
case xml.StartElement:
if xmlElement.Name.Local == "row" {
rows.curRow++
if rowNum, _ := attrValToInt("r", xmlElement.Attr); rowNum != 0 {
rows.curRow = rowNum
}
rows.token = token
return true
}
case xml.EndElement:
if xmlElement.Name.Local == "sheetData" {
return false
}
}
}
} }
// Error will return the error when the error occurs. // Error will return the error when the error occurs.
@ -109,44 +129,40 @@ func (rows *Rows) Close() error {
// Columns return the current row's column values. // Columns return the current row's column values.
func (rows *Rows) Columns(opts ...Options) ([]string, error) { func (rows *Rows) Columns(opts ...Options) ([]string, error) {
var rowIterator rowXMLIterator if rows.curRow > rows.seekRow {
if rows.stashRow >= rows.curRow { return nil, nil
return rowIterator.columns, rowIterator.err
} }
rows.rawCellValue = parseOptions(opts...).RawCellValue var rowIterator rowXMLIterator
rowIterator.rows = rows var token xml.Token
rowIterator.d = rows.f.sharedStringsReader() rows.rawCellValue, rows.sst = parseOptions(opts...).RawCellValue, rows.f.sharedStringsReader()
for { for {
token, _ := rows.decoder.Token() if rows.token != nil {
if token == nil { token = rows.token
} else if token, _ = rows.decoder.Token(); token == nil {
break break
} }
switch xmlElement := token.(type) { switch xmlElement := token.(type) {
case xml.StartElement: case xml.StartElement:
rowIterator.inElement = xmlElement.Name.Local rowIterator.inElement = xmlElement.Name.Local
if rowIterator.inElement == "row" { if rowIterator.inElement == "row" {
rowIterator.row++ rowNum := 0
if rowIterator.attrR, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowIterator.attrR != 0 { if rowNum, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowNum != 0 {
rowIterator.row = rowIterator.attrR rows.curRow = rowNum
} else if rows.token == nil {
rows.curRow++
} }
if rowIterator.row > rowIterator.rows.curRow { if rows.curRow > rows.seekRow {
rowIterator.rows.stashRow = rowIterator.row - 1 rows.token = nil
return rowIterator.columns, rowIterator.err return rowIterator.columns, rowIterator.err
} }
} }
rowXMLHandler(&rowIterator, &xmlElement, rows.rawCellValue) if rows.rowXMLHandler(&rowIterator, &xmlElement, rows.rawCellValue); rowIterator.err != nil {
if rowIterator.err != nil { rows.token = nil
return rowIterator.columns, rowIterator.err return rowIterator.columns, rowIterator.err
} }
rows.token = nil
case xml.EndElement: case xml.EndElement:
rowIterator.inElement = xmlElement.Name.Local if xmlElement.Name.Local == "sheetData" {
if rowIterator.row == 0 && rowIterator.rows.curRow > 1 {
rowIterator.row = rowIterator.rows.curRow
}
if rowIterator.inElement == "row" && rowIterator.row+1 < rowIterator.rows.curRow {
return rowIterator.columns, rowIterator.err
}
if rowIterator.inElement == "sheetData" {
return rowIterator.columns, rowIterator.err return rowIterator.columns, rowIterator.err
} }
} }
@ -173,29 +189,25 @@ func (err ErrSheetNotExist) Error() string {
// rowXMLIterator defined runtime use field for the worksheet row SAX parser. // rowXMLIterator defined runtime use field for the worksheet row SAX parser.
type rowXMLIterator struct { type rowXMLIterator struct {
err error err error
inElement string inElement string
attrR, cellCol, row int cellCol int
columns []string columns []string
rows *Rows
d *xlsxSST
} }
// rowXMLHandler parse the row XML element of the worksheet. // rowXMLHandler parse the row XML element of the worksheet.
func rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement, raw bool) { func (rows *Rows) rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement, raw bool) {
rowIterator.err = nil
if rowIterator.inElement == "c" { if rowIterator.inElement == "c" {
rowIterator.cellCol++ rowIterator.cellCol++
colCell := xlsxC{} colCell := xlsxC{}
_ = rowIterator.rows.decoder.DecodeElement(&colCell, xmlElement) _ = rows.decoder.DecodeElement(&colCell, xmlElement)
if colCell.R != "" { if colCell.R != "" {
if rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(colCell.R); rowIterator.err != nil { if rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(colCell.R); rowIterator.err != nil {
return return
} }
} }
blank := rowIterator.cellCol - len(rowIterator.columns) blank := rowIterator.cellCol - len(rowIterator.columns)
val, _ := colCell.getValueFrom(rowIterator.rows.f, rowIterator.d, raw) if val, _ := colCell.getValueFrom(rows.f, rows.sst, raw); val != "" || colCell.F != nil {
if val != "" || colCell.F != nil {
rowIterator.columns = append(appendSpace(blank, rowIterator.columns), val) rowIterator.columns = append(appendSpace(blank, rowIterator.columns), val)
} }
} }
@ -236,48 +248,10 @@ func (f *File) Rows(sheet string) (*Rows, error) {
output, _ := xml.Marshal(worksheet) output, _ := xml.Marshal(worksheet)
f.saveFileList(name, f.replaceNameSpaceBytes(name, output)) f.saveFileList(name, f.replaceNameSpaceBytes(name, output))
} }
var ( var err error
err error rows := Rows{f: f, sheet: name}
inElement string rows.needClose, rows.decoder, rows.tempFile, err = f.xmlDecoder(name)
row int return &rows, err
rows Rows
needClose bool
decoder *xml.Decoder
tempFile *os.File
)
if needClose, decoder, tempFile, err = f.xmlDecoder(name); needClose && err == nil {
defer tempFile.Close()
}
for {
token, _ := decoder.Token()
if token == nil {
break
}
switch xmlElement := token.(type) {
case xml.StartElement:
inElement = xmlElement.Name.Local
if inElement == "row" {
row++
for _, attr := range xmlElement.Attr {
if attr.Name.Local == "r" {
row, err = strconv.Atoi(attr.Value)
if err != nil {
return &rows, err
}
}
}
rows.totalRows = row
}
case xml.EndElement:
if xmlElement.Name.Local == "sheetData" {
rows.f = f
rows.sheet = name
_, rows.decoder, rows.tempFile, err = f.xmlDecoder(name)
return &rows, err
}
}
}
return &rows, nil
} }
// getFromStringItem build shared string item offset list from system temporary // getFromStringItem build shared string item offset list from system temporary

@ -44,13 +44,6 @@ func TestRows(t *testing.T) {
} }
assert.NoError(t, f.Close()) assert.NoError(t, f.Close())
f = NewFile()
f.Pkg.Store("xl/worksheets/sheet1.xml", []byte(`<worksheet><sheetData><row r="1"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`))
f.Sheet.Delete("xl/worksheets/sheet1.xml")
delete(f.checked, "xl/worksheets/sheet1.xml")
_, err = f.Rows("Sheet1")
assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
f.Pkg.Store("xl/worksheets/sheet1.xml", nil) f.Pkg.Store("xl/worksheets/sheet1.xml", nil)
_, err = f.Rows("Sheet1") _, err = f.Rows("Sheet1")
assert.NoError(t, err) assert.NoError(t, err)
@ -82,7 +75,6 @@ func TestRowsIterator(t *testing.T) {
for rows.Next() { for rows.Next() {
rowCount++ rowCount++
assert.Equal(t, rowCount, rows.CurrentRow()) assert.Equal(t, rowCount, rows.CurrentRow())
assert.Equal(t, expectedNumRow, rows.TotalRows())
require.True(t, rowCount <= expectedNumRow, "rowCount is greater than expected") require.True(t, rowCount <= expectedNumRow, "rowCount is greater than expected")
} }
assert.Equal(t, expectedNumRow, rowCount) assert.Equal(t, expectedNumRow, rowCount)
@ -186,7 +178,7 @@ func TestColumns(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="A"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`))) rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="A"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`)))
rows.stashRow, rows.curRow = 0, 1 assert.True(t, rows.Next())
_, err = rows.Columns() _, err = rows.Columns()
assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`) assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
@ -194,8 +186,8 @@ func TestColumns(t *testing.T) {
_, err = rows.Columns() _, err = rows.Columns()
assert.NoError(t, err) assert.NoError(t, err)
rows.curRow = 3
rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="1"><c r="A" t="s"><v>1</v></c></row></sheetData></worksheet>`))) rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="1"><c r="A" t="s"><v>1</v></c></row></sheetData></worksheet>`)))
assert.True(t, rows.Next())
_, err = rows.Columns() _, err = rows.Columns()
assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error()) assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error())

Loading…
Cancel
Save