Compatibility improvement: parse document core part (workbook) dynamically

formula
xuri 4 years ago
parent fcca8a3838
commit c82a185af8
No known key found for this signature in database
GPG Key ID: BA5E5BB1C948EDF7

@ -16,6 +16,7 @@ import (
"encoding/xml" "encoding/xml"
"errors" "errors"
"fmt" "fmt"
"path/filepath"
"strconv" "strconv"
"strings" "strings"
) )
@ -800,9 +801,11 @@ func (f *File) AddChartSheet(sheet, format string, combo ...string) error {
f.addContentTypePart(chartID, "chart") f.addContentTypePart(chartID, "chart")
f.addContentTypePart(sheetID, "chartsheet") f.addContentTypePart(sheetID, "chartsheet")
f.addContentTypePart(drawingID, "drawings") f.addContentTypePart(drawingID, "drawings")
// Update xl/_rels/workbook.xml.rels // Update workbook.xml.rels
rID := f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipChartsheet, fmt.Sprintf("chartsheets/sheet%d.xml", sheetID), "") wbPath := f.getWorkbookPath()
// Update xl/workbook.xml wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
rID := f.addRels(wbRelsPath, SourceRelationshipChartsheet, fmt.Sprintf("/xl/chartsheets/sheet%d.xml", sheetID), "")
// Update workbook.xml
f.setWorkbook(sheet, sheetID, rID) f.setWorkbook(sheet, sheetID, rID)
chartsheet, _ := xml.Marshal(cs) chartsheet, _ := xml.Marshal(cs)
f.addSheetNameSpace(sheet, NameSpaceSpreadSheet) f.addSheetNameSpace(sheet, NameSpaceSpreadSheet)

@ -22,6 +22,7 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"path" "path"
"path/filepath"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@ -112,7 +113,7 @@ func OpenReader(r io.Reader, opt ...Options) (*File, error) {
return nil, err return nil, err
} }
f := newFile() f := newFile()
if bytes.Contains(b, oleIdentifier) { if bytes.Contains(b, oleIdentifier) && len(opt) > 0 {
for _, o := range opt { for _, o := range opt {
f.options = &o f.options = &o
} }
@ -345,7 +346,9 @@ func (f *File) AddVBAProject(bin string) error {
return errors.New("unsupported VBA project extension") return errors.New("unsupported VBA project extension")
} }
f.setContentTypePartVBAProjectExtensions() f.setContentTypePartVBAProjectExtensions()
wb := f.relsReader("xl/_rels/workbook.xml.rels") wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
wb := f.relsReader(wbRelsPath)
var rID int var rID int
var ok bool var ok bool
for _, rel := range wb.Relationships { for _, rel := range wb.Relationships {

@ -201,7 +201,7 @@ func TestCharsetTranscoder(t *testing.T) {
func TestOpenReader(t *testing.T) { func TestOpenReader(t *testing.T) {
_, err := OpenReader(strings.NewReader("")) _, err := OpenReader(strings.NewReader(""))
assert.EqualError(t, err, "zip: not a valid zip file") assert.EqualError(t, err, "zip: not a valid zip file")
_, err = OpenReader(bytes.NewReader(oleIdentifier)) _, err = OpenReader(bytes.NewReader(oleIdentifier), Options{Password: "password"})
assert.EqualError(t, err, "decrypted file failed") assert.EqualError(t, err, "decrypted file failed")
// Test open password protected spreadsheet created by Microsoft Office Excel 2010. // Test open password protected spreadsheet created by Microsoft Office Excel 2010.

@ -206,7 +206,6 @@ func CoordinatesToCellName(col, row int) (string, error) {
if col < 1 || row < 1 { if col < 1 || row < 1 {
return "", fmt.Errorf("invalid cell coordinates [%d, %d]", col, row) return "", fmt.Errorf("invalid cell coordinates [%d, %d]", col, row)
} }
//Using itoa will save more memory
colname, err := ColumnNumberToName(col) colname, err := ColumnNumberToName(col)
return colname + strconv.Itoa(row), err return colname + strconv.Itoa(row), err
} }
@ -244,11 +243,12 @@ func parseFormatSet(formatSet string) []byte {
// Transitional namespaces. // Transitional namespaces.
func namespaceStrictToTransitional(content []byte) []byte { func namespaceStrictToTransitional(content []byte) []byte {
var namespaceTranslationDic = map[string]string{ var namespaceTranslationDic = map[string]string{
StrictSourceRelationship: SourceRelationship.Value, StrictSourceRelationship: SourceRelationship.Value,
StrictSourceRelationshipChart: SourceRelationshipChart, StrictSourceRelationshipOfficeDocument: SourceRelationshipOfficeDocument,
StrictSourceRelationshipComments: SourceRelationshipComments, StrictSourceRelationshipChart: SourceRelationshipChart,
StrictSourceRelationshipImage: SourceRelationshipImage, StrictSourceRelationshipComments: SourceRelationshipComments,
StrictNameSpaceSpreadSheet: NameSpaceSpreadSheet.Value, StrictSourceRelationshipImage: SourceRelationshipImage,
StrictNameSpaceSpreadSheet: NameSpaceSpreadSheet.Value,
} }
for s, n := range namespaceTranslationDic { for s, n := range namespaceTranslationDic {
content = bytesReplace(content, []byte(s), []byte(n), -1) content = bytesReplace(content, []byte(s), []byte(n), -1)

@ -15,6 +15,7 @@ import (
"encoding/xml" "encoding/xml"
"errors" "errors"
"fmt" "fmt"
"path/filepath"
"strconv" "strconv"
"strings" "strings"
) )
@ -138,7 +139,9 @@ func (f *File) AddPivotTable(opt *PivotTableOption) error {
} }
// workbook pivot cache // workbook pivot cache
workBookPivotCacheRID := f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipPivotCache, fmt.Sprintf("pivotCache/pivotCacheDefinition%d.xml", pivotCacheID), "") wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
workBookPivotCacheRID := f.addRels(wbRelsPath, SourceRelationshipPivotCache, fmt.Sprintf("/xl/pivotCache/pivotCacheDefinition%d.xml", pivotCacheID), "")
cacheID := f.addWorkbookPivotCache(workBookPivotCacheRID) cacheID := f.addWorkbookPivotCache(workBookPivotCacheRID)
pivotCacheRels := "xl/pivotTables/_rels/pivotTable" + strconv.Itoa(pivotTableID) + ".xml.rels" pivotCacheRels := "xl/pivotTables/_rels/pivotTable" + strconv.Itoa(pivotTableID) + ".xml.rels"
@ -661,7 +664,7 @@ func (f *File) getPivotTableFieldNameDefaultSubtotal(name string, fields []Pivot
return false, false return false, false
} }
// addWorkbookPivotCache add the association ID of the pivot cache in xl/workbook.xml. // addWorkbookPivotCache add the association ID of the pivot cache in workbook.xml.
func (f *File) addWorkbookPivotCache(RID int) int { func (f *File) addWorkbookPivotCache(RID int) int {
wb := f.workbookReader() wb := f.workbookReader()
if wb.PivotCaches == nil { if wb.PivotCaches == nil {

@ -19,7 +19,9 @@ import (
"io" "io"
"log" "log"
"math" "math"
"path/filepath"
"strconv" "strconv"
"strings"
) )
// GetRows return all the rows in a sheet by given worksheet name (case // GetRows return all the rows in a sheet by given worksheet name (case
@ -288,7 +290,8 @@ func (f *File) GetRowHeight(sheet string, row int) (float64, error) {
// after deserialization of xl/sharedStrings.xml. // after deserialization of xl/sharedStrings.xml.
func (f *File) sharedStringsReader() *xlsxSST { func (f *File) sharedStringsReader() *xlsxSST {
var err error var err error
wbPath := f.getWorkbookPath()
relPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
f.Lock() f.Lock()
defer f.Unlock() defer f.Unlock()
if f.SharedStrings == nil { if f.SharedStrings == nil {
@ -308,14 +311,14 @@ func (f *File) sharedStringsReader() *xlsxSST {
} }
} }
f.addContentTypePart(0, "sharedStrings") f.addContentTypePart(0, "sharedStrings")
rels := f.relsReader("xl/_rels/workbook.xml.rels") rels := f.relsReader(relPath)
for _, rel := range rels.Relationships { for _, rel := range rels.Relationships {
if rel.Target == "sharedStrings.xml" { if rel.Target == "/xl/sharedStrings.xml" {
return f.SharedStrings return f.SharedStrings
} }
} }
// Update xl/_rels/workbook.xml.rels // Update workbook.xml.rels
f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipSharedStrings, "sharedStrings.xml", "") f.addRels(relPath, SourceRelationshipSharedStrings, "/xl/sharedStrings.xml", "")
} }
return f.SharedStrings return f.SharedStrings

@ -22,6 +22,7 @@ import (
"log" "log"
"os" "os"
"path" "path"
"path/filepath"
"reflect" "reflect"
"regexp" "regexp"
"strconv" "strconv"
@ -57,9 +58,11 @@ func (f *File) NewSheet(name string) int {
f.setContentTypes("/xl/worksheets/sheet"+strconv.Itoa(sheetID)+".xml", ContentTypeSpreadSheetMLWorksheet) f.setContentTypes("/xl/worksheets/sheet"+strconv.Itoa(sheetID)+".xml", ContentTypeSpreadSheetMLWorksheet)
// Create new sheet /xl/worksheets/sheet%d.xml // Create new sheet /xl/worksheets/sheet%d.xml
f.setSheet(sheetID, name) f.setSheet(sheetID, name)
// Update xl/_rels/workbook.xml.rels // Update workbook.xml.rels
rID := f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipWorkSheet, fmt.Sprintf("worksheets/sheet%d.xml", sheetID), "") wbPath := f.getWorkbookPath()
// Update xl/workbook.xml wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
rID := f.addRels(wbRelsPath, SourceRelationshipWorkSheet, fmt.Sprintf("/xl/worksheets/sheet%d.xml", sheetID), "")
// Update workbook.xml
f.setWorkbook(name, sheetID, rID) f.setWorkbook(name, sheetID, rID)
return f.GetSheetIndex(name) return f.GetSheetIndex(name)
} }
@ -89,18 +92,33 @@ func (f *File) contentTypesWriter() {
} }
} }
// workbookReader provides a function to get the pointer to the xl/workbook.xml // getWorkbookPath provides a function to get the path of the workbook.xml in
// the spreadsheet.
func (f *File) getWorkbookPath() (path string) {
	// The package-level relationships part lists the main document part
	// (the workbook) under the officeDocument relationship type.
	rels := f.relsReader("_rels/.rels")
	if rels == nil {
		return
	}
	for _, rel := range rels.Relationships {
		if rel.Type == SourceRelationshipOfficeDocument {
			// Relationship targets are package part names, which always use
			// a forward slash regardless of the host OS. Trimming
			// filepath.Separator here would leave the leading "/" in place
			// on Windows, where the separator is a backslash.
			path = strings.TrimPrefix(rel.Target, "/")
			return
		}
	}
	// No officeDocument relationship found: path stays empty.
	return
}
// workbookReader provides a function to get the pointer to the workbook.xml
// structure after deserialization. // structure after deserialization.
func (f *File) workbookReader() *xlsxWorkbook { func (f *File) workbookReader() *xlsxWorkbook {
var err error var err error
if f.WorkBook == nil { if f.WorkBook == nil {
wbPath := f.getWorkbookPath()
f.WorkBook = new(xlsxWorkbook) f.WorkBook = new(xlsxWorkbook)
if _, ok := f.xmlAttr["xl/workbook.xml"]; !ok { if _, ok := f.xmlAttr[wbPath]; !ok {
d := f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML("xl/workbook.xml")))) d := f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML(wbPath))))
f.xmlAttr["xl/workbook.xml"] = append(f.xmlAttr["xl/workbook.xml"], getRootElement(d)...) f.xmlAttr[wbPath] = append(f.xmlAttr[wbPath], getRootElement(d)...)
f.addNameSpaces("xl/workbook.xml", SourceRelationship) f.addNameSpaces(wbPath, SourceRelationship)
} }
if err = f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML("xl/workbook.xml")))). if err = f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML(wbPath)))).
Decode(f.WorkBook); err != nil && err != io.EOF { Decode(f.WorkBook); err != nil && err != io.EOF {
log.Printf("xml decode error: %s", err) log.Printf("xml decode error: %s", err)
} }
@ -108,31 +126,28 @@ func (f *File) workbookReader() *xlsxWorkbook {
return f.WorkBook return f.WorkBook
} }
// workBookWriter provides a function to save xl/workbook.xml after serialize // workBookWriter provides a function to save workbook.xml after serialize
// structure. // structure.
func (f *File) workBookWriter() { func (f *File) workBookWriter() {
if f.WorkBook != nil { if f.WorkBook != nil {
output, _ := xml.Marshal(f.WorkBook) output, _ := xml.Marshal(f.WorkBook)
f.saveFileList("xl/workbook.xml", replaceRelationshipsBytes(f.replaceNameSpaceBytes("xl/workbook.xml", output))) f.saveFileList(f.getWorkbookPath(), replaceRelationshipsBytes(f.replaceNameSpaceBytes(f.getWorkbookPath(), output)))
} }
} }
// workSheetWriter provides a function to save xl/worksheets/sheet%d.xml after // workSheetWriter provides a function to save xl/worksheets/sheet%d.xml after
// serialize structure. // serialize structure.
func (f *File) workSheetWriter() { func (f *File) workSheetWriter() {
// optimize memory alloc
var arr []byte var arr []byte
buffer := bytes.NewBuffer(arr) buffer := bytes.NewBuffer(arr)
encoder := xml.NewEncoder(buffer) encoder := xml.NewEncoder(buffer)
for p, sheet := range f.Sheet { for p, sheet := range f.Sheet {
if sheet != nil { if sheet != nil {
for k, v := range sheet.SheetData.Row { for k, v := range sheet.SheetData.Row {
f.Sheet[p].SheetData.Row[k].C = trimCell(v.C) f.Sheet[p].SheetData.Row[k].C = trimCell(v.C)
} }
// reusing buffer // reusing buffer
encoder.Encode(sheet) _ = encoder.Encode(sheet)
f.saveFileList(p, replaceRelationshipsBytes(f.replaceNameSpaceBytes(p, buffer.Bytes()))) f.saveFileList(p, replaceRelationshipsBytes(f.replaceNameSpaceBytes(p, buffer.Bytes())))
ok := f.checked[p] ok := f.checked[p]
if ok { if ok {
@ -419,10 +434,12 @@ func (f *File) GetSheetList() (list []string) {
} }
// getSheetMap provides a function to get worksheet name and XML file path map // getSheetMap provides a function to get worksheet name and XML file path map
// of XLSX. // of the spreadsheet.
func (f *File) getSheetMap() map[string]string { func (f *File) getSheetMap() map[string]string {
content := f.workbookReader() content := f.workbookReader()
rels := f.relsReader("xl/_rels/workbook.xml.rels") wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
rels := f.relsReader(wbRelsPath)
maps := map[string]string{} maps := map[string]string{}
for _, v := range content.Sheets.Sheet { for _, v := range content.Sheets.Sheet {
for _, rel := range rels.Relationships { for _, rel := range rels.Relationships {
@ -472,7 +489,9 @@ func (f *File) DeleteSheet(name string) {
} }
sheetName := trimSheetName(name) sheetName := trimSheetName(name)
wb := f.workbookReader() wb := f.workbookReader()
wbRels := f.relsReader("xl/_rels/workbook.xml.rels") wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
wbRels := f.relsReader(wbRelsPath)
for idx, sheet := range wb.Sheets.Sheet { for idx, sheet := range wb.Sheets.Sheet {
if sheet.Name == sheetName { if sheet.Name == sheetName {
wb.Sheets.Sheet = append(wb.Sheets.Sheet[:idx], wb.Sheets.Sheet[idx+1:]...) wb.Sheets.Sheet = append(wb.Sheets.Sheet[:idx], wb.Sheets.Sheet[idx+1:]...)
@ -511,10 +530,11 @@ func (f *File) DeleteSheet(name string) {
} }
// deleteSheetFromWorkbookRels provides a function to remove worksheet // deleteSheetFromWorkbookRels provides a function to remove worksheet
// relationships by given relationships ID in the file // relationships by given relationships ID in the file workbook.xml.rels.
// xl/_rels/workbook.xml.rels.
func (f *File) deleteSheetFromWorkbookRels(rID string) string { func (f *File) deleteSheetFromWorkbookRels(rID string) string {
content := f.relsReader("xl/_rels/workbook.xml.rels") wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
content := f.relsReader(wbRelsPath)
for k, v := range content.Relationships { for k, v := range content.Relationships {
if v.ID == rID { if v.ID == rID {
content.Relationships = append(content.Relationships[:k], content.Relationships[k+1:]...) content.Relationships = append(content.Relationships[:k], content.Relationships[k+1:]...)

@ -33,6 +33,7 @@ var (
// Source relationship and namespace. // Source relationship and namespace.
const ( const (
SourceRelationshipOfficeDocument = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
SourceRelationshipChart = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart" SourceRelationshipChart = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart"
SourceRelationshipComments = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" SourceRelationshipComments = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
SourceRelationshipImage = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" SourceRelationshipImage = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
@ -50,6 +51,7 @@ const (
NameSpaceXML = "http://www.w3.org/XML/1998/namespace" NameSpaceXML = "http://www.w3.org/XML/1998/namespace"
NameSpaceXMLSchemaInstance = "http://www.w3.org/2001/XMLSchema-instance" NameSpaceXMLSchemaInstance = "http://www.w3.org/2001/XMLSchema-instance"
StrictSourceRelationship = "http://purl.oclc.org/ooxml/officeDocument/relationships" StrictSourceRelationship = "http://purl.oclc.org/ooxml/officeDocument/relationships"
StrictSourceRelationshipOfficeDocument = "http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument"
StrictSourceRelationshipChart = "http://purl.oclc.org/ooxml/officeDocument/relationships/chart" StrictSourceRelationshipChart = "http://purl.oclc.org/ooxml/officeDocument/relationships/chart"
StrictSourceRelationshipComments = "http://purl.oclc.org/ooxml/officeDocument/relationships/comments" StrictSourceRelationshipComments = "http://purl.oclc.org/ooxml/officeDocument/relationships/comments"
StrictSourceRelationshipImage = "http://purl.oclc.org/ooxml/officeDocument/relationships/image" StrictSourceRelationshipImage = "http://purl.oclc.org/ooxml/officeDocument/relationships/image"

@ -313,7 +313,7 @@ type xlsxSheetData struct {
// xlsxRow directly maps the row element. The element expresses information // xlsxRow directly maps the row element. The element expresses information
// about an entire row of a worksheet, and contains all cell definitions for a // about an entire row of a worksheet, and contains all cell definitions for a
// particular row in the worksheet. // particular row in the worksheet.
type xlsxRow struct { // alignment word type xlsxRow struct { // alignment word
C []xlsxC `xml:"c"` C []xlsxC `xml:"c"`
R int `xml:"r,attr,omitempty"` R int `xml:"r,attr,omitempty"`
Spans string `xml:"spans,attr,omitempty"` Spans string `xml:"spans,attr,omitempty"`
@ -456,7 +456,6 @@ type DataValidation struct {
// s (Shared String) | Cell containing a shared string. // s (Shared String) | Cell containing a shared string.
// str (String) | Cell containing a formula string. // str (String) | Cell containing a formula string.
// //
// fixme: how to make this structure smaller; cur size is 152 bytes. it's be too bigger.
type xlsxC struct { type xlsxC struct {
XMLName xml.Name `xml:"c"` XMLName xml.Name `xml:"c"`
XMLSpace xml.Attr `xml:"space,attr,omitempty"` XMLSpace xml.Attr `xml:"space,attr,omitempty"`

Loading…
Cancel
Save