Compatibility improvement: parse document core part (workbook) dynamically

formula
xuri 4 years ago
parent fcca8a3838
commit c82a185af8
No known key found for this signature in database
GPG Key ID: BA5E5BB1C948EDF7

@ -16,6 +16,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"path/filepath"
"strconv"
"strings"
)
@ -800,9 +801,11 @@ func (f *File) AddChartSheet(sheet, format string, combo ...string) error {
f.addContentTypePart(chartID, "chart")
f.addContentTypePart(sheetID, "chartsheet")
f.addContentTypePart(drawingID, "drawings")
// Update xl/_rels/workbook.xml.rels
rID := f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipChartsheet, fmt.Sprintf("chartsheets/sheet%d.xml", sheetID), "")
// Update xl/workbook.xml
// Update workbook.xml.rels
wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
rID := f.addRels(wbRelsPath, SourceRelationshipChartsheet, fmt.Sprintf("/xl/chartsheets/sheet%d.xml", sheetID), "")
// Update workbook.xml
f.setWorkbook(sheet, sheetID, rID)
chartsheet, _ := xml.Marshal(cs)
f.addSheetNameSpace(sheet, NameSpaceSpreadSheet)

@ -22,6 +22,7 @@ import (
"io/ioutil"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
@ -112,7 +113,7 @@ func OpenReader(r io.Reader, opt ...Options) (*File, error) {
return nil, err
}
f := newFile()
if bytes.Contains(b, oleIdentifier) {
if bytes.Contains(b, oleIdentifier) && len(opt) > 0 {
for _, o := range opt {
f.options = &o
}
@ -345,7 +346,9 @@ func (f *File) AddVBAProject(bin string) error {
return errors.New("unsupported VBA project extension")
}
f.setContentTypePartVBAProjectExtensions()
wb := f.relsReader("xl/_rels/workbook.xml.rels")
wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
wb := f.relsReader(wbRelsPath)
var rID int
var ok bool
for _, rel := range wb.Relationships {

@ -201,7 +201,7 @@ func TestCharsetTranscoder(t *testing.T) {
func TestOpenReader(t *testing.T) {
_, err := OpenReader(strings.NewReader(""))
assert.EqualError(t, err, "zip: not a valid zip file")
_, err = OpenReader(bytes.NewReader(oleIdentifier))
_, err = OpenReader(bytes.NewReader(oleIdentifier), Options{Password: "password"})
assert.EqualError(t, err, "decrypted file failed")
// Test open password protected spreadsheet created by Microsoft Office Excel 2010.

@ -206,7 +206,6 @@ func CoordinatesToCellName(col, row int) (string, error) {
if col < 1 || row < 1 {
return "", fmt.Errorf("invalid cell coordinates [%d, %d]", col, row)
}
//Using itoa will save more memory
colname, err := ColumnNumberToName(col)
return colname + strconv.Itoa(row), err
}
@ -244,11 +243,12 @@ func parseFormatSet(formatSet string) []byte {
// Transitional namespaces.
func namespaceStrictToTransitional(content []byte) []byte {
var namespaceTranslationDic = map[string]string{
StrictSourceRelationship: SourceRelationship.Value,
StrictSourceRelationshipChart: SourceRelationshipChart,
StrictSourceRelationshipComments: SourceRelationshipComments,
StrictSourceRelationshipImage: SourceRelationshipImage,
StrictNameSpaceSpreadSheet: NameSpaceSpreadSheet.Value,
StrictSourceRelationship: SourceRelationship.Value,
StrictSourceRelationshipOfficeDocument: SourceRelationshipOfficeDocument,
StrictSourceRelationshipChart: SourceRelationshipChart,
StrictSourceRelationshipComments: SourceRelationshipComments,
StrictSourceRelationshipImage: SourceRelationshipImage,
StrictNameSpaceSpreadSheet: NameSpaceSpreadSheet.Value,
}
for s, n := range namespaceTranslationDic {
content = bytesReplace(content, []byte(s), []byte(n), -1)

@ -15,6 +15,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"path/filepath"
"strconv"
"strings"
)
@ -138,7 +139,9 @@ func (f *File) AddPivotTable(opt *PivotTableOption) error {
}
// workbook pivot cache
workBookPivotCacheRID := f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipPivotCache, fmt.Sprintf("pivotCache/pivotCacheDefinition%d.xml", pivotCacheID), "")
wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
workBookPivotCacheRID := f.addRels(wbRelsPath, SourceRelationshipPivotCache, fmt.Sprintf("/xl/pivotCache/pivotCacheDefinition%d.xml", pivotCacheID), "")
cacheID := f.addWorkbookPivotCache(workBookPivotCacheRID)
pivotCacheRels := "xl/pivotTables/_rels/pivotTable" + strconv.Itoa(pivotTableID) + ".xml.rels"
@ -661,7 +664,7 @@ func (f *File) getPivotTableFieldNameDefaultSubtotal(name string, fields []Pivot
return false, false
}
// addWorkbookPivotCache add the association ID of the pivot cache in xl/workbook.xml.
// addWorkbookPivotCache adds the association ID of the pivot cache in workbook.xml.
func (f *File) addWorkbookPivotCache(RID int) int {
wb := f.workbookReader()
if wb.PivotCaches == nil {

@ -19,7 +19,9 @@ import (
"io"
"log"
"math"
"path/filepath"
"strconv"
"strings"
)
// GetRows return all the rows in a sheet by given worksheet name (case
@ -288,7 +290,8 @@ func (f *File) GetRowHeight(sheet string, row int) (float64, error) {
// after deserialization of xl/sharedStrings.xml.
func (f *File) sharedStringsReader() *xlsxSST {
var err error
wbPath := f.getWorkbookPath()
relPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
f.Lock()
defer f.Unlock()
if f.SharedStrings == nil {
@ -308,14 +311,14 @@ func (f *File) sharedStringsReader() *xlsxSST {
}
}
f.addContentTypePart(0, "sharedStrings")
rels := f.relsReader("xl/_rels/workbook.xml.rels")
rels := f.relsReader(relPath)
for _, rel := range rels.Relationships {
if rel.Target == "sharedStrings.xml" {
if rel.Target == "/xl/sharedStrings.xml" {
return f.SharedStrings
}
}
// Update xl/_rels/workbook.xml.rels
f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipSharedStrings, "sharedStrings.xml", "")
// Update workbook.xml.rels
f.addRels(relPath, SourceRelationshipSharedStrings, "/xl/sharedStrings.xml", "")
}
return f.SharedStrings

@ -22,6 +22,7 @@ import (
"log"
"os"
"path"
"path/filepath"
"reflect"
"regexp"
"strconv"
@ -57,9 +58,11 @@ func (f *File) NewSheet(name string) int {
f.setContentTypes("/xl/worksheets/sheet"+strconv.Itoa(sheetID)+".xml", ContentTypeSpreadSheetMLWorksheet)
// Create new sheet /xl/worksheets/sheet%d.xml
f.setSheet(sheetID, name)
// Update xl/_rels/workbook.xml.rels
rID := f.addRels("xl/_rels/workbook.xml.rels", SourceRelationshipWorkSheet, fmt.Sprintf("worksheets/sheet%d.xml", sheetID), "")
// Update xl/workbook.xml
// Update workbook.xml.rels
wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
rID := f.addRels(wbRelsPath, SourceRelationshipWorkSheet, fmt.Sprintf("/xl/worksheets/sheet%d.xml", sheetID), "")
// Update workbook.xml
f.setWorkbook(name, sheetID, rID)
return f.GetSheetIndex(name)
}
@ -89,18 +92,33 @@ func (f *File) contentTypesWriter() {
}
}
// workbookReader provides a function to get the pointer to the xl/workbook.xml
// getWorkbookPath provides a function to get the path of the workbook.xml in
// the spreadsheet. It scans the package-level relationships part
// ("_rels/.rels") for the first relationship whose type is
// SourceRelationshipOfficeDocument and returns that relationship's target with
// any leading "/" removed. An empty string is returned when the relationships
// part is unavailable or contains no such relationship.
func (f *File) getWorkbookPath() (path string) {
	rels := f.relsReader("_rels/.rels")
	if rels == nil {
		return ""
	}
	for _, rel := range rels.Relationships {
		if rel.Type != SourceRelationshipOfficeDocument {
			continue
		}
		// Relationship targets are package part names, which are always
		// "/"-separated regardless of the host OS. Trimming
		// filepath.Separator would leave the leading slash in place on
		// platforms whose separator is not "/" (e.g. Windows).
		return strings.TrimPrefix(rel.Target, "/")
	}
	return ""
}
// workbookReader provides a function to get the pointer to the workbook.xml
// structure after deserialization.
func (f *File) workbookReader() *xlsxWorkbook {
var err error
if f.WorkBook == nil {
wbPath := f.getWorkbookPath()
f.WorkBook = new(xlsxWorkbook)
if _, ok := f.xmlAttr["xl/workbook.xml"]; !ok {
d := f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML("xl/workbook.xml"))))
f.xmlAttr["xl/workbook.xml"] = append(f.xmlAttr["xl/workbook.xml"], getRootElement(d)...)
f.addNameSpaces("xl/workbook.xml", SourceRelationship)
if _, ok := f.xmlAttr[wbPath]; !ok {
d := f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML(wbPath))))
f.xmlAttr[wbPath] = append(f.xmlAttr[wbPath], getRootElement(d)...)
f.addNameSpaces(wbPath, SourceRelationship)
}
if err = f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML("xl/workbook.xml")))).
if err = f.xmlNewDecoder(bytes.NewReader(namespaceStrictToTransitional(f.readXML(wbPath)))).
Decode(f.WorkBook); err != nil && err != io.EOF {
log.Printf("xml decode error: %s", err)
}
@ -108,31 +126,28 @@ func (f *File) workbookReader() *xlsxWorkbook {
return f.WorkBook
}
// workBookWriter provides a function to save xl/workbook.xml after serialize
// workBookWriter provides a function to save workbook.xml after serialize
// structure.
func (f *File) workBookWriter() {
if f.WorkBook != nil {
output, _ := xml.Marshal(f.WorkBook)
f.saveFileList("xl/workbook.xml", replaceRelationshipsBytes(f.replaceNameSpaceBytes("xl/workbook.xml", output)))
f.saveFileList(f.getWorkbookPath(), replaceRelationshipsBytes(f.replaceNameSpaceBytes(f.getWorkbookPath(), output)))
}
}
// workSheetWriter provides a function to save xl/worksheets/sheet%d.xml after
// serialize structure.
func (f *File) workSheetWriter() {
// optimize memory alloc
var arr []byte
buffer := bytes.NewBuffer(arr)
encoder := xml.NewEncoder(buffer)
for p, sheet := range f.Sheet {
if sheet != nil {
for k, v := range sheet.SheetData.Row {
f.Sheet[p].SheetData.Row[k].C = trimCell(v.C)
}
// reusing buffer
encoder.Encode(sheet)
_ = encoder.Encode(sheet)
f.saveFileList(p, replaceRelationshipsBytes(f.replaceNameSpaceBytes(p, buffer.Bytes())))
ok := f.checked[p]
if ok {
@ -419,10 +434,12 @@ func (f *File) GetSheetList() (list []string) {
}
// getSheetMap provides a function to get worksheet name and XML file path map
// of XLSX.
// of the spreadsheet.
func (f *File) getSheetMap() map[string]string {
content := f.workbookReader()
rels := f.relsReader("xl/_rels/workbook.xml.rels")
wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
rels := f.relsReader(wbRelsPath)
maps := map[string]string{}
for _, v := range content.Sheets.Sheet {
for _, rel := range rels.Relationships {
@ -472,7 +489,9 @@ func (f *File) DeleteSheet(name string) {
}
sheetName := trimSheetName(name)
wb := f.workbookReader()
wbRels := f.relsReader("xl/_rels/workbook.xml.rels")
wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
wbRels := f.relsReader(wbRelsPath)
for idx, sheet := range wb.Sheets.Sheet {
if sheet.Name == sheetName {
wb.Sheets.Sheet = append(wb.Sheets.Sheet[:idx], wb.Sheets.Sheet[idx+1:]...)
@ -511,10 +530,11 @@ func (f *File) DeleteSheet(name string) {
}
// deleteSheetFromWorkbookRels provides a function to remove worksheet
// relationships by given relationships ID in the file
// xl/_rels/workbook.xml.rels.
// relationships by given relationships ID in the file workbook.xml.rels.
func (f *File) deleteSheetFromWorkbookRels(rID string) string {
content := f.relsReader("xl/_rels/workbook.xml.rels")
wbPath := f.getWorkbookPath()
wbRelsPath := strings.TrimPrefix(filepath.Join(filepath.Dir(wbPath), "_rels", filepath.Base(wbPath)+".rels"), string(filepath.Separator))
content := f.relsReader(wbRelsPath)
for k, v := range content.Relationships {
if v.ID == rID {
content.Relationships = append(content.Relationships[:k], content.Relationships[k+1:]...)

@ -33,6 +33,7 @@ var (
// Source relationship and namespace.
const (
SourceRelationshipOfficeDocument = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
SourceRelationshipChart = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart"
SourceRelationshipComments = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
SourceRelationshipImage = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
@ -50,6 +51,7 @@ const (
NameSpaceXML = "http://www.w3.org/XML/1998/namespace"
NameSpaceXMLSchemaInstance = "http://www.w3.org/2001/XMLSchema-instance"
StrictSourceRelationship = "http://purl.oclc.org/ooxml/officeDocument/relationships"
StrictSourceRelationshipOfficeDocument = "http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument"
StrictSourceRelationshipChart = "http://purl.oclc.org/ooxml/officeDocument/relationships/chart"
StrictSourceRelationshipComments = "http://purl.oclc.org/ooxml/officeDocument/relationships/comments"
StrictSourceRelationshipImage = "http://purl.oclc.org/ooxml/officeDocument/relationships/image"

@ -313,7 +313,7 @@ type xlsxSheetData struct {
// xlsxRow directly maps the row element. The element expresses information
// about an entire row of a worksheet, and contains all cell definitions for a
// particular row in the worksheet.
type xlsxRow struct { // alignment word
type xlsxRow struct { // alignment word
C []xlsxC `xml:"c"`
R int `xml:"r,attr,omitempty"`
Spans string `xml:"spans,attr,omitempty"`
@ -456,7 +456,6 @@ type DataValidation struct {
// s (Shared String) | Cell containing a shared string.
// str (String) | Cell containing a formula string.
//
// fixme: how to make this structure smaller; cur size is 152 bytes. it's be too bigger.
type xlsxC struct {
XMLName xml.Name `xml:"c"`
XMLSpace xml.Attr `xml:"space,attr,omitempty"`

Loading…
Cancel
Save