// Copyright 2016 - 2021 The excelize Authors. All rights reserved. Use of // this source code is governed by a BSD-style license that can be found in // the LICENSE file. // // Package excelize providing a set of functions that allow you to write to // and read from XLSX / XLSM / XLTM files. Supports reading and writing // spreadsheet documents generated by Microsoft Excelâ„¢ 2007 and later. Supports // complex components by high compatibility, and provided streaming API for // generating or reading data from a worksheet with huge amounts of data. This // library needs Go version 1.15 or later. package excelize import ( "archive/zip" "bytes" "container/list" "encoding/xml" "fmt" "io" "io/ioutil" "os" "regexp" "strconv" "strings" ) // ReadZipReader extract spreadsheet with given options. func (f *File) ReadZipReader(r *zip.Reader) (map[string][]byte, int, error) { var ( err error docPart = map[string]string{ "[content_types].xml": "[Content_Types].xml", "xl/sharedstrings.xml": "xl/sharedStrings.xml", } fileList = make(map[string][]byte, len(r.File)) worksheets int unzipSize int64 ) for _, v := range r.File { fileSize := v.FileInfo().Size() unzipSize += fileSize if unzipSize > f.options.UnzipSizeLimit { return fileList, worksheets, newUnzipSizeLimitError(f.options.UnzipSizeLimit) } fileName := strings.Replace(v.Name, "\\", "/", -1) if partName, ok := docPart[strings.ToLower(fileName)]; ok { fileName = partName } if strings.HasPrefix(fileName, "xl/worksheets/sheet") { worksheets++ if fileSize > f.options.WorksheetUnzipMemLimit && !v.FileInfo().IsDir() { if tempFile, err := f.unzipToTemp(v); err == nil { f.tempFiles.Store(fileName, tempFile) continue } } } if fileList[fileName], err = readFile(v); err != nil { return nil, 0, err } } return fileList, worksheets, nil } // unzipToTemp unzip the zip entity to the system temporary directory and // returned the unzipped file path. func (f *File) unzipToTemp(zipFile *zip.File) (string, error) { tmp, err := ioutil.TempFile(os.TempDir(), "excelize-") if err != nil { return "", err } rc, err := zipFile.Open() if err != nil { return tmp.Name(), err } _, err = io.Copy(tmp, rc) rc.Close() tmp.Close() return tmp.Name(), err } // readXML provides a function to read XML content as bytes. func (f *File) readXML(name string) []byte { if content, _ := f.Pkg.Load(name); content != nil { return content.([]byte) } if content, ok := f.streams[name]; ok { return content.rawData.buf.Bytes() } return []byte{} } // readBytes read file as bytes by given path. func (f *File) readBytes(name string) []byte { content := f.readXML(name) if len(content) != 0 { return content } file, err := f.readTemp(name) if err != nil { return content } content, _ = ioutil.ReadAll(file) f.Pkg.Store(name, content) file.Close() return content } // readTemp read file from system temporary directory by given path. func (f *File) readTemp(name string) (file *os.File, err error) { path, ok := f.tempFiles.Load(name) if !ok { return } file, err = os.Open(path.(string)) return } // saveFileList provides a function to update given file content in file list // of spreadsheet. func (f *File) saveFileList(name string, content []byte) { f.Pkg.Store(name, append([]byte(XMLHeader), content...)) } // Read file content as string in a archive file. func readFile(file *zip.File) ([]byte, error) { rc, err := file.Open() if err != nil { return nil, err } dat := make([]byte, 0, file.FileInfo().Size()) buff := bytes.NewBuffer(dat) _, _ = io.Copy(buff, rc) return buff.Bytes(), rc.Close() } // SplitCellName splits cell name to column name and row number. // // Example: // // excelize.SplitCellName("AK74") // return "AK", 74, nil // func SplitCellName(cell string) (string, int, error) { alpha := func(r rune) bool { return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z') || (r == 36) } if strings.IndexFunc(cell, alpha) == 0 { i := strings.LastIndexFunc(cell, alpha) if i >= 0 && i < len(cell)-1 { col, rowstr := strings.ReplaceAll(cell[:i+1], "$", ""), cell[i+1:] if row, err := strconv.Atoi(rowstr); err == nil && row > 0 { return col, row, nil } } } return "", -1, newInvalidCellNameError(cell) } // JoinCellName joins cell name from column name and row number. func JoinCellName(col string, row int) (string, error) { normCol := strings.Map(func(rune rune) rune { switch { case 'A' <= rune && rune <= 'Z': return rune case 'a' <= rune && rune <= 'z': return rune - 32 } return -1 }, col) if len(col) == 0 || len(col) != len(normCol) { return "", newInvalidColumnNameError(col) } if row < 1 { return "", newInvalidRowNumberError(row) } return normCol + strconv.Itoa(row), nil } // ColumnNameToNumber provides a function to convert Excel sheet column name // to int. Column name case insensitive. The function returns an error if // column name incorrect. // // Example: // // excelize.ColumnNameToNumber("AK") // returns 37, nil // func ColumnNameToNumber(name string) (int, error) { if len(name) == 0 { return -1, newInvalidColumnNameError(name) } col := 0 multi := 1 for i := len(name) - 1; i >= 0; i-- { r := name[i] if r >= 'A' && r <= 'Z' { col += int(r-'A'+1) * multi } else if r >= 'a' && r <= 'z' { col += int(r-'a'+1) * multi } else { return -1, newInvalidColumnNameError(name) } multi *= 26 } if col > TotalColumns { return -1, ErrColumnNumber } return col, nil } // ColumnNumberToName provides a function to convert the integer to Excel // sheet column title. // // Example: // // excelize.ColumnNumberToName(37) // returns "AK", nil // func ColumnNumberToName(num int) (string, error) { if num < 1 { return "", fmt.Errorf("incorrect column number %d", num) } if num > TotalColumns { return "", ErrColumnNumber } var col string for num > 0 { col = string(rune((num-1)%26+65)) + col num = (num - 1) / 26 } return col, nil } // CellNameToCoordinates converts alphanumeric cell name to [X, Y] coordinates // or returns an error. // // Example: // // excelize.CellNameToCoordinates("A1") // returns 1, 1, nil // excelize.CellNameToCoordinates("Z3") // returns 26, 3, nil // func CellNameToCoordinates(cell string) (int, int, error) { colname, row, err := SplitCellName(cell) if err != nil { return -1, -1, newCellNameToCoordinatesError(cell, err) } if row > TotalRows { return -1, -1, ErrMaxRows } col, err := ColumnNameToNumber(colname) return col, row, err } // CoordinatesToCellName converts [X, Y] coordinates to alpha-numeric cell // name or returns an error. // // Example: // // excelize.CoordinatesToCellName(1, 1) // returns "A1", nil // excelize.CoordinatesToCellName(1, 1, true) // returns "$A$1", nil // func CoordinatesToCellName(col, row int, abs ...bool) (string, error) { if col < 1 || row < 1 { return "", fmt.Errorf("invalid cell coordinates [%d, %d]", col, row) } sign := "" for _, a := range abs { if a { sign = "$" } } colname, err := ColumnNumberToName(col) return sign + colname + sign + strconv.Itoa(row), err } // areaRefToCoordinates provides a function to convert area reference to a // pair of coordinates. func areaRefToCoordinates(ref string) ([]int, error) { rng := strings.Split(strings.Replace(ref, "$", "", -1), ":") if len(rng) < 2 { return nil, ErrParameterInvalid } return areaRangeToCoordinates(rng[0], rng[1]) } // areaRangeToCoordinates provides a function to convert cell range to a // pair of coordinates. func areaRangeToCoordinates(firstCell, lastCell string) ([]int, error) { coordinates := make([]int, 4) var err error coordinates[0], coordinates[1], err = CellNameToCoordinates(firstCell) if err != nil { return coordinates, err } coordinates[2], coordinates[3], err = CellNameToCoordinates(lastCell) return coordinates, err } // sortCoordinates provides a function to correct the coordinate area, such // correct C1:B3 to B1:C3. func sortCoordinates(coordinates []int) error { if len(coordinates) != 4 { return ErrCoordinates } if coordinates[2] < coordinates[0] { coordinates[2], coordinates[0] = coordinates[0], coordinates[2] } if coordinates[3] < coordinates[1] { coordinates[3], coordinates[1] = coordinates[1], coordinates[3] } return nil } // coordinatesToAreaRef provides a function to convert a pair of coordinates // to area reference. func (f *File) coordinatesToAreaRef(coordinates []int) (string, error) { if len(coordinates) != 4 { return "", ErrCoordinates } firstCell, err := CoordinatesToCellName(coordinates[0], coordinates[1]) if err != nil { return "", err } lastCell, err := CoordinatesToCellName(coordinates[2], coordinates[3]) if err != nil { return "", err } return firstCell + ":" + lastCell, err } // flatSqref convert reference sequence to cell coordinates list. func (f *File) flatSqref(sqref string) (cells map[int][][]int, err error) { var coordinates []int cells = make(map[int][][]int) for _, ref := range strings.Fields(sqref) { rng := strings.Split(ref, ":") switch len(rng) { case 1: var col, row int col, row, err = CellNameToCoordinates(rng[0]) if err != nil { return } cells[col] = append(cells[col], []int{col, row}) case 2: if coordinates, err = areaRefToCoordinates(ref); err != nil { return } _ = sortCoordinates(coordinates) for c := coordinates[0]; c <= coordinates[2]; c++ { for r := coordinates[1]; r <= coordinates[3]; r++ { cells[c] = append(cells[c], []int{c, r}) } } } } return } // inCoordinates provides a method to check if an coordinate is present in // coordinates array, and return the index of its location, otherwise // return -1. func inCoordinates(a [][]int, x []int) int { for idx, n := range a { if x[0] == n[0] && x[1] == n[1] { return idx } } return -1 } // inStrSlice provides a method to check if an element is present in an array, // and return the index of its location, otherwise return -1. func inStrSlice(a []string, x string) int { for idx, n := range a { if x == n { return idx } } return -1 } // inFloat64Slice provides a method to check if an element is present in an // float64 array, and return the index of its location, otherwise return -1. func inFloat64Slice(a []float64, x float64) int { for idx, n := range a { if x == n { return idx } } return -1 } // boolPtr returns a pointer to a bool with the given value. func boolPtr(b bool) *bool { return &b } // intPtr returns a pointer to a int with the given value. func intPtr(i int) *int { return &i } // float64Ptr returns a pointer to a float64 with the given value. func float64Ptr(f float64) *float64 { return &f } // stringPtr returns a pointer to a string with the given value. func stringPtr(s string) *string { return &s } // defaultTrue returns true if b is nil, or the pointed value. func defaultTrue(b *bool) bool { if b == nil { return true } return *b } // MarshalXML convert the boolean data type to literal values 0 or 1 on // serialization. func (avb attrValBool) MarshalXML(e *xml.Encoder, start xml.StartElement) error { attr := xml.Attr{ Name: xml.Name{ Space: start.Name.Space, Local: "val", }, Value: "0", } if avb.Val != nil { if *avb.Val { attr.Value = "1" } else { attr.Value = "0" } } start.Attr = []xml.Attr{attr} e.EncodeToken(start) e.EncodeToken(start.End()) return nil } // UnmarshalXML convert the literal values true, false, 1, 0 of the XML // attribute to boolean data type on deserialization. func (avb *attrValBool) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { for { t, err := d.Token() if err != nil { return err } found := false switch t.(type) { case xml.StartElement: return ErrAttrValBool case xml.EndElement: found = true } if found { break } } for _, attr := range start.Attr { if attr.Name.Local == "val" { if attr.Value == "" { val := true avb.Val = &val } else { val, err := strconv.ParseBool(attr.Value) if err != nil { return err } avb.Val = &val } return nil } } return nil } // parseFormatSet provides a method to convert format string to []byte and // handle empty string. func parseFormatSet(formatSet string) []byte { if formatSet != "" { return []byte(formatSet) } return []byte("{}") } // namespaceStrictToTransitional provides a method to convert Strict and // Transitional namespaces. func namespaceStrictToTransitional(content []byte) []byte { var namespaceTranslationDic = map[string]string{ StrictSourceRelationship: SourceRelationship.Value, StrictSourceRelationshipOfficeDocument: SourceRelationshipOfficeDocument, StrictSourceRelationshipChart: SourceRelationshipChart, StrictSourceRelationshipComments: SourceRelationshipComments, StrictSourceRelationshipImage: SourceRelationshipImage, StrictNameSpaceSpreadSheet: NameSpaceSpreadSheet.Value, } for s, n := range namespaceTranslationDic { content = bytesReplace(content, []byte(s), []byte(n), -1) } return content } // bytesReplace replace old bytes with given new. func bytesReplace(s, old, new []byte, n int) []byte { if n == 0 { return s } if len(old) < len(new) { return bytes.Replace(s, old, new, n) } if n < 0 { n = len(s) } var wid, i, j, w int for i, j = 0, 0; i < len(s) && j < n; j++ { wid = bytes.Index(s[i:], old) if wid < 0 { break } w += copy(s[w:], s[i:i+wid]) w += copy(s[w:], new) i += wid + len(old) } w += copy(s[w:], s[i:]) return s[:w] } // genSheetPasswd provides a method to generate password for worksheet // protection by given plaintext. When an Excel sheet is being protected with // a password, a 16-bit (two byte) long hash is generated. To verify a // password, it is compared to the hash. Obviously, if the input data volume // is great, numerous passwords will match the same hash. Here is the // algorithm to create the hash value: // // take the ASCII values of all characters shift left the first character 1 bit, // the second 2 bits and so on (use only the lower 15 bits and rotate all higher bits, // the highest bit of the 16-bit value is always 0 [signed short]) // XOR all these values // XOR the count of characters // XOR the constant 0xCE4B func genSheetPasswd(plaintext string) string { var password int64 = 0x0000 var charPos uint = 1 for _, v := range plaintext { value := int64(v) << charPos charPos++ rotatedBits := value >> 15 // rotated bits beyond bit 15 value &= 0x7fff // first 15 bits password ^= (value | rotatedBits) } password ^= int64(len(plaintext)) password ^= 0xCE4B return strings.ToUpper(strconv.FormatInt(password, 16)) } // getRootElement extract root element attributes by given XML decoder. func getRootElement(d *xml.Decoder) []xml.Attr { tokenIdx := 0 for { token, _ := d.Token() if token == nil { break } switch startElement := token.(type) { case xml.StartElement: tokenIdx++ if tokenIdx == 1 { return startElement.Attr } } } return nil } // genXMLNamespace generate serialized XML attributes with a multi namespace // by given element attributes. func genXMLNamespace(attr []xml.Attr) string { var rootElement string for _, v := range attr { if lastSpace := getXMLNamespace(v.Name.Space, attr); lastSpace != "" { if lastSpace == NameSpaceXML { lastSpace = "xml" } rootElement += fmt.Sprintf("%s:%s=\"%s\" ", lastSpace, v.Name.Local, v.Value) continue } rootElement += fmt.Sprintf("%s=\"%s\" ", v.Name.Local, v.Value) } return strings.TrimSpace(rootElement) + ">" } // getXMLNamespace extract XML namespace from specified element name and attributes. func getXMLNamespace(space string, attr []xml.Attr) string { for _, attribute := range attr { if attribute.Value == space { return attribute.Name.Local } } return space } // replaceNameSpaceBytes provides a function to replace the XML root element // attribute by the given component part path and XML content. func (f *File) replaceNameSpaceBytes(path string, contentMarshal []byte) []byte { var oldXmlns = []byte(`xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">`) var newXmlns = []byte(templateNamespaceIDMap) if attr, ok := f.xmlAttr[path]; ok { newXmlns = []byte(genXMLNamespace(attr)) } return bytesReplace(contentMarshal, oldXmlns, newXmlns, -1) } // addNameSpaces provides a function to add a XML attribute by the given // component part path. func (f *File) addNameSpaces(path string, ns xml.Attr) { exist := false mc := false ignore := -1 if attr, ok := f.xmlAttr[path]; ok { for i, attribute := range attr { if attribute.Name.Local == ns.Name.Local && attribute.Name.Space == ns.Name.Space { exist = true } if attribute.Name.Local == "Ignorable" && getXMLNamespace(attribute.Name.Space, attr) == "mc" { ignore = i } if attribute.Name.Local == "mc" && attribute.Name.Space == "xmlns" { mc = true } } } if !exist { f.xmlAttr[path] = append(f.xmlAttr[path], ns) if !mc { f.xmlAttr[path] = append(f.xmlAttr[path], SourceRelationshipCompatibility) } if ignore == -1 { f.xmlAttr[path] = append(f.xmlAttr[path], xml.Attr{ Name: xml.Name{Local: "Ignorable", Space: "mc"}, Value: ns.Name.Local, }) return } f.setIgnorableNameSpace(path, ignore, ns) } } // setIgnorableNameSpace provides a function to set XML namespace as ignorable // by the given attribute. func (f *File) setIgnorableNameSpace(path string, index int, ns xml.Attr) { ignorableNS := []string{"c14", "cdr14", "a14", "pic14", "x14", "xdr14", "x14ac", "dsp", "mso14", "dgm14", "x15", "x12ac", "x15ac", "xr", "xr2", "xr3", "xr4", "xr5", "xr6", "xr7", "xr8", "xr9", "xr10", "xr11", "xr12", "xr13", "xr14", "xr15", "x15", "x16", "x16r2", "mo", "mx", "mv", "o", "v"} if inStrSlice(strings.Fields(f.xmlAttr[path][index].Value), ns.Name.Local) == -1 && inStrSlice(ignorableNS, ns.Name.Local) != -1 { f.xmlAttr[path][index].Value = strings.TrimSpace(fmt.Sprintf("%s %s", f.xmlAttr[path][index].Value, ns.Name.Local)) } } // addSheetNameSpace add XML attribute for worksheet. func (f *File) addSheetNameSpace(sheet string, ns xml.Attr) { name := f.sheetMap[trimSheetName(sheet)] f.addNameSpaces(name, ns) } // isNumeric determines whether an expression is a valid numeric type and get // the precision for the numeric. func isNumeric(s string) (bool, int) { dot := false p := 0 for i, v := range s { if v == '.' { if dot { return false, 0 } dot = true } else if v < '0' || v > '9' { if i == 0 && v == '-' { continue } return false, 0 } else if dot { p++ } } return true, p } var ( bstrExp = regexp.MustCompile(`_x[a-zA-Z\d]{4}_`) bstrEscapeExp = regexp.MustCompile(`x[a-zA-Z\d]{4}_`) ) // bstrUnmarshal parses the binary basic string, this will trim escaped string // literal which not permitted in an XML 1.0 document. The basic string // variant type can store any valid Unicode character. Unicode characters // that cannot be directly represented in XML as defined by the XML 1.0 // specification, shall be escaped using the Unicode numerical character // representation escape character format _xHHHH_, where H represents a // hexadecimal character in the character's value. For example: The Unicode // character 8 is not permitted in an XML 1.0 document, so it shall be // escaped as _x0008_. To store the literal form of an escape sequence, the // initial underscore shall itself be escaped (i.e. stored as _x005F_). For // example: The string literal _x0008_ would be stored as _x005F_x0008_. func bstrUnmarshal(s string) (result string) { matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0 for _, match := range matches { result += s[cursor:match[0]] subStr := s[match[0]:match[1]] if subStr == "_x005F_" { cursor = match[1] result += "_" continue } if bstrExp.MatchString(subStr) { cursor = match[1] v, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`) if err != nil { if l > match[1]+6 && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) { result += subStr[:6] cursor = match[1] + 6 continue } result += subStr continue } result += v } } if cursor < l { result += s[cursor:] } return result } // bstrMarshal encode the escaped string literal which not permitted in an XML // 1.0 document. func bstrMarshal(s string) (result string) { matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0 for _, match := range matches { result += s[cursor:match[0]] subStr := s[match[0]:match[1]] if subStr == "_x005F_" { cursor = match[1] if match[1]+6 <= l && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) { _, err := strconv.Unquote(`"\u` + s[match[1]+1:match[1]+5] + `"`) if err == nil { result += subStr + "x005F" + subStr continue } } result += subStr + "x005F_" continue } if bstrExp.MatchString(subStr) { cursor = match[1] _, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`) if err == nil { result += "_x005F" + subStr continue } result += subStr } } if cursor < l { result += s[cursor:] } return result } // Stack defined an abstract data type that serves as a collection of elements. type Stack struct { list *list.List } // NewStack create a new stack. func NewStack() *Stack { list := list.New() return &Stack{list} } // Push a value onto the top of the stack. func (stack *Stack) Push(value interface{}) { stack.list.PushBack(value) } // Pop the top item of the stack and return it. func (stack *Stack) Pop() interface{} { e := stack.list.Back() if e != nil { stack.list.Remove(e) return e.Value } return nil } // Peek view the top item on the stack. func (stack *Stack) Peek() interface{} { e := stack.list.Back() if e != nil { return e.Value } return nil } // Len return the number of items in the stack. func (stack *Stack) Len() int { return stack.list.Len() } // Empty the stack. func (stack *Stack) Empty() bool { return stack.list.Len() == 0 }