From d2e521d6aa390e2d6cd5ae988d52606c1301e662 Mon Sep 17 00:00:00 2001 From: kleeon <18507834+kleeon@users.noreply.github.com> Date: Thu, 13 Oct 2022 20:04:29 +0300 Subject: [PATCH 1/2] Fixed issue #13 --- xls/record/sst.go | 17 +++++++++++++++-- xls/workbook.go | 3 ++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/xls/record/sst.go b/xls/record/sst.go index f93858c..3d6a729 100644 --- a/xls/record/sst.go +++ b/xls/record/sst.go @@ -31,6 +31,13 @@ type SST struct { Rgb []structure.XLUnicodeRichExtendedString chLen int ByteLen int + + // These are needed to properly handle CONTINUE records. + // CONTINUE record contains grbit in the first byte unless it's a formatting run + // so we need to know whether all the string bytes have been consumed. + // OpenOffice.org - Microsoft Excel File Format - section 5.21 + RgbDone bool + Grbit byte } func (s *SST) RgbAppend(bts []byte) (err error) { @@ -80,9 +87,15 @@ func (s *SST) Read(readType string, grbit byte, prevLen int32) () { readType = "" - if cch >= (len(s.RgbSrc)-3)/(1+int(grbit&1)) || s.ByteLen > 0 { + s.Grbit = grbit + + headLen := 3 + headLen += int(grbit>>2&1) * 4 + headLen += int(grbit>>3&1) * 2 + + if cch >= (len(s.RgbSrc)-headLen)/(1+int(grbit&1)) || s.ByteLen > 0 { - addBytesLen := (len(s.RgbSrc) - 3) - s.ByteLen + addBytesLen := (len(s.RgbSrc) - headLen) - s.ByteLen if cch-s.chLen > addBytesLen/(1+int(grbit&1)) { s.chLen = s.chLen + addBytesLen/(1+int(grbit&1)) diff --git a/xls/workbook.go b/xls/workbook.go index f0d8265..af711ed 100644 --- a/xls/workbook.go +++ b/xls/workbook.go @@ -104,13 +104,14 @@ Next: if SSTContinue { readType = "continue" - if len(wb.sst.RgbSrc) == 0 { + if len(wb.sst.RgbSrc) == 0 || wb.sst.RgbDone { grbitOffset = 0 } else { grbitOffset = 1 } grbit = stream[sPoint] + grbit |= wb.sst.Grbit & 0b1100 wb.sst.RgbSrc = append(wb.sst.RgbSrc, stream[sPoint+grbitOffset:sPoint+recordDataLength]...) 
wb.sst.Read(readType, grbit, prevLen) From 55e93970380660eff66121efd522848277619133 Mon Sep 17 00:00:00 2001 From: kleeon <18507834+kleeon@users.noreply.github.com> Date: Tue, 21 Feb 2023 17:57:59 +0300 Subject: [PATCH 2/2] Fixed issue #18 --- cfb/cfb.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cfb/cfb.go b/cfb/cfb.go index af15b50..0a1cc24 100644 --- a/cfb/cfb.go +++ b/cfb/cfb.go @@ -3,10 +3,11 @@ package cfb import ( "bytes" "encoding/binary" - "github.com/shakinm/xlsReader/helpers" "io" "os" "path/filepath" + + "github.com/shakinm/xlsReader/helpers" ) // Cfb - Compound File Binary @@ -227,14 +228,16 @@ func (cfb *Cfb) getFatSectors() (err error) { // nolint: gocyclo } func (cfb *Cfb) getDataFromMiniFat(miniFatSectorLocation uint32, offset uint32) (data []byte, err error) { - sPoint := cfb.sectorOffset(miniFatSectorLocation) - point := sPoint + cfb.calculateMiniFatOffset(offset) + point := cfb.calculateMiniFatOffset(offset) + + containerStreamBytes, _ := cfb.getDataFromFatChain(miniFatSectorLocation) + containerStream := bytes.NewReader(containerStreamBytes) for { sector := NewMiniFatSector(&cfb.header) - err = cfb.getData(point, &sector.Data) + _, err := containerStream.ReadAt(sector.Data, int64(point)) if err != nil { return data, err @@ -248,7 +251,7 @@ func (cfb *Cfb) getDataFromMiniFat(miniFatSectorLocation uint32, offset uint32) offset = cfb.miniFatPositions[offset] - point = sPoint + cfb.calculateMiniFatOffset(offset) + point = cfb.calculateMiniFatOffset(offset) }