From 6ef8b015ccbc151e355ef5dfff1b903b719c2cfc Mon Sep 17 00:00:00 2001 From: Will Scott Date: Mon, 7 Nov 2022 13:53:38 +0100 Subject: [PATCH 1/8] Add a debugging form for car files. This change adds two new sub-commands to the car CLI car debug file.car creates a patch-file-compatible representation of the content of the car file. Blocks will be represented in dag-json pretty-printed form. car compile file.patch will do the inverse process of building a car file from a debug patch file. CIDs will be re-compiled based on the contents of blocks, with links in parent blocks updated to point to the compiled values. --- cmd/car/car.go | 26 ++ cmd/car/compile.go | 438 ++++++++++++++++++++++++++++ cmd/car/testdata/script/compile.txt | 28 ++ 3 files changed, 492 insertions(+) create mode 100644 cmd/car/compile.go create mode 100644 cmd/car/testdata/script/compile.txt diff --git a/cmd/car/car.go b/cmd/car/car.go index 9957c8a5..c66232f7 100644 --- a/cmd/car/car.go +++ b/cmd/car/car.go @@ -15,6 +15,19 @@ func main1() int { Name: "car", Usage: "Utility for working with car files", Commands: []*cli.Command{ + { + Name: "compile", + Usage: "compile a car file from a debug patch", + Action: CompileCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "output", + Aliases: []string{"o", "f"}, + Usage: "The file to write to", + TakesFile: true, + }, + }, + }, { Name: "create", Usage: "Create a car file", @@ -34,6 +47,19 @@ func main1() int { }, }, }, + { + Name: "debug", + Usage: "debug a car file", + Action: DebugCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "output", + Aliases: []string{"o", "f"}, + Usage: "The file to write to", + TakesFile: true, + }, + }, + }, { Name: "detach-index", Usage: "Detach an index to a detached file", diff --git a/cmd/car/compile.go b/cmd/car/compile.go new file mode 100644 index 00000000..3b381ad6 --- /dev/null +++ b/cmd/car/compile.go @@ -0,0 +1,438 @@ +package main + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "io" 
+ "os" + "regexp" + "strings" + "unicode/utf8" + + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + carv1 "github.com/ipld/go-car" + "github.com/ipld/go-car/util" + carv2 "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/blockstore" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec/dagjson" + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/linking" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/storage/memstore" + "github.com/urfave/cli/v2" +) + +// Compile is a command to translate between a human-debuggable patch-like format and a car file. +func CompileCar(c *cli.Context) error { + var err error + inStream := os.Stdin + if c.Args().Len() >= 1 { + inStream, err = os.Open(c.Args().First()) + if err != nil { + return err + } + } + + //parse headers. + br := bufio.NewReader(inStream) + header, _, err := br.ReadLine() + if err != nil { + return err + } + + v2 := strings.Contains(string(header), "--v2") + trimH := strings.TrimSpace(string(header)) + headerParts := strings.Split(trimH, " ") + carName := headerParts[len(headerParts)-1] + + roots := make([]cid.Cid, 0) + for { + peek, err := br.Peek(4) + if err == io.EOF { + break + } else if err != nil { + return err + } + if bytes.Equal(peek, []byte("--- ")) { + break + } + rootLine, _, err := br.ReadLine() + if err != nil { + return err + } + if strings.HasPrefix(string(rootLine), "root ") { + var rCidS string + fmt.Sscanf(string(rootLine), "root %s", &rCidS) + rCid, err := cid.Parse(rCidS) + if err != nil { + return err + } + roots = append(roots, rCid) + } + } + + //parse blocks. 
+ rawBlocks := make(map[cid.Cid][]byte) + rawCodecs := make(map[cid.Cid]string) + + for { + nextCid, mode, nextBlk, err := parsePatch(br) + if err == io.EOF { + break + } else if err != nil { + return err + } + rawBlocks[nextCid] = nextBlk + rawCodecs[nextCid] = mode + } + + fmt.Printf("structuring as tree...\n") + // structure as a tree + childMap := make(map[cid.Cid][]cid.Cid) + for c := range rawBlocks { + if _, ok := childMap[c]; !ok { + childMap[c] = make([]cid.Cid, 0) + } + for d, blk := range rawBlocks { + if c.Equals(d) { + continue + } + if strings.Contains(string(blk), c.String()) { + if _, ok := childMap[d]; !ok { + childMap[d] = make([]cid.Cid, 0) + } + childMap[d] = append(childMap[d], c) + } else if strings.Contains(string(blk), string(c.Bytes())) { + if _, ok := childMap[d]; !ok { + childMap[d] = make([]cid.Cid, 0) + } + childMap[d] = append(childMap[d], c) + } + } + } + + fmt.Printf("rebuilding...\n") + // re-parse/re-build CIDs + outBlocks := make(map[cid.Cid][]byte) + for len(childMap) > 0 { + for origCid, kids := range childMap { + if len(kids) == 0 { + // compile to final cid + blk := rawBlocks[origCid] + finalCid, finalBlk, err := serializeBlock(c.Context, origCid.Prefix(), rawCodecs[origCid], blk) + if err != nil { + return err + } + outBlocks[finalCid] = finalBlk + + // update other remaining nodes of the new cid. + for otherCid, otherKids := range childMap { + for i, otherKid := range otherKids { + if otherKid.Equals(origCid) { + if !finalCid.Equals(origCid) { + // update block + rawBlocks[otherCid] = bytes.ReplaceAll(rawBlocks[otherCid], origCid.Bytes(), finalCid.Bytes()) + rawBlocks[otherCid] = bytes.ReplaceAll(rawBlocks[otherCid], []byte(origCid.String()), []byte(finalCid.String())) + } + // remove from childMap + nok := append(otherKids[0:i], otherKids[i+1:]...) + childMap[otherCid] = nok + break // to next child map entry. 
+ } + } + } + + delete(childMap, origCid) + } + } + } + + if !v2 { + // write output + outStream := os.Stdout + if c.IsSet("output") { + outFileName := c.String("output") + if outFileName == "" { + outFileName = carName + } + outFile, err := os.Create(outFileName) + if err != nil { + return err + } + defer outFile.Close() + outStream = outFile + } + + if err := carv1.WriteHeader(&carv1.CarHeader{ + Roots: roots, + Version: 1, + }, outStream); err != nil { + return err + } + for c, blk := range outBlocks { + if err := util.LdWrite(outStream, c.Bytes(), blk); err != nil { + return err + } + } + } else { + outFileName := c.String("output") + if outFileName == "" { + outFileName = carName + } + + if outFileName == "-" && !c.IsSet("output") { + return fmt.Errorf("cannot stream carv2's to stdout") + } + bs, err := blockstore.OpenReadWrite(outFileName, roots) + if err != nil { + return err + } + for bc, blk := range outBlocks { + ob, _ := blocks.NewBlockWithCid(blk, bc) + bs.Put(c.Context, ob) + } + return bs.Finalize() + } + + return nil +} + +func serializeBlock(ctx context.Context, codec cid.Prefix, encoding string, raw []byte) (cid.Cid, []byte, error) { + ls := cidlink.DefaultLinkSystem() + store := memstore.Store{Bag: map[string][]byte{}} + ls.SetReadStorage(&store) + ls.SetWriteStorage(&store) + b := basicnode.Prototype.Any.NewBuilder() + if encoding == "json" { + if err := dagjson.Decode(b, bytes.NewBuffer(raw)); err != nil { + return cid.Undef, nil, err + } + } else if encoding == "raw" { + if err := b.AssignBytes(raw); err != nil { + return cid.Undef, nil, err + } + } else { + return cid.Undef, nil, fmt.Errorf("unknown encoding: %s", encoding) + } + lnk, err := ls.Store(linking.LinkContext{Ctx: ctx}, cidlink.LinkPrototype{Prefix: codec}, b.Build()) + if err != nil { + return cid.Undef, nil, err + } + outCid := lnk.(cidlink.Link).Cid + outBytes, outErr := store.Get(ctx, outCid.KeyString()) + return outCid, outBytes, outErr +} + +// DebugCar is a command to 
translate between a car file, and a human-debuggable patch-like format. +func DebugCar(c *cli.Context) error { + var err error + inStream := os.Stdin + inFile := "-" + if c.Args().Len() >= 1 { + inFile = c.Args().First() + inStream, err = os.Open(inFile) + if err != nil { + return err + } + } + + rd, err := carv2.NewBlockReader(inStream) + if err != nil { + return err + } + + // patch the header. + outStream := os.Stdout + if c.IsSet("output") { + outFileName := c.String("output") + outFile, err := os.Create(outFileName) + if err != nil { + return err + } + defer outFile.Close() + outStream = outFile + } + + outStream.WriteString("car compile ") + if rd.Version == 2 { + outStream.WriteString("--v2 ") + } + outStream.WriteString(inFile + "\r\n") + for _, rt := range rd.Roots { + outStream.WriteString("root " + rt.String() + "\r\n") + } + + // patch each block. + nxt, err := rd.Next() + if err != nil { + return err + } + for nxt != nil { + chunk, err := patch(c.Context, nxt.Cid(), nxt.RawData()) + if err != nil { + return err + } + outStream.Write(chunk) + + nxt, err = rd.Next() + if err == io.EOF { + return nil + } + } + + return nil +} + +func patch(ctx context.Context, c cid.Cid, blk []byte) ([]byte, error) { + ls := cidlink.DefaultLinkSystem() + store := memstore.Store{Bag: map[string][]byte{}} + ls.SetReadStorage(&store) + ls.SetWriteStorage(&store) + store.Put(ctx, c.KeyString(), blk) + node, err := ls.Load(linking.LinkContext{Ctx: ctx}, cidlink.Link{Cid: c}, basicnode.Prototype.Any) + if err != nil { + return nil, fmt.Errorf("could not load block: %q", err) + } + + outMode := "json" + if node.Kind() == datamodel.Kind_Bytes && isPrintable(node) { + outMode = "raw" + } + finalBuf := bytes.NewBuffer(nil) + + if outMode == "json" { + + initalJson := bytes.NewBuffer(nil) + if err := dagjson.Encode(node, initalJson); err != nil { + return nil, err + } + // re-do it with standard json to pretty print it. 
+ if err := json.Indent(finalBuf, initalJson.Bytes(), "", " "); err != nil { + return nil, err + } + } else if outMode == "raw" { + nb, err := node.AsBytes() + if err != nil { + return nil, err + } + finalBuf.Write(nb) + fmt.Printf("for raw: bytes are %x\n", finalBuf.Bytes()) + } + + // figure out number of lines. + lcnt := strings.Count(finalBuf.String(), "\n") + crStr := " (no-end-cr)" + if finalBuf.Bytes()[len(finalBuf.Bytes())-1] == '\n' { + crStr = "" + } + + outBuf := bytes.NewBuffer(nil) + outBuf.WriteString("--- " + c.String() + "\r\n") + outBuf.WriteString("+++ " + outMode + crStr + " " + c.String() + "\r\n") + outBuf.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\r\n", 0, lcnt, 0, lcnt)) + outBuf.Write(finalBuf.Bytes()) + outBuf.WriteString("\r\n") + return outBuf.Bytes(), nil +} + +func isPrintable(n ipld.Node) bool { + b, err := n.AsBytes() + if err != nil { + return false + } + if !utf8.Valid(b) { + return false + } + if bytes.ContainsAny(b, string([]byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x10, 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0x18, 0x19, 0x1c, 0x1d, 0x1e, 0x1f})) { + return false + } + return true +} + +func parsePatch(br *bufio.Reader) (cid.Cid, string, []byte, error) { + // read initial line to parse CID. 
+ l1, isPrefix, err := br.ReadLine() + if err != nil { + return cid.Undef, "", nil, err + } + if isPrefix { + return cid.Undef, "", nil, fmt.Errorf("unexpected long header l1") + } + var cs string + if _, err := fmt.Sscanf(string(l1), "--- %s", &cs); err != nil { + return cid.Undef, "", nil, fmt.Errorf("could not parse patch cid line (%s): %q", l1, err) + } + l2, isPrefix, err := br.ReadLine() + if err != nil { + return cid.Undef, "", nil, err + } + if isPrefix { + return cid.Undef, "", nil, fmt.Errorf("unexpected long header l2") + } + var mode string + var noEndReturn bool + r := regexp.MustCompile(`^\+\+\+ ([\w]+) ([\S]+ )?([\w]+)$`) + matches := r.FindSubmatch(l2) + if len(matches) >= 2 { + mode = string(matches[1]) + } + if len(matches) < 2 || string(matches[len(matches)-1]) != cs { + return cid.Undef, "", nil, fmt.Errorf("mismatched cid lines") + } + if len(matches[2]) > 0 { + noEndReturn = (string(matches[2]) == "(no-end-cr) ") + } + c, err := cid.Parse(cs) + if err != nil { + return cid.Undef, "", nil, err + } + + // skip over @@ line. + l3, isPrefix, err := br.ReadLine() + if err != nil { + return cid.Undef, "", nil, err + } + if isPrefix { + return cid.Undef, "", nil, fmt.Errorf("unexpected long header l3") + } + if !strings.HasPrefix(string(l3), "@@") { + return cid.Undef, "", nil, fmt.Errorf("unexpected missing chunk prefix") + } + + // keep going until next chunk or end. + outBuf := bytes.NewBuffer(nil) + for { + peek, err := br.Peek(4) + if err != nil && err != io.EOF { + return cid.Undef, "", nil, err + } + if bytes.Equal(peek, []byte("--- ")) { + break + } + // accumulate to buffer. 
+ l, err := br.ReadBytes('\n') + outBuf.Write(l) + if err == io.EOF { + break + } else if err != nil { + return cid.Undef, "", nil, err + } + } + + // remove the final line return + ob := outBuf.Bytes() + if len(ob) > 2 && bytes.Equal(ob[len(ob)-2:], []byte("\r\n")) { + ob = ob[:len(ob)-2] + } + if noEndReturn && len(ob) > 2 && bytes.Equal(ob[len(ob)-2:], []byte("\r\n")) { + ob = ob[:len(ob)-2] + } + + return c, mode, ob, nil +} diff --git a/cmd/car/testdata/script/compile.txt b/cmd/car/testdata/script/compile.txt new file mode 100644 index 00000000..4607ca6c --- /dev/null +++ b/cmd/car/testdata/script/compile.txt @@ -0,0 +1,28 @@ +# debug a car to patch +car debug -o out.patch ${INPUTS}/sample-v1.car +! stderr . +grep -count=1049 \+\+\+ out.patch + +# recompile to binary +car compile -o out.car out.patch +! stderr . + +# should have same blocks as it started with. +car ls out.car +stdout -count=1043 '^bafy' +stdout -count=6 '^bafk' + +# make a small car +car create --file=small.car foo.txt + +car debug -o small.patch small.car +! stderr . + +car compile -o new.car small.patch +! stderr . + +# confirm roundtrip is stable. +cmp small.car new.car + +-- foo.txt -- +hello world \ No newline at end of file From 2bc73b2e3d4a461dfe9e26afc5118a7945601ef7 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Mon, 14 Nov 2022 12:19:36 +0100 Subject: [PATCH 2/8] clean newline behavior a bit --- cmd/car/compile.go | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cmd/car/compile.go b/cmd/car/compile.go index 3b381ad6..5128be6f 100644 --- a/cmd/car/compile.go +++ b/cmd/car/compile.go @@ -263,9 +263,10 @@ func DebugCar(c *cli.Context) error { if rd.Version == 2 { outStream.WriteString("--v2 ") } - outStream.WriteString(inFile + "\r\n") + + outStream.WriteString(inFile + "\n") for _, rt := range rd.Roots { - outStream.WriteString("root " + rt.String() + "\r\n") + fmt.Fprintf(outStream, "root %s\n", rt.String()) } // patch each block. 
@@ -322,7 +323,6 @@ func patch(ctx context.Context, c cid.Cid, blk []byte) ([]byte, error) { return nil, err } finalBuf.Write(nb) - fmt.Printf("for raw: bytes are %x\n", finalBuf.Bytes()) } // figure out number of lines. @@ -333,11 +333,11 @@ func patch(ctx context.Context, c cid.Cid, blk []byte) ([]byte, error) { } outBuf := bytes.NewBuffer(nil) - outBuf.WriteString("--- " + c.String() + "\r\n") - outBuf.WriteString("+++ " + outMode + crStr + " " + c.String() + "\r\n") - outBuf.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\r\n", 0, lcnt, 0, lcnt)) + outBuf.WriteString("--- " + c.String() + "\n") + outBuf.WriteString("+++ " + outMode + crStr + " " + c.String() + "\n") + outBuf.WriteString(fmt.Sprintf("@@ -%d,%d +%d,%d @@\n", 0, lcnt, 0, lcnt)) outBuf.Write(finalBuf.Bytes()) - outBuf.WriteString("\r\n") + outBuf.WriteString("\n") return outBuf.Bytes(), nil } @@ -427,11 +427,12 @@ func parsePatch(br *bufio.Reader) (cid.Cid, string, []byte, error) { // remove the final line return ob := outBuf.Bytes() - if len(ob) > 2 && bytes.Equal(ob[len(ob)-2:], []byte("\r\n")) { - ob = ob[:len(ob)-2] + if len(ob) > 1 && bytes.Equal(ob[len(ob)-1:], []byte("\n")) { + ob = ob[:len(ob)-1] } - if noEndReturn && len(ob) > 2 && bytes.Equal(ob[len(ob)-2:], []byte("\r\n")) { - ob = ob[:len(ob)-2] + + if noEndReturn && len(ob) > 1 && bytes.Equal(ob[len(ob)-1:], []byte("\n")) { + ob = ob[:len(ob)-1] } return c, mode, ob, nil From 0380aab1d57baf460ecb0fde7721811d122d6986 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Mon, 14 Nov 2022 12:38:09 +0100 Subject: [PATCH 3/8] continue to fiddle with line endings --- cmd/car/compile.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/cmd/car/compile.go b/cmd/car/compile.go index 5128be6f..65ac4114 100644 --- a/cmd/car/compile.go +++ b/cmd/car/compile.go @@ -92,7 +92,7 @@ func CompileCar(c *cli.Context) error { rawCodecs[nextCid] = mode } - fmt.Printf("structuring as tree...\n") + //fmt.Printf("structuring as 
tree...\n") // structure as a tree childMap := make(map[cid.Cid][]cid.Cid) for c := range rawBlocks { @@ -117,7 +117,7 @@ func CompileCar(c *cli.Context) error { } } - fmt.Printf("rebuilding...\n") + //fmt.Printf("rebuilding...\n") // re-parse/re-build CIDs outBlocks := make(map[cid.Cid][]byte) for len(childMap) > 0 { @@ -417,7 +417,9 @@ func parsePatch(br *bufio.Reader) (cid.Cid, string, []byte, error) { } // accumulate to buffer. l, err := br.ReadBytes('\n') - outBuf.Write(l) + if l != nil { + outBuf.Write(l) + } if err == io.EOF { break } else if err != nil { @@ -427,11 +429,16 @@ func parsePatch(br *bufio.Reader) (cid.Cid, string, []byte, error) { // remove the final line return ob := outBuf.Bytes() - if len(ob) > 1 && bytes.Equal(ob[len(ob)-1:], []byte("\n")) { + + if len(ob) > 2 && bytes.Equal(ob[len(ob)-2:], []byte("\r\n")) { + ob = ob[:len(ob)-2] + } else if len(ob) > 1 && bytes.Equal(ob[len(ob)-1:], []byte("\n")) { ob = ob[:len(ob)-1] } - if noEndReturn && len(ob) > 1 && bytes.Equal(ob[len(ob)-1:], []byte("\n")) { + if noEndReturn && len(ob) > 2 && bytes.Equal(ob[len(ob)-2:], []byte("\r\n")) { + ob = ob[:len(ob)-2] + } else if noEndReturn && len(ob) > 1 && bytes.Equal(ob[len(ob)-1:], []byte("\n")) { ob = ob[:len(ob)-1] } From 3b133b345f8dcae0542fda188f866b7c08e52f9e Mon Sep 17 00:00:00 2001 From: Will Scott Date: Wed, 16 Nov 2022 13:32:37 +0100 Subject: [PATCH 4/8] code review updates * add check for bytes not containing end-of-patch sequence --- cmd/car/compile.go | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/cmd/car/compile.go b/cmd/car/compile.go index 65ac4114..4431718f 100644 --- a/cmd/car/compile.go +++ b/cmd/car/compile.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "context" - "encoding/json" "fmt" "io" "os" @@ -19,15 +18,21 @@ import ( carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/blockstore" "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec" 
"github.com/ipld/go-ipld-prime/codec/dagjson" "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" "github.com/ipld/go-ipld-prime/storage/memstore" + "github.com/polydawn/refmt/json" "github.com/urfave/cli/v2" ) +var ( + plusLineRegex = regexp.MustCompile(`^\+\+\+ ([\w-]+) ([\S]+ )?([\w]+)$`) +) + // Compile is a command to translate between a human-debuggable patch-like format and a car file. func CompileCar(c *cli.Context) error { var err error @@ -46,10 +51,12 @@ func CompileCar(c *cli.Context) error { return err } - v2 := strings.Contains(string(header), "--v2") - trimH := strings.TrimSpace(string(header)) - headerParts := strings.Split(trimH, " ") - carName := headerParts[len(headerParts)-1] + v2 := strings.HasPrefix(string(header), "car compile --v2 ") + rest := strings.TrimPrefix(string(header), "car compile ") + if v2 { + rest = strings.TrimPrefix(rest, "--v2 ") + } + carName := strings.TrimSpace(rest) roots := make([]cid.Cid, 0) for { @@ -209,7 +216,7 @@ func serializeBlock(ctx context.Context, codec cid.Prefix, encoding string, raw ls.SetReadStorage(&store) ls.SetWriteStorage(&store) b := basicnode.Prototype.Any.NewBuilder() - if encoding == "json" { + if encoding == "dag-json" { if err := dagjson.Decode(b, bytes.NewBuffer(raw)); err != nil { return cid.Undef, nil, err } @@ -301,20 +308,19 @@ func patch(ctx context.Context, c cid.Cid, blk []byte) ([]byte, error) { return nil, fmt.Errorf("could not load block: %q", err) } - outMode := "json" + outMode := "dag-json" if node.Kind() == datamodel.Kind_Bytes && isPrintable(node) { outMode = "raw" } finalBuf := bytes.NewBuffer(nil) - if outMode == "json" { - - initalJson := bytes.NewBuffer(nil) - if err := dagjson.Encode(node, initalJson); err != nil { - return nil, err + if outMode == "dag-json" { + opts := dagjson.EncodeOptions{ + EncodeLinks: true, + EncodeBytes: true, + MapSortMode: 
codec.MapSortMode_Lexical, } - // re-do it with standard json to pretty print it. - if err := json.Indent(finalBuf, initalJson.Bytes(), "", " "); err != nil { + if err := dagjson.Marshal(node, json.NewEncoder(finalBuf, json.EncodeOptions{Line: []byte{'\n'}, Indent: []byte{'\t'}}), opts); err != nil { return nil, err } } else if outMode == "raw" { @@ -352,6 +358,10 @@ func isPrintable(n ipld.Node) bool { if bytes.ContainsAny(b, string([]byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x10, 0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0x18, 0x19, 0x1c, 0x1d, 0x1e, 0x1f})) { return false } + // check if would confuse the 'end of patch' checker. + if bytes.Contains(b, []byte("\n--- ")) { + return false + } return true } @@ -377,13 +387,12 @@ func parsePatch(br *bufio.Reader) (cid.Cid, string, []byte, error) { } var mode string var noEndReturn bool - r := regexp.MustCompile(`^\+\+\+ ([\w]+) ([\S]+ )?([\w]+)$`) - matches := r.FindSubmatch(l2) + matches := plusLineRegex.FindSubmatch(l2) if len(matches) >= 2 { mode = string(matches[1]) } if len(matches) < 2 || string(matches[len(matches)-1]) != cs { - return cid.Undef, "", nil, fmt.Errorf("mismatched cid lines") + return cid.Undef, "", nil, fmt.Errorf("mismatched cid lines: %v", string(l2)) } if len(matches[2]) > 0 { noEndReturn = (string(matches[2]) == "(no-end-cr) ") @@ -427,9 +436,9 @@ func parsePatch(br *bufio.Reader) (cid.Cid, string, []byte, error) { } } - // remove the final line return ob := outBuf.Bytes() + // remove the final line return if len(ob) > 2 && bytes.Equal(ob[len(ob)-2:], []byte("\r\n")) { ob = ob[:len(ob)-2] } else if len(ob) > 1 && bytes.Equal(ob[len(ob)-1:], []byte("\n")) { From fb694797fc5b5e423c0cca1c3c619f771ff9d2c1 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Wed, 16 Nov 2022 13:35:16 +0100 Subject: [PATCH 5/8] mod tidy --- cmd/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/go.mod b/cmd/go.mod index b76f64fd..7ca1f1ca 100644 --- a/cmd/go.mod +++ b/cmd/go.mod @@ 
-15,6 +15,7 @@ require ( github.com/multiformats/go-multicodec v0.5.0 github.com/multiformats/go-multihash v0.2.0 github.com/multiformats/go-varint v0.0.6 + github.com/polydawn/refmt v0.0.0-20201211092308-30ac6d18308e github.com/rogpeppe/go-internal v1.8.1 github.com/urfave/cli/v2 v2.10.3 ) @@ -53,7 +54,6 @@ require ( github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e // indirect - github.com/polydawn/refmt v0.0.0-20201211092308-30ac6d18308e // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect From fdb958109d5a50c5b1e0549bce57b76873a6add4 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Thu, 17 Nov 2022 10:36:55 +0100 Subject: [PATCH 6/8] stable map iteration --- cmd/car/compile.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmd/car/compile.go b/cmd/car/compile.go index 4431718f..82518846 100644 --- a/cmd/car/compile.go +++ b/cmd/car/compile.go @@ -27,6 +27,7 @@ import ( "github.com/ipld/go-ipld-prime/storage/memstore" "github.com/polydawn/refmt/json" "github.com/urfave/cli/v2" + "golang.org/x/exp/slices" ) var ( @@ -85,6 +86,7 @@ func CompileCar(c *cli.Context) error { } //parse blocks. + cidList := make([]cid.Cid, 0) rawBlocks := make(map[cid.Cid][]byte) rawCodecs := make(map[cid.Cid]string) @@ -97,6 +99,7 @@ func CompileCar(c *cli.Context) error { } rawBlocks[nextCid] = nextBlk rawCodecs[nextCid] = mode + cidList = append(cidList, nextCid) } //fmt.Printf("structuring as tree...\n") @@ -137,6 +140,8 @@ func CompileCar(c *cli.Context) error { return err } outBlocks[finalCid] = finalBlk + idx := slices.Index(cidList, origCid) + cidList[idx] = finalCid // update other remaining nodes of the new cid. 
for otherCid, otherKids := range childMap { @@ -200,7 +205,8 @@ func CompileCar(c *cli.Context) error { if err != nil { return err } - for bc, blk := range outBlocks { + for _, bc := range cidList { + blk := outBlocks[bc] ob, _ := blocks.NewBlockWithCid(blk, bc) bs.Put(c.Context, ob) } From a2c9f855bacbdf43804681c37c9768c04b38b1f1 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Thu, 17 Nov 2022 10:39:21 +0100 Subject: [PATCH 7/8] tidy --- cmd/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/go.mod b/cmd/go.mod index 7ca1f1ca..94618943 100644 --- a/cmd/go.mod +++ b/cmd/go.mod @@ -18,6 +18,7 @@ require ( github.com/polydawn/refmt v0.0.0-20201211092308-30ac6d18308e github.com/rogpeppe/go-internal v1.8.1 github.com/urfave/cli/v2 v2.10.3 + golang.org/x/exp v0.0.0-20220613132600-b0d781184e0d ) require ( @@ -64,7 +65,6 @@ require ( go.uber.org/multierr v1.8.0 // indirect go.uber.org/zap v1.21.0 // indirect golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect - golang.org/x/exp v0.0.0-20220613132600-b0d781184e0d // indirect golang.org/x/sys v0.0.0-20220627191245-f75cf1eec38b // indirect golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect google.golang.org/protobuf v1.28.0 // indirect From af90d85928588893a32688bcfda9f496f2e657f3 Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 18 Nov 2022 09:21:07 +0000 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Rod Vagg --- cmd/car/compile.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmd/car/compile.go b/cmd/car/compile.go index 82518846..0a74d1c7 100644 --- a/cmd/car/compile.go +++ b/cmd/car/compile.go @@ -102,7 +102,10 @@ func CompileCar(c *cli.Context) error { cidList = append(cidList, nextCid) } - //fmt.Printf("structuring as tree...\n") + // Re-create the original IPLD encoded blocks, but allowing for modifications of the + // patch data which may generate new CIDs; so we track the DAG relationships and + // rewrite CIDs 
in other referring blocks where they get updated. + // structure as a tree childMap := make(map[cid.Cid][]cid.Cid) for c := range rawBlocks { @@ -127,7 +130,6 @@ func CompileCar(c *cli.Context) error { } } - //fmt.Printf("rebuilding...\n") // re-parse/re-build CIDs outBlocks := make(map[cid.Cid][]byte) for len(childMap) > 0 {