From 4be09fd40734430a8f366893ef3a1f0fb80f3889 Mon Sep 17 00:00:00 2001 From: Ryan Gonzalez Date: Tue, 5 Sep 2023 15:22:24 -0500 Subject: [PATCH 1/7] Use github.com/saracen/walker for file walk operations In some local tests with a slowed-down filesystem, this cut the time to clean up a repository by ~3x, bringing the total 'publish update' time from ~16s to ~13s. Signed-off-by: Ryan Gonzalez --- api/files.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/api/files.go b/api/files.go index 2d042a5d7a..f848d8882c 100644 --- a/api/files.go +++ b/api/files.go @@ -170,11 +170,7 @@ func apiFilesListFiles(c *gin.Context) { listLock := &sync.Mutex{} root := filepath.Join(context.UploadPath(), utils.SanitizePath(c.Params.ByName("dir"))) - err := filepath.Walk(root, func(path string, _ os.FileInfo, err error) error { - if err != nil { - return err - } - + err := walker.Walk(root, func(path string, info os.FileInfo) error { if path == root { return nil } From 19a705f80d1a2c354543bb8dce879ffcbdca78d1 Mon Sep 17 00:00:00 2001 From: Ryan Gonzalez Date: Fri, 10 Nov 2023 17:01:16 -0600 Subject: [PATCH 2/7] Split reflists to share their contents across snapshots In current aptly, each repository and snapshot has its own reflist in the database. This brings a few problems with it: - Given sufficiently large repositories and snapshots, these lists can get enormous, reaching >1MB. This is a problem for LevelDB's overall performance, as it tends to prefer values around the configured block size (which defaults to just 4KiB). - When you take these large repositories and snapshot them, you have a full, new copy of the reflist, even if only a few packages changed. This means that having a lot of snapshots with a few changes causes the database to basically be full of largely duplicate reflists. - All the duplication also means that many of the same refs are being loaded repeatedly, which can cause some slowdown but, more notably, eats up huge amounts of memory. - Adding more and more repositories and snapshots will cause the time and memory spent on things like cleanup and publishing to grow roughly linearly. At the core, there are two problems here: - Reflists get very big because there are just a lot of packages. - Different reflists tend to duplicate much of the same contents. *Split reflists* aim to solve this by separating reflists into 64 *buckets*. Package refs are sorted into individual buckets according to the following system: - Take the first 3 letters of the package name, after dropping a `lib` prefix. (Using only the first 3 letters will cause packages with similar prefixes to end up in the same bucket, under the assumption that packages with similar names tend to be updated together.) - Take the 64-bit xxhash of these letters. (xxhash was chosen because it has relatively good distribution across the individual bits, which is important for the next step.) - Use the first 6 bits of the hash (range 0-63) as an index into the buckets. Once refs are placed in buckets, a sha256 digest of all the refs in the bucket is taken. These buckets are then stored in the database, split into roughly block-sized segments, and all the repositories and snapshots simply store an array of bucket digests. This approach means that *repositories and snapshots can share their reflist buckets*.
If a snapshot is taken of a repository, it will have the same contents, so its split reflist will point to the same buckets as the base repository, and only one copy of each bucket is stored in the database. When some packages in the repository change, only the buckets containing those packages will be modified; all the other buckets will remain unchanged, and thus their contents will still be shared. Later on, when these reflists are loaded, each bucket is only loaded once, short-cutting the loading of many megabytes of data. In effect, split reflists are essentially copy-on-write, with only the changed buckets stored individually. Changing the disk format means that a migration needs to take place, so that task is moved into the database cleanup step, which will migrate reflists over to split reflists, as well as delete any unused reflist buckets. All the reflist tests are changed to additionally test split reflists; although the internal logic is all shared (since buckets are, themselves, just normal reflists), some special additions are needed to have native versions of the various reflist helper methods. In our tests, we've observed the following improvements: - Memory usage during publish and database cleanup, with `GOMEMLIMIT=2GiB`, goes down from ~3.2GiB (larger than the memory limit!) to ~0.7GiB, a decrease of ~4.5x. - Database size decreases from 1.3GB to 367MB. *In my local tests*, publish times also decreased to mere seconds, but the same effect wasn't observed on the server, where the times stayed around the same. My suspicion is that this is due to I/O performance: my local system is an M1 MBP, which almost certainly has much faster disk speeds than our DigitalOcean block volumes. Split reflists do have the side effect of requiring more random accesses (reading all the buckets by their keys), so if your random I/O performance is slower, it might cancel out the benefits. That being said, even in that case, the memory usage and database size advantages still persist.
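As a rough illustration of the bucketing scheme described above (not the code in this patch: the function and constant names are made up, the `github.com/cespare/xxhash/v2` package is assumed, and the exact 6 bits taken from the hash are a guess), the index computation looks roughly like this:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/cespare/xxhash/v2"
)

const numBuckets = 64

// bucketIndex sketches how a package ref might be assigned to one of the
// 64 buckets: hash the first 3 letters of the name (minus any "lib" prefix)
// and keep 6 bits of the result.
func bucketIndex(packageName string) uint64 {
	// Drop a "lib" prefix so library packages don't all crowd into the
	// same few buckets.
	name := strings.TrimPrefix(packageName, "lib")

	// Only the first 3 letters are hashed, so similarly named packages
	// (which tend to be updated together) land in the same bucket.
	if len(name) > 3 {
		name = name[:3]
	}

	// xxhash gives a well-distributed 64-bit value; 6 bits of it select a
	// bucket. The low 6 bits are used here purely for illustration.
	return xxhash.Sum64String(name) & (numBuckets - 1)
}

func main() {
	fmt.Println(bucketIndex("libssl3"), bucketIndex("openssl"))
}
```

Because the bucket is derived only from the name prefix, an update to a package only dirties the one bucket its name hashes to, which is what lets all the unchanged buckets keep being shared between a repository and its snapshots.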
Signed-off-by: Ryan Gonzalez --- api/api.go | 2 +- api/db.go | 81 ++- api/metrics.go | 2 +- api/mirror.go | 12 +- api/publish.go | 10 +- api/repos.go | 25 +- api/snapshot.go | 34 +- cmd/cmd.go | 2 +- cmd/db_cleanup.go | 130 +++-- cmd/mirror_create.go | 2 +- cmd/mirror_edit.go | 2 +- cmd/mirror_rename.go | 2 +- cmd/mirror_show.go | 9 +- cmd/mirror_update.go | 8 +- cmd/package_show.go | 6 +- cmd/publish_snapshot.go | 6 +- cmd/publish_switch.go | 4 +- cmd/publish_update.go | 2 +- cmd/repo_add.go | 6 +- cmd/repo_create.go | 4 +- cmd/repo_edit.go | 4 +- cmd/repo_include.go | 2 +- cmd/repo_list.go | 5 +- cmd/repo_move.go | 18 +- cmd/repo_remove.go | 6 +- cmd/repo_rename.go | 2 +- cmd/repo_show.go | 9 +- cmd/snapshot_create.go | 6 +- cmd/snapshot_diff.go | 6 +- cmd/snapshot_filter.go | 4 +- cmd/snapshot_merge.go | 4 +- cmd/snapshot_pull.go | 6 +- cmd/snapshot_rename.go | 2 +- cmd/snapshot_search.go | 8 +- cmd/snapshot_show.go | 13 +- cmd/snapshot_verify.go | 2 +- database/database.go | 2 + database/goleveldb/database.go | 3 + database/goleveldb/storage.go | 11 + deb/changes.go | 9 +- deb/changes_test.go | 12 +- deb/collections.go | 12 + deb/graph.go | 6 +- deb/list.go | 2 +- deb/local.go | 31 +- deb/local_test.go | 34 +- deb/publish.go | 89 +-- deb/publish_bench_test.go | 7 +- deb/publish_test.go | 84 +-- deb/reflist.go | 772 ++++++++++++++++++++++++- deb/reflist_bench_test.go | 38 ++ deb/reflist_test.go | 890 +++++++++++++++++++---------- deb/remote.go | 29 +- deb/remote_test.go | 44 +- deb/snapshot.go | 34 +- deb/snapshot_bench_test.go | 15 +- deb/snapshot_test.go | 50 +- system/t08_db/CleanupDB10Test_gold | 1 + system/t08_db/CleanupDB11Test_gold | 2 + system/t08_db/CleanupDB12Test_gold | 2 + system/t08_db/CleanupDB1Test_gold | 1 + system/t08_db/CleanupDB2Test_gold | 1 + system/t08_db/CleanupDB3Test_gold | 1 + system/t08_db/CleanupDB4Test_gold | 1 + system/t08_db/CleanupDB5Test_gold | 1 + system/t08_db/CleanupDB6Test_gold | 1 + system/t08_db/CleanupDB7Test_gold | 1 + system/t08_db/CleanupDB8Test_gold | 1 + system/t08_db/CleanupDB9Test_gold | 1 + 69 files changed, 1967 insertions(+), 667 deletions(-) diff --git a/api/api.go b/api/api.go index 2e11c022ac..ce07457dc8 100644 --- a/api/api.go +++ b/api/api.go @@ -229,7 +229,7 @@ func maybeRunTaskInBackground(c *gin.Context, name string, resources []string, p // Common piece of code to show list of packages, // with searching & details if requested -func showPackages(c *gin.Context, reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) { +func showPackages(c *gin.Context, reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) { result := []*deb.Package{} list, err := deb.NewPackageListFromRefList(reflist, collectionFactory.PackageCollection(), nil) diff --git a/api/db.go b/api/db.go index 259a94aa46..4756dcc618 100644 --- a/api/db.go +++ b/api/db.go @@ -5,6 +5,7 @@ import ( "sort" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/task" "github.com/aptly-dev/aptly/utils" @@ -28,18 +29,22 @@ func apiDbCleanup(c *gin.Context) { collectionFactory := context.NewCollectionFactory() - // collect information about referenced packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about referenced packages and their reflist buckets... 
+ existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() + + reflistMigration := collectionFactory.RefListCollection().NewMigration() out.Printf("Loading mirrors, local repos, snapshots and published repos...") err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -48,14 +53,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -64,12 +69,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -81,13 +88,16 @@ func apiDbCleanup(c *gin.Context) { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) } return nil }) @@ -95,11 +105,20 @@ func apiDbCleanup(c *gin.Context) { return nil, err } + err = reflistMigration.Flush() + if err != nil { + return nil, err + } + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + out.Printf("Split %d reflist(s) into %d bucket(s) (%d segment(s))", + stats.Reflists, stats.Buckets, stats.Segments) + } + // ... 
and compare it to the list of all packages out.Printf("Loading list of all packages...") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced out.Printf("Deleting unreferenced packages (%d)...", toDelete.Len()) @@ -120,6 +139,28 @@ func apiDbCleanup(c *gin.Context) { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return nil, err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + out.Printf("Deleting unreferenced reflist buckets (%d)...", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return nil, err + } + + if err := batch.Write(); err != nil { + return nil, err + } + } + // now, build a list of files that should be present in Repository (package pool) out.Printf("Building list of files referenced by packages...") referencedFiles := make([]string, 0, existingPackageRefs.Len()) diff --git a/api/metrics.go b/api/metrics.go index 94a9dc2525..875c3c196d 100644 --- a/api/metrics.go +++ b/api/metrics.go @@ -102,7 +102,7 @@ func countPackagesByRepos() { components := repo.Components() for _, c := range components { - count := float64(len(repo.RefList(c).Refs)) + count := float64(repo.RefList(c).Len()) apiReposPackageCountGauge.WithLabelValues(fmt.Sprintf("%s", (repo.SourceNames())), repo.Distribution, c).Set(count) } diff --git a/api/mirror.go b/api/mirror.go index df6ea6436b..24e2b3218d 100644 --- a/api/mirror.go +++ b/api/mirror.go @@ -150,7 +150,7 @@ func apiMirrorsCreate(c *gin.Context) { return } - err = collection.Add(repo) + err = collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to add mirror: %s", err)) return @@ -229,7 +229,7 @@ func apiMirrorsShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -260,7 +260,7 @@ func apiMirrorsPackages(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -491,12 +491,12 @@ func apiMirrorsUpdate(c *gin.Context) { e := context.ReOpenDatabase() if e == nil { remote.MarkAsIdle() - collection.Update(remote) + collection.Update(remote, collectionFactory.RefListCollection()) } }() remote.MarkAsUpdating() - err = collection.Update(remote) + err = collection.Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } @@ -654,7 +654,7 @@ func apiMirrorsUpdate(c *gin.Context) { log.Info().Msgf("%s: Finalizing download...", b.Name) remote.FinalizeDownload(collectionFactory, out) - err = collectionFactory.RemoteRepoCollection().Update(remote) + err = collectionFactory.RemoteRepoCollection().Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable 
to update: %s", err) } diff --git a/api/publish.go b/api/publish.go index e19afb178e..c41eea8a4b 100644 --- a/api/publish.go +++ b/api/publish.go @@ -297,10 +297,10 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { switch s := source.(type) { case *deb.Snapshot: snapshotCollection := collectionFactory.SnapshotCollection() - err = snapshotCollection.LoadComplete(s) + err = snapshotCollection.LoadComplete(s, collectionFactory.RefListCollection()) case *deb.LocalRepo: localCollection := collectionFactory.LocalRepoCollection() - err = localCollection.LoadComplete(s) + err = localCollection.LoadComplete(s, collectionFactory.RefListCollection()) default: err = fmt.Errorf("unexpected type for source: %T", source) } @@ -352,7 +352,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to publish: %s", err) } - err = collection.Add(published) + err = collection.Add(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } @@ -469,7 +469,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { resources := []string{string(published.Key())} taskName := fmt.Sprintf("Update published %s repository %s/%s", published.SourceKind, published.StoragePrefix(), published.Distribution) maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err = collection.LoadComplete(published, collectionFactory) + err = collection.LoadComplete(published, collectionFactory, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("Unable to update: %s", err) } @@ -495,7 +495,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("Unable to update: %s", err) } - err = collection.Update(published) + err = collection.Update(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } diff --git a/api/repos.go b/api/repos.go index 4cc04f6956..2f941734df 100644 --- a/api/repos.go +++ b/api/repos.go @@ -155,6 +155,13 @@ func apiReposCreate(c *gin.Context) { return } + collection := collectionFactory.LocalRepoCollection() + err := collection.Add(repo, collectionFactory.RefListCollection()) + if err != nil { + AbortWithJSONError(c, 400, err) + return + } + err := localRepoCollection.Add(repo) if err != nil { AbortWithJSONError(c, http.StatusInternalServerError, err) @@ -218,7 +225,7 @@ func apiReposEdit(c *gin.Context) { repo.DefaultComponent = *b.DefaultComponent } - err = collection.Update(repo) + err = collection.Update(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -326,7 +333,7 @@ func apiReposPackagesShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -360,7 +367,7 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li resources := []string{string(repo.Key())} maybeRunTaskInBackground(c, taskNamePrefix+repo.Name, resources, func(out aptly.Progress, _ *task.Detail) 
(*task.ProcessReturnValue, error) { - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -389,9 +396,9 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li } } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -517,7 +524,7 @@ func apiReposPackageFromDir(c *gin.Context) { resources := []string{string(repo.Key())} resources = append(resources, sources...) maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -552,9 +559,9 @@ func apiReposPackageFromDir(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -867,7 +874,7 @@ func apiReposIncludePackageFromDir(c *gin.Context) { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignature, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) failedFiles = append(failedFiles, failedFiles2...) 
if err != nil { diff --git a/api/snapshot.go b/api/snapshot.go index 6082b09a60..4adf255b22 100644 --- a/api/snapshot.go +++ b/api/snapshot.go @@ -93,7 +93,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusConflict, Value: nil}, err } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -107,7 +107,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -174,7 +174,7 @@ func apiSnapshotsCreate(c *gin.Context) { maybeRunTaskInBackground(c, "Create snapshot "+b.Name, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { for i := range sources { - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -197,9 +197,9 @@ func apiSnapshotsCreate(c *gin.Context) { } } - snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewPackageRefListFromPackageList(list), b.Description) + snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewSplitRefListFromPackageList(list), b.Description) - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -254,8 +254,8 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { // including snapshot resource key resources := []string{string(repo.Key()), "S" + b.Name} taskName := fmt.Sprintf("Create snapshot of repo %s", name) - maybeRunTaskInBackground(c, taskName, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err := collection.LoadComplete(repo) + maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { + err := collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -269,7 +269,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -333,7 +333,7 @@ func apiSnapshotsUpdate(c *gin.Context) { snapshot.Description = b.Description } - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -360,7 +360,7 @@ func apiSnapshotsShow(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -452,20 +452,20 @@ func apiSnapshotsDiff(c *gin.Context) { 
return } - err = collection.LoadComplete(snapshotA) + err = collection.LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } - err = collection.LoadComplete(snapshotB) + err = collection.LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { AbortWithJSONError(c, 500, err) return @@ -508,7 +508,7 @@ func apiSnapshotsSearchPackages(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -584,13 +584,13 @@ func apiSnapshotsMerge(c *gin.Context) { } maybeRunTaskInBackground(c, "Merge snapshot "+name, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { - err = snapshotCollection.LoadComplete(sources[0]) + err = snapshotCollection.LoadComplete(sources[0], collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } result := sources[0].RefList() for i := 1; i < len(sources); i++ { - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -609,7 +609,7 @@ func apiSnapshotsMerge(c *gin.Context) { snapshot = deb.NewSnapshotFromRefList(name, sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/cmd.go b/cmd/cmd.go index d5bdff25d8..8c112d9df3 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -21,7 +21,7 @@ const ( ) // ListPackagesRefList shows list of packages in PackageRefList -func ListPackagesRefList(reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) (err error) { +func ListPackagesRefList(reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) (err error) { fmt.Printf("Packages:\n") if reflist == nil { diff --git a/cmd/db_cleanup.go b/cmd/db_cleanup.go index 66fece677a..ec19068591 100644 --- a/cmd/db_cleanup.go +++ b/cmd/db_cleanup.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/utils" "github.com/smira/commander" @@ -24,12 +25,20 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { dryRun := context.Flags().Lookup("dry-run").Value.Get().(bool) collectionFactory := context.NewCollectionFactory() - // collect information about references packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about references packages and their reflistbuckets... 
+ existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() // used only in verbose mode to report package use source packageRefSources := map[string][]string{} + var reflistMigration *deb.RefListMigration + if !dryRun { + reflistMigration = collectionFactory.RefListCollection().NewMigration() + } else { + reflistMigration = collectionFactory.RefListCollection().NewMigrationDryRun() + } + context.Progress().ColoredPrintf("@{w!}Loading mirrors, local repos, snapshots and published repos...@|") if verbose { context.Progress().ColoredPrintf("@{y}Loading mirrors:@|") @@ -39,20 +48,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - if verbose { - description := fmt.Sprintf("mirror %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + + if verbose { + description := fmt.Sprintf("mirror %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -71,21 +81,23 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) - if verbose { - description := fmt.Sprintf("local repo %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + + if verbose { + description := fmt.Sprintf("local repo %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -104,16 +116,18 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", snapshot.Name) } - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) if verbose { description := fmt.Sprintf("snapshot %s", snapshot.Name) - snapshot.RefList().ForEach(func(key []byte) error { + 
sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -136,17 +150,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + if verbose { description := fmt.Sprintf("published repository %s:%s/%s component %s", published.Storage, published.Prefix, published.Distribution, component) - published.RefList(component).ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -160,11 +178,29 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { collectionFactory.Flush() + err = reflistMigration.Flush() + if err != nil { + return err + } + + if verbose { + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + if !dryRun { + context.Progress().ColoredPrintf("@{w!}Split %d reflist(s) into %d bucket(s) (%d segment(s))@|", + stats.Reflists, stats.Buckets, stats.Segments) + } else { + context.Progress().ColoredPrintf( + "@{y!}Skipped splitting %d reflist(s) into %d bucket(s) (%d segment(s)), as -dry-run has been requested.@|", + stats.Reflists, stats.Buckets, stats.Segments) + } + } + } + // ... 
and compare it to the list of all packages context.Progress().ColoredPrintf("@{w!}Loading list of all packages...@|") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced context.Progress().ColoredPrintf("@{r!}Deleting unreferenced packages (%d)...@|", toDelete.Len()) @@ -202,6 +238,32 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + context.Progress().ColoredPrintf("@{r!}Deleting unreferenced reflist buckets (%d)...@|", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + if !dryRun { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return err + } + + if err := batch.Write(); err != nil { + return err + } + } else { + context.Progress().ColoredPrintf("@{y!}Skipped reflist deletion, as -dry-run has been requested.@|") + } + } + collectionFactory.Flush() // now, build a list of files that should be present in Repository (package pool) diff --git a/cmd/mirror_create.go b/cmd/mirror_create.go index 049dd2ebb4..2839579524 100644 --- a/cmd/mirror_create.go +++ b/cmd/mirror_create.go @@ -69,7 +69,7 @@ func aptlyMirrorCreate(cmd *commander.Command, args []string) error { } collectionFactory := context.NewCollectionFactory() - err = collectionFactory.RemoteRepoCollection().Add(repo) + err = collectionFactory.RemoteRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add mirror: %s", err) } diff --git a/cmd/mirror_edit.go b/cmd/mirror_edit.go index f95d97368b..e795257e36 100644 --- a/cmd/mirror_edit.go +++ b/cmd/mirror_edit.go @@ -78,7 +78,7 @@ func aptlyMirrorEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/mirror_rename.go b/cmd/mirror_rename.go index 2ff9f92041..ff453b857f 100644 --- a/cmd/mirror_rename.go +++ b/cmd/mirror_rename.go @@ -37,7 +37,7 @@ func aptlyMirrorRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/mirror_show.go b/cmd/mirror_show.go index 03179161a3..3c52d6e37d 100644 --- a/cmd/mirror_show.go +++ b/cmd/mirror_show.go @@ -38,7 +38,7 @@ func aptlyMirrorShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -98,12 +98,13 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().RemoteRepoCollection().ByName(name) + 
collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.RemoteRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -113,7 +114,7 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/mirror_update.go b/cmd/mirror_update.go index 2aad8c533c..e8cac81662 100644 --- a/cmd/mirror_update.go +++ b/cmd/mirror_update.go @@ -29,7 +29,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to update: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -101,12 +101,12 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { err = context.ReOpenDatabase() if err == nil { repo.MarkAsIdle() - collectionFactory.RemoteRepoCollection().Update(repo) + collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) } }() repo.MarkAsUpdating() - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -262,7 +262,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { } repo.FinalizeDownload(collectionFactory, context.Progress()) - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } diff --git a/cmd/package_show.go b/cmd/package_show.go index 1715b52a4a..6bf6187db1 100644 --- a/cmd/package_show.go +++ b/cmd/package_show.go @@ -14,7 +14,7 @@ import ( func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) (err error) { err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -30,7 +30,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -46,7 +46,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := 
collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/publish_snapshot.go b/cmd/publish_snapshot.go index eac4c3b34d..1116ed78a8 100644 --- a/cmd/publish_snapshot.go +++ b/cmd/publish_snapshot.go @@ -49,7 +49,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -85,7 +85,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -175,7 +175,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Add(published) + err = collectionFactory.PublishedRepoCollection().Add(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_switch.go b/cmd/publish_switch.go index f39269a16d..7d0bbf8201 100644 --- a/cmd/publish_switch.go +++ b/cmd/publish_switch.go @@ -72,7 +72,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to switch: %s", err) } - err = snapshotCollection.LoadComplete(snapshot) + err = snapshotCollection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to switch: %s", err) } @@ -108,7 +108,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_update.go b/cmd/publish_update.go index 6ea638d41f..2c20023e97 100644 --- a/cmd/publish_update.go +++ b/cmd/publish_update.go @@ -69,7 +69,7 @@ func aptlyPublishUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/repo_add.go b/cmd/repo_add.go index e7ccfac702..55f260dd5f 100644 --- a/cmd/repo_add.go +++ b/cmd/repo_add.go @@ -28,7 +28,7 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to add: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add: %s", err) } @@ -58,9 +58,9 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { processedFiles = 
append(processedFiles, otherFiles...) - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_create.go b/cmd/repo_create.go index 5fef46d922..0e3a1e52b2 100644 --- a/cmd/repo_create.go +++ b/cmd/repo_create.go @@ -36,7 +36,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load source snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load source snapshot: %s", err) } @@ -44,7 +44,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { repo.UpdateRefList(snapshot.RefList()) } - err = collectionFactory.LocalRepoCollection().Add(repo) + err = collectionFactory.LocalRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add local repo: %s", err) } diff --git a/cmd/repo_edit.go b/cmd/repo_edit.go index bc81dc4a12..c7fbc41964 100644 --- a/cmd/repo_edit.go +++ b/cmd/repo_edit.go @@ -22,7 +22,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to edit: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } @@ -53,7 +53,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/repo_include.go b/cmd/repo_include.go index 4aa26d419e..3b176495e0 100644 --- a/cmd/repo_include.go +++ b/cmd/repo_include.go @@ -67,7 +67,7 @@ func aptlyRepoInclude(cmd *commander.Command, args []string) error { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, uploaders, query.Parse) failedFiles = append(failedFiles, failedFiles2...) 
diff --git a/cmd/repo_list.go b/cmd/repo_list.go index 9c4b0d47eb..f3ca4a8bae 100644 --- a/cmd/repo_list.go +++ b/cmd/repo_list.go @@ -36,7 +36,7 @@ func aptlyRepoListTxt(cmd *commander.Command, _ []string) error { if raw { repos[i] = repo.Name } else { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -77,7 +77,8 @@ func aptlyRepoListJSON(_ *commander.Command, _ []string) error { repos := make([]*deb.LocalRepo, context.NewCollectionFactory().LocalRepoCollection().Len()) i := 0 context.NewCollectionFactory().LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + collectionFactory := context.NewCollectionFactory() + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/repo_move.go b/cmd/repo_move.go index bd1447ce47..732e4c95f9 100644 --- a/cmd/repo_move.go +++ b/cmd/repo_move.go @@ -25,13 +25,13 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } var ( - srcRefList *deb.PackageRefList + srcRefList *deb.SplitRefList srcRepo *deb.LocalRepo ) @@ -45,7 +45,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: source and destination are the same", command) } - err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -59,12 +59,12 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } - if srcRemoteRepo.RefList() == nil { + if srcRemoteRepo.RefList().Len() == 0 { return fmt.Errorf("unable to %s: mirror not updated", command) } @@ -161,17 +161,17 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - dstRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(dstList)) + dstRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(dstList)) - err = collectionFactory.LocalRepoCollection().Update(dstRepo) + err = collectionFactory.LocalRepoCollection().Update(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } if command == "move" { // nolint: goconst - srcRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(srcList)) + srcRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(srcList)) - err = collectionFactory.LocalRepoCollection().Update(srcRepo) + err = collectionFactory.LocalRepoCollection().Update(srcRepo, 
collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_remove.go b/cmd/repo_remove.go index 5341a4c3ed..f169fd439d 100644 --- a/cmd/repo_remove.go +++ b/cmd/repo_remove.go @@ -24,7 +24,7 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to remove: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to remove: %s", err) } @@ -63,9 +63,9 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_rename.go b/cmd/repo_rename.go index 9234b7c722..459afcbbb3 100644 --- a/cmd/repo_rename.go +++ b/cmd/repo_rename.go @@ -32,7 +32,7 @@ func aptlyRepoRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/repo_show.go b/cmd/repo_show.go index a61a5f1f92..741915d123 100644 --- a/cmd/repo_show.go +++ b/cmd/repo_show.go @@ -36,7 +36,7 @@ func aptlyRepoShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -63,12 +63,13 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().LocalRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.LocalRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -79,7 +80,7 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err == nil { packageList = list.FullNames() } diff --git a/cmd/snapshot_create.go b/cmd/snapshot_create.go index 74e3c96678..19c48c62e2 100644 --- a/cmd/snapshot_create.go +++ b/cmd/snapshot_create.go @@ -30,7 +30,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - 
err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -50,7 +50,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -71,7 +71,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return commander.ErrCommandError } - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add snapshot: %s", err) } diff --git a/cmd/snapshot_diff.go b/cmd/snapshot_diff.go index ccbea32ee0..19da7fcc99 100644 --- a/cmd/snapshot_diff.go +++ b/cmd/snapshot_diff.go @@ -23,7 +23,7 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot A: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot A: %s", err) } @@ -34,13 +34,13 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot B: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot B: %s", err) } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { return fmt.Errorf("unable to calculate diff: %s", err) } diff --git a/cmd/snapshot_filter.go b/cmd/snapshot_filter.go index d4e71dc461..0276fab1aa 100644 --- a/cmd/snapshot_filter.go +++ b/cmd/snapshot_filter.go @@ -27,7 +27,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to filter: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to filter: %s", err) } @@ -87,7 +87,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[1], []*deb.Snapshot{source}, result, fmt.Sprintf("Filtered '%s', query was: '%s'", source.Name, strings.Join(args[2:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_merge.go b/cmd/snapshot_merge.go index 0a319a5ab2..e9eb0c7793 100644 --- a/cmd/snapshot_merge.go +++ b/cmd/snapshot_merge.go @@ -24,7 +24,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { return fmt.Errorf("unable 
to load snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(sources[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot: %s", err) } @@ -57,7 +57,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromRefList(args[0], sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_pull.go b/cmd/snapshot_pull.go index f73afab74d..6d2066741c 100644 --- a/cmd/snapshot_pull.go +++ b/cmd/snapshot_pull.go @@ -29,7 +29,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -40,7 +40,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -149,7 +149,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[2], []*deb.Snapshot{snapshot, source}, packageList, fmt.Sprintf("Pulled into '%s' with '%s' as source, pull request was: '%s'", snapshot.Name, source.Name, strings.Join(args[3:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_rename.go b/cmd/snapshot_rename.go index b8ac74cf33..b13b7dca6b 100644 --- a/cmd/snapshot_rename.go +++ b/cmd/snapshot_rename.go @@ -32,7 +32,7 @@ func aptlySnapshotRename(cmd *commander.Command, args []string) error { } snapshot.Name = newName - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/snapshot_search.go b/cmd/snapshot_search.go index 24da005d9e..05573ee0f4 100644 --- a/cmd/snapshot_search.go +++ b/cmd/snapshot_search.go @@ -25,7 +25,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error command := cmd.Parent.Name() collectionFactory := context.NewCollectionFactory() - var reflist *deb.PackageRefList + var reflist *deb.SplitRefList if command == "snapshot" { // nolint: goconst var snapshot *deb.Snapshot @@ -34,7 +34,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return 
fmt.Errorf("unable to search: %s", err) } @@ -47,7 +47,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -60,7 +60,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } diff --git a/cmd/snapshot_show.go b/cmd/snapshot_show.go index e03a49e586..582b6a9e37 100644 --- a/cmd/snapshot_show.go +++ b/cmd/snapshot_show.go @@ -35,7 +35,7 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -86,16 +86,17 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { } func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { + collectionFactory := context.NewCollectionFactory() var err error name := args[0] - snapshot, err := context.NewCollectionFactory().SnapshotCollection().ByName(name) + snapshot, err := collectionFactory.SnapshotCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -105,14 +106,14 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { for _, sourceID := range snapshot.SourceIDs { if snapshot.SourceKind == deb.SourceSnapshot { var source *deb.Snapshot - source, err = context.NewCollectionFactory().SnapshotCollection().ByUUID(sourceID) + source, err = collectionFactory.SnapshotCollection().ByUUID(sourceID) if err != nil { continue } snapshot.Snapshots = append(snapshot.Snapshots, source) } else if snapshot.SourceKind == deb.SourceLocalRepo { var source *deb.LocalRepo - source, err = context.NewCollectionFactory().LocalRepoCollection().ByUUID(sourceID) + source, err = collectionFactory.LocalRepoCollection().ByUUID(sourceID) if err != nil { continue } @@ -133,7 +134,7 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { if withPackages { if snapshot.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(snapshot.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(snapshot.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/snapshot_verify.go b/cmd/snapshot_verify.go index f815f29ce6..fc566aae39 100644 --- a/cmd/snapshot_verify.go +++ b/cmd/snapshot_verify.go @@ -23,7 +23,7 @@ func aptlySnapshotVerify(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to 
verify: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to verify: %s", err) } diff --git a/database/database.go b/database/database.go index 709a1aa803..03d896b201 100644 --- a/database/database.go +++ b/database/database.go @@ -48,6 +48,8 @@ type Storage interface { CreateTemporary() (Storage, error) + GetRecommendedMaxKVSize() int + Open() error Close() error CompactDB() error diff --git a/database/goleveldb/database.go b/database/goleveldb/database.go index a2874a6e61..011681a6bf 100644 --- a/database/goleveldb/database.go +++ b/database/goleveldb/database.go @@ -9,10 +9,13 @@ import ( "github.com/aptly-dev/aptly/database" ) +const blockSize = 4 * 1024 + func internalOpen(path string, throttleCompaction bool) (*leveldb.DB, error) { o := &opt.Options{ Filter: filter.NewBloomFilter(10), OpenFilesCacheCapacity: 256, + BlockSize: blockSize, } if throttleCompaction { diff --git a/database/goleveldb/storage.go b/database/goleveldb/storage.go index 37acf3d830..1281f3fbc6 100644 --- a/database/goleveldb/storage.go +++ b/database/goleveldb/storage.go @@ -16,6 +16,17 @@ type storage struct { db *leveldb.DB } +func (s *storage) GetRecommendedMaxKVSize() int { + // The block size configured is not actually a *set* block size, but rather a + // *minimum*. LevelDB only checks if a block is full after a new key/value pair is + // written, meaning that blocks will tend to overflow a bit. + // Therefore, using the default block size as the max value size will ensure + // that a new block will only contain a single value and that the size will + // only ever be as large as around double the block size (if the block was + // nearly full before the new items were added). 
+ return blockSize +} + // CreateTemporary creates new DB of the same type in temp dir func (s *storage) CreateTemporary() (database.Storage, error) { tempdir, err := os.MkdirTemp("", "aptly") diff --git a/deb/changes.go b/deb/changes.go index c264986abb..6c8bf8812f 100644 --- a/deb/changes.go +++ b/deb/changes.go @@ -291,7 +291,8 @@ func CollectChangesFiles(locations []string, reporter aptly.ResultReporter) (cha // ImportChangesFiles imports referenced files in changes files into local repository func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles bool, verifier pgp.Verifier, repoTemplate *template.Template, progress aptly.Progress, localRepoCollection *LocalRepoCollection, packageCollection *PackageCollection, - pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { + reflistCollection *RefListCollection, pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, + parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { for _, path := range changesFiles { var changes *Changes @@ -359,7 +360,7 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac } } - err = localRepoCollection.LoadComplete(repo) + err = localRepoCollection.LoadComplete(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to load repo: %s", err) } @@ -382,9 +383,9 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac return nil, nil, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(NewSplitRefListFromPackageList(list)) - err = localRepoCollection.Update(repo) + err = localRepoCollection.Update(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to save: %s", err) } diff --git a/deb/changes_test.go b/deb/changes_test.go index b7dc4d95d0..1d50d610a5 100644 --- a/deb/changes_test.go +++ b/deb/changes_test.go @@ -21,6 +21,7 @@ type ChangesSuite struct { db database.Storage localRepoCollection *LocalRepoCollection packageCollection *PackageCollection + reflistCollection *RefListCollection packagePool aptly.PackagePool checksumStorage aptly.ChecksumStorage progress aptly.Progress @@ -42,6 +43,7 @@ func (s *ChangesSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.localRepoCollection = NewLocalRepoCollection(s.db) s.packageCollection = NewPackageCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.checksumStorage = files.NewMockChecksumStorage() s.packagePool = files.NewPackagePool(s.Dir, false) @@ -88,7 +90,7 @@ func (s *ChangesSuite) TestCollectChangesFiles(c *C) { func (s *ChangesSuite) TestImportChangesFiles(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) origFailedFiles := []string{ "testdata/changes/calamares.changes", @@ -124,7 +126,8 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { processedFiles, failedFiles, err := ImportChangesFiles( append(changesFiles, "testdata/changes/notexistent.changes"), s.Reporter, true, true, false, false, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, 
func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, DeepEquals, append(expectedFailedFiles, "testdata/changes/notexistent.changes")) @@ -133,7 +136,7 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) changesFiles, failedFiles := CollectChangesFiles( []string{"testdata/dbgsym-with-source-version"}, s.Reporter) @@ -142,7 +145,8 @@ func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { _, failedFiles, err := ImportChangesFiles( changesFiles, s.Reporter, true, true, false, true, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, IsNil) diff --git a/deb/collections.go b/deb/collections.go index 7dfe852350..ff711e2650 100644 --- a/deb/collections.go +++ b/deb/collections.go @@ -16,6 +16,7 @@ type CollectionFactory struct { snapshots *SnapshotCollection localRepos *LocalRepoCollection publishedRepos *PublishedRepoCollection + reflists *RefListCollection checksums *ChecksumCollection } @@ -91,6 +92,17 @@ func (factory *CollectionFactory) PublishedRepoCollection() *PublishedRepoCollec return factory.publishedRepos } +func (factory *CollectionFactory) RefListCollection() *RefListCollection { + factory.Lock() + defer factory.Unlock() + + if factory.reflists == nil { + factory.reflists = NewRefListCollection(factory.db) + } + + return factory.reflists +} + // ChecksumCollection returns (or creates) new ChecksumCollection func (factory *CollectionFactory) ChecksumCollection(db database.ReaderWriter) aptly.ChecksumStorage { factory.Lock() diff --git a/deb/graph.go b/deb/graph.go index 16a7ce8543..77421c3a57 100644 --- a/deb/graph.go +++ b/deb/graph.go @@ -33,7 +33,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz existingNodes := map[string]bool{} err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -55,7 +55,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -81,7 +81,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz }) err = collectionFactory.SnapshotCollection().ForEach(func(snapshot 
*Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/deb/list.go b/deb/list.go index 28644e4095..b2a3b5f3c5 100644 --- a/deb/list.go +++ b/deb/list.go @@ -91,7 +91,7 @@ func NewPackageListWithDuplicates(duplicates bool, capacity int) *PackageList { } // NewPackageListFromRefList loads packages list from PackageRefList -func NewPackageListFromRefList(reflist *PackageRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { +func NewPackageListFromRefList(reflist AnyRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { // empty reflist if reflist == nil { return NewPackageList(), nil diff --git a/deb/local.go b/deb/local.go index 42d5d3f262..7d134efe8d 100644 --- a/deb/local.go +++ b/deb/local.go @@ -26,7 +26,7 @@ type LocalRepo struct { // Uploaders configuration Uploaders *Uploaders `codec:"Uploaders,omitempty" json:"-"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewLocalRepo creates new instance of Debian local repository @@ -48,20 +48,17 @@ func (repo *LocalRepo) String() string { // NumPackages return number of packages in local repo func (repo *LocalRepo) NumPackages() int { - if repo.packageRefs == nil { - return 0 - } return repo.packageRefs.Len() } // RefList returns package list for repo -func (repo *LocalRepo) RefList() *PackageRefList { +func (repo *LocalRepo) RefList() *SplitRefList { return repo.packageRefs } // UpdateRefList changes package list for local repo -func (repo *LocalRepo) UpdateRefList(reflist *PackageRefList) { - repo.packageRefs = reflist +func (repo *LocalRepo) UpdateRefList(sl *SplitRefList) { + repo.packageRefs = sl } // Encode does msgpack encoding of LocalRepo @@ -140,14 +137,14 @@ func (collection *LocalRepoCollection) search(filter func(*LocalRepo) bool, uniq } // Add appends new repo to collection and saves it -func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Add(repo *LocalRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("local repo with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -157,27 +154,25 @@ func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { } // Update stores updated information about repo in DB -func (collection *LocalRepoCollection) Update(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Update(repo *LocalRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + bc := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), bc) } return batch.Write() } // LoadComplete loads additional information for local repo -func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == 
database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/local_test.go b/deb/local_test.go index c9072b7dc0..b87b1b624a 100644 --- a/deb/local_test.go +++ b/deb/local_test.go @@ -12,7 +12,7 @@ import ( type LocalRepoSuite struct { db database.Storage list *PackageList - reflist *PackageRefList + reflist *SplitRefList repo *LocalRepo } @@ -24,7 +24,7 @@ func (s *LocalRepoSuite) SetUpTest(c *C) { s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) s.repo = NewLocalRepo("lrepo", "Super repo") s.repo.packageRefs = s.reflist @@ -75,10 +75,11 @@ func (s *LocalRepoSuite) TestRefKey(c *C) { } type LocalRepoCollectionSuite struct { - db database.Storage - collection *LocalRepoCollection - list *PackageList - reflist *PackageRefList + db database.Storage + collection *LocalRepoCollection + reflistCollection *RefListCollection + list *PackageList + reflist *SplitRefList } var _ = Suite(&LocalRepoCollectionSuite{}) @@ -86,12 +87,13 @@ var _ = Suite(&LocalRepoCollectionSuite{}) func (s *LocalRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewLocalRepoCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.list = NewPackageList() s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromRefList(NewPackageRefListFromPackageList(s.list)) } func (s *LocalRepoCollectionSuite) TearDownTest(c *C) { @@ -103,8 +105,8 @@ func (s *LocalRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("local1") c.Assert(err, IsNil) @@ -121,7 +123,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -135,7 +137,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection := NewLocalRepoCollection(s.db) r, err := collection.ByName("local1") @@ -143,20 +145,20 @@ func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection = NewLocalRepoCollection(s.db) r, err = collection.ByName("local1") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - 
c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.reflistCollection), IsNil) c.Assert(r.NumPackages(), Equals, 2) } func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { repo := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo) + s.collection.Add(repo, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*LocalRepo) error { @@ -178,10 +180,10 @@ func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *LocalRepoCollectionSuite) TestDrop(c *C) { repo1 := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo1) + s.collection.Add(repo1, s.reflistCollection) repo2 := NewLocalRepo("local2", "Comment 2") - s.collection.Add(repo2) + s.collection.Add(repo2, s.reflistCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/publish.go b/deb/publish.go index 1ef23fb94f..1fdbc65fb1 100644 --- a/deb/publish.go +++ b/deb/publish.go @@ -37,7 +37,7 @@ type repoSourceItem struct { // Pointer to local repo if SourceKind == "local" localRepo *LocalRepo // Package references is SourceKind == "local" - packageRefs *PackageRefList + packageRefs *SplitRefList } // PublishedRepo is a published for http/ftp representation of snapshot as Debian repository @@ -609,7 +609,7 @@ func (p *PublishedRepo) RefKey(component string) []byte { } // RefList returns list of package refs in local repo -func (p *PublishedRepo) RefList(component string) *PackageRefList { +func (p *PublishedRepo) RefList(component string) *SplitRefList { item := p.sourceItems[component] if p.SourceKind == SourceLocalRepo { return item.packageRefs @@ -1243,14 +1243,14 @@ func (collection *PublishedRepoCollection) loadList() { } // Add appends new repo to collection and saves it -func (collection *PublishedRepoCollection) Add(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Add(repo *PublishedRepo, reflistCollection *RefListCollection) error { collection.loadList() if collection.CheckDuplicate(repo) != nil { return fmt.Errorf("published repo with storage/prefix/distribution %s/%s/%s already exists", repo.Storage, repo.Prefix, repo.Distribution) } - err := collection.Update(repo) + err := collection.Update(repo, reflistCollection) if err != nil { return err } @@ -1273,13 +1273,14 @@ func (collection *PublishedRepoCollection) CheckDuplicate(repo *PublishedRepo) * } // Update stores updated information about repo in DB -func (collection *PublishedRepoCollection) Update(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Update(repo *PublishedRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.SourceKind == SourceLocalRepo { + rb := reflistCollection.NewBatch(batch) for component, item := range repo.sourceItems { - batch.Put(repo.RefKey(component), item.packageRefs.Encode()) + reflistCollection.UpdateInBatch(item.packageRefs, repo.RefKey(component), rb) } } return batch.Write() @@ -1312,7 +1313,7 @@ func (collection *PublishedRepoCollection) LoadShallow(repo *PublishedRepo, coll return } - item.packageRefs = &PackageRefList{} + item.packageRefs = NewSplitRefList() repo.sourceItems[component] = item } } else { @@ -1328,35 +1329,29 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if repo.SourceKind == SourceSnapshot { for _, item := range repo.sourceItems { - err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot) + err = 
collectionFactory.SnapshotCollection().LoadComplete(item.snapshot, collectionFactory.RefListCollection()) if err != nil { return } } } else if repo.SourceKind == SourceLocalRepo { for component, item := range repo.sourceItems { - err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo, collectionFactory.RefListCollection()) if err != nil { return } - var encoded []byte - encoded, err = collection.db.Get(repo.RefKey(component)) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey(component)) if err != nil { // < 0.6 saving w/o component name if err == database.ErrNotFound && len(repo.Sources) == 1 { - encoded, err = collection.db.Get(repo.RefKey("")) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey("")) } if err != nil { return } } - - err = item.packageRefs.Decode(encoded) - if err != nil { - return - } } } else { panic("unknown SourceKind") @@ -1461,6 +1456,11 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix referencedFiles := map[string][]string{} processedComponentRefs := map[string]*PackageRefList{} + processedComponentBuckets := map[string]*RefListDigestSet{} + for _, component := range components { + processedComponentBuckets[component] = NewRefListDigestSet() + } + for _, r := range collection.list { if r.Prefix == prefix && !r.MultiDist { matches := false @@ -1484,36 +1484,51 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix for _, component := range components { if utils.StrSliceHasItem(repoComponents, component) { - unseenRefs := r.RefList(component) - processedRefs := processedComponentRefs[component] - if processedRefs != nil { - unseenRefs = unseenRefs.Subtract(processedRefs) - } else { - processedRefs = NewPackageRefList() - } + processedBuckets := processedComponentBuckets[component] - if unseenRefs.Len() == 0 { - continue - } - processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) + err := r.RefList(component).ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + if processedBuckets.Has(digest) { + return nil + } + processedBuckets.Add(digest) - packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) - if err != nil { - return nil, err - } + unseenRefs := bucket + processedRefs := processedComponentRefs[component] + if processedRefs != nil { + unseenRefs = unseenRefs.Subtract(processedRefs) + } else { + processedRefs = NewPackageRefList() + } - packageList.ForEach(func(p *Package) error { - poolDir, err := p.PoolDirectory() + if unseenRefs.Len() == 0 { + return nil + } + processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) + + packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) if err != nil { return err } - for _, f := range p.Files() { - referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) - } + packageList.ForEach(func(p *Package) error { + poolDir, err := p.PoolDirectory() + if err != nil { + return err + } + + for _, f := range p.Files() { + referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) + } + + return nil + }) return nil }) + + if err != nil { + return nil, err + } } } } diff --git a/deb/publish_bench_test.go b/deb/publish_bench_test.go index 
86f18c3014..29b17bcc8d 100644 --- a/deb/publish_bench_test.go +++ b/deb/publish_bench_test.go @@ -31,6 +31,7 @@ func BenchmarkListReferencedFiles(b *testing.B) { packageCollection := factory.PackageCollection() repoCollection := factory.LocalRepoCollection() publishCollection := factory.PublishedRepoCollection() + reflistCollection := factory.RefListCollection() sharedRefs := NewPackageRefList() { @@ -91,14 +92,14 @@ func BenchmarkListReferencedFiles(b *testing.B) { repo := NewLocalRepo(fmt.Sprintf("repo%d", repoIndex), "comment") repo.DefaultDistribution = fmt.Sprintf("dist%d", repoIndex) repo.DefaultComponent = defaultComponent - repo.UpdateRefList(refs.Merge(sharedRefs, false, true)) - repoCollection.Add(repo) + repo.UpdateRefList(NewSplitRefListFromRefList(refs.Merge(sharedRefs, false, true))) + repoCollection.Add(repo, reflistCollection) publish, err := NewPublishedRepo("", "test", "", nil, []string{defaultComponent}, []interface{}{repo}, factory, false) if err != nil { b.Fatal(err) } - publishCollection.Add(publish) + publishCollection.Add(publish, reflistCollection) } db.CompactDB() diff --git a/deb/publish_test.go b/deb/publish_test.go index c2d228ac2e..c4d51bf378 100644 --- a/deb/publish_test.go +++ b/deb/publish_test.go @@ -83,6 +83,7 @@ type PublishedRepoSuite struct { db database.Storage factory *CollectionFactory packageCollection *PackageCollection + reflistCollection *RefListCollection } var _ = Suite(&PublishedRepoSuite{}) @@ -114,21 +115,22 @@ func (s *PublishedRepoSuite) SetUpTest(c *C) { s.p2.UpdateFiles(s.p1.Files()) s.p3.UpdateFiles(s.p1.Files()) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) + s.reflistCollection = s.factory.RefListCollection() repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) repo.packageRefs = s.reflist - s.factory.RemoteRepoCollection().Add(repo) + s.factory.RemoteRepoCollection().Add(repo, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") s.localRepo.packageRefs = s.reflist - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.snapshot, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot) + s.factory.SnapshotCollection().Add(s.snapshot, s.reflistCollection) s.snapshot2, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot2) + s.factory.SnapshotCollection().Add(s.snapshot2, s.reflistCollection) s.packageCollection = s.factory.PackageCollection() s.packageCollection.Update(s.p1) @@ -337,7 +339,7 @@ func (s *PublishedRepoSuite) TestDistributionComponentGuessing(c *C) { s.localRepo.DefaultDistribution = "precise" s.localRepo.DefaultComponent = "contrib" - s.factory.LocalRepoCollection().Update(s.localRepo) + s.factory.LocalRepoCollection().Update(s.localRepo, s.reflistCollection) repo, err = NewPublishedRepo("", "ppa", "", nil, []string{""}, []interface{}{s.localRepo}, s.factory, false) c.Check(err, IsNil) @@ -538,6 +540,7 @@ type PublishedRepoCollectionSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection snap1, snap2 *Snapshot localRepo *LocalRepo @@ -553,22 +556,23 @@ func (s *PublishedRepoCollectionSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + 
s.reflistCollection = s.factory.RefListCollection() snap1Refs := NewPackageRefList() snap1Refs.Refs = [][]byte{s.p1.Key(""), s.p2.Key("")} sort.Sort(snap1Refs) - s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, snap1Refs, "desc1") + s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, NewSplitRefListFromRefList(snap1Refs), "desc1") snap2Refs := NewPackageRefList() snap2Refs.Refs = [][]byte{s.p3.Key("")} sort.Sort(snap2Refs) - s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, snap2Refs, "desc2") + s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, NewSplitRefListFromRefList(snap2Refs), "desc2") - s.snapshotCollection.Add(s.snap1) - s.snapshotCollection.Add(s.snap2) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) + s.snapshotCollection.Add(s.snap2, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory, false) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main", "contrib"}, []interface{}{s.snap2, s.snap1}, s.factory, false) @@ -587,14 +591,14 @@ func (s *PublishedRepoCollectionSuite) TestAddByStoragePrefixDistribution(c *C) _, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) - c.Assert(s.collection.Add(s.repo1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo3), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo3, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo3), Equals, s.repo1) - c.Assert(s.collection.Add(s.repo4), IsNil) - c.Assert(s.collection.Add(s.repo5), IsNil) + c.Assert(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo5, s.reflistCollection), IsNil) r, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, IsNil) @@ -620,7 +624,7 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { _, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, IsNil) @@ -631,8 +635,8 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { } func (s *PublishedRepoCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.repo1), IsNil) - c.Assert(s.collection.Update(s.repo4), IsNil) + c.Assert(s.collection.Update(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Update(s.repo4, s.reflistCollection), IsNil) collection := NewPublishedRepoCollection(s.db) r, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -680,7 +684,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { encoder.Encode(&old) c.Assert(s.db.Put(s.repo1.Key(), buf.Bytes()), IsNil) - c.Assert(s.db.Put(s.repo1.RefKey(""), s.localRepo.RefList().Encode()), IsNil) + 
c.Assert(s.db.Put(s.repo1.RefKey(""), NewPackageRefList().Encode()), IsNil) collection := NewPublishedRepoCollection(s.db) repo, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -695,7 +699,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { } func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.repo1) + s.collection.Add(s.repo1, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*PublishedRepo) error { @@ -716,17 +720,17 @@ func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { } func (s *PublishedRepoCollectionSuite) TestBySnapshot(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) c.Check(s.collection.BySnapshot(s.snap1), DeepEquals, []*PublishedRepo{s.repo1, s.repo2}) c.Check(s.collection.BySnapshot(s.snap2), DeepEquals, []*PublishedRepo{s.repo2}) } func (s *PublishedRepoCollectionSuite) TestByLocalRepo(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepo(s.localRepo), DeepEquals, []*PublishedRepo{s.repo4, s.repo5}) } @@ -736,10 +740,10 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { c.Check(s.factory.PackageCollection().Update(s.p2), IsNil) c.Check(s.factory.PackageCollection().Update(s.p3), IsNil) - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) files, err := s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -755,12 +759,12 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { }) snap3 := NewSnapshotFromRefList("snap3", []*Snapshot{}, s.snap2.RefList(), "desc3") - s.snapshotCollection.Add(snap3) + s.snapshotCollection.Add(snap3, s.reflistCollection) // Ensure that adding a second publish point with matching files doesn't give duplicate results. 
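	// (Editor's note: with split reflists, snap3 shares snap2's RefList and
	// therefore its bucket digests, so listReferencedFilesByComponent skips
	// those shared buckets early via processedComponentBuckets.)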
repo3, err := NewPublishedRepo("", "", "anaconda-2", []string{}, []string{"main"}, []interface{}{snap3}, s.factory, false) c.Check(err, IsNil) - c.Check(s.collection.Add(repo3), IsNil) + c.Check(s.collection.Add(repo3, s.reflistCollection), IsNil) files, err = s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -781,6 +785,7 @@ type PublishedRepoRemoveSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection root, root2 string provider *FakeStorageProvider @@ -796,10 +801,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() s.snap1 = NewSnapshotFromPackageList("snap1", []*Snapshot{}, NewPackageList(), "desc1") - s.snapshotCollection.Add(s.snap1) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory, false) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory, false) @@ -808,11 +814,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.repo5, _ = NewPublishedRepo("files:other", "ppa", "osminog", []string{}, []string{"contrib"}, []interface{}{s.snap1}, s.factory, false) s.collection = s.factory.PublishedRepoCollection() - s.collection.Add(s.repo1) - s.collection.Add(s.repo2) - s.collection.Add(s.repo3) - s.collection.Add(s.repo4) - s.collection.Add(s.repo5) + s.collection.Add(s.repo1, s.reflistCollection) + s.collection.Add(s.repo2, s.reflistCollection) + s.collection.Add(s.repo3, s.reflistCollection) + s.collection.Add(s.repo4, s.reflistCollection) + s.collection.Add(s.repo5, s.reflistCollection) s.root = c.MkDir() s.publishedStorage = files.NewPublishedStorage(s.root, "", "") diff --git a/deb/reflist.go b/deb/reflist.go index 30396548cc..e039de1274 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -2,10 +2,15 @@ package deb import ( "bytes" + "crypto/sha256" + "encoding/base64" "encoding/json" + "fmt" "sort" "github.com/AlekSi/pointer" + "github.com/aptly-dev/aptly/database" + "github.com/cespare/xxhash/v2" "github.com/ugorji/go/codec" ) @@ -44,6 +49,13 @@ func NewPackageRefListFromPackageList(list *PackageList) *PackageRefList { return reflist } +func (l *PackageRefList) Clone() *PackageRefList { + clone := &PackageRefList{} + clone.Refs = make([][]byte, l.Len()) + copy(clone.Refs, l.Refs) + return clone +} + // Len returns number of refs func (l *PackageRefList) Len() int { return len(l.Refs) @@ -184,8 +196,12 @@ func (d PackageDiff) MarshalJSON() ([]byte, error) { type PackageDiffs []PackageDiff // Diff calculates difference between two reflists -func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection) (result PackageDiffs, err error) { - result = make(PackageDiffs, 0, 128) +func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } // pointer to left and right reflists il, ir := 0, 0 @@ -258,7 +274,7 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle } } - return + return result, nil } // Merge merges reflist r into current reflist. 
If overrideMatching, merge
@@ -391,3 +407,753 @@ func (l *PackageRefList) FilterLatestRefs() {
 		lastArch, lastName, lastVer = arch, name, ver
 	}
 }
+
+const (
+	reflistBucketCount = 1 << 6
+	reflistBucketMask  = reflistBucketCount - 1
+)
+
+type reflistDigestArray [sha256.Size]byte
+
+func bucketRefPrefix(ref []byte) []byte {
+	const maxPrefixLen = 3
+
+	// Cut out the arch, leaving behind the package name and subsequent info.
+	_, ref, _ = bytes.Cut(ref, []byte{' '})
+
+	// Strip off the lib prefix, so that "libxyz" and "xyz", which are likely
+	// to be updated together, go in the same bucket.
+	libPrefix := []byte("lib")
+	if bytes.HasPrefix(ref, libPrefix) {
+		ref = ref[len(libPrefix):]
+	}
+
+	prefixLen := min(maxPrefixLen, len(ref))
+	prefix, _, _ := bytes.Cut(ref[:prefixLen], []byte{' '})
+	return prefix
+}
+
+func bucketIdxForRef(ref []byte) int {
+	return int(xxhash.Sum64(bucketRefPrefix(ref))) & reflistBucketMask
+}
+
+// SplitRefList is a list of package refs, similar to a PackageRefList. However,
+// instead of storing a linear array of refs, SplitRefList splits the refs into
+// PackageRefList "buckets", based on a hash of the package name inside the ref.
+// Each bucket has a digest of its contents that serves as its key in the database.
+//
+// When serialized, a SplitRefList just becomes an array of bucket digests, and
+// the buckets themselves are stored separately. Because the buckets are then
+// referenced by their digests, multiple independent reflists can share buckets,
+// if their buckets have matching digests.
+//
+// Buckets themselves may not be confined to a single database value; instead,
+// they're split into "segments", based on the database's preferred maximum
+// value size. This prevents large buckets from slowing down the database.
+type SplitRefList struct {
+	Buckets [][]byte
+
+	bucketRefs []*PackageRefList
+}
+
+// NewSplitRefList creates empty SplitRefList
+func NewSplitRefList() *SplitRefList {
+	sl := &SplitRefList{}
+	sl.reset()
+	return sl
+}
+
+// NewSplitRefListFromRefList creates SplitRefList from PackageRefList
+func NewSplitRefListFromRefList(reflist *PackageRefList) *SplitRefList {
+	sl := NewSplitRefList()
+	sl.Replace(reflist)
+	return sl
+}
+
+// NewSplitRefListFromPackageList creates SplitRefList from PackageList
+func NewSplitRefListFromPackageList(list *PackageList) *SplitRefList {
+	return NewSplitRefListFromRefList(NewPackageRefListFromPackageList(list))
+}
+
+func (sl *SplitRefList) reset() {
+	sl.Buckets = make([][]byte, reflistBucketCount)
+	sl.bucketRefs = make([]*PackageRefList, reflistBucketCount)
+}
+
+// Has checks whether package is part of reflist
+func (sl *SplitRefList) Has(p *Package) bool {
+	idx := bucketIdxForRef(p.Key(""))
+	if bucket := sl.bucketRefs[idx]; bucket != nil {
+		return bucket.Has(p)
+	}
+	return false
+}
+
+// Len returns number of refs
+func (sl *SplitRefList) Len() int {
+	total := 0
+	for _, bucket := range sl.bucketRefs {
+		if bucket != nil {
+			total += bucket.Len()
+		}
+	}
+	return total
+}
+
+func reflistDigest(l *PackageRefList) []byte {
+	// Different algorithms on PackageRefLists will sometimes return a nil slice
+	// of refs and other times return an empty slice. Regardless, they should
+	// both be treated identically and be given an empty digest.
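	// Editor's illustration (assuming package keys of the form
	// "P<arch> <name> <version>"): a bucket holding the sorted refs
	//   Pamd64 libssl-dev 3.0.11
	//   Pamd64 libssl3 3.0.11
	// (both land in the same bucket, since bucketRefPrefix reduces each name
	// to "ssl") gets the digest
	//   sha256("Pamd64 libssl-dev 3.0.11\x00Pamd64 libssl3 3.0.11\x00"),
	// so any reflist whose bucket holds exactly these refs shares the same
	// stored copy.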
+ if len(l.Refs) == 0 { + return nil + } + + h := sha256.New() + for _, ref := range l.Refs { + h.Write(ref) + h.Write([]byte{0}) + } + return h.Sum(nil) +} + +// Removes all the refs inside and replaces them with those in the given reflist +func (sl *SplitRefList) Replace(reflist *PackageRefList) { + sl.reset() + + for _, ref := range reflist.Refs { + idx := bucketIdxForRef(ref) + + bucket := sl.bucketRefs[idx] + if bucket == nil { + bucket = NewPackageRefList() + sl.bucketRefs[idx] = bucket + } + + bucket.Refs = append(bucket.Refs, ref) + } + + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + sort.Sort(bucket) + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Merge merges reflist r into current reflist (see PackageRefList.Merge) +func (sl *SplitRefList) Merge(r *SplitRefList, overrideMatching, ignoreConflicting bool) (result *SplitRefList) { + result = NewSplitRefList() + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket == nil && rbucket == nil { + continue + } + + if lbucket == nil { + lbucket = &empty + } else if rbucket == nil { + rbucket = &empty + } + + result.bucketRefs[idx] = lbucket.Merge(rbucket, overrideMatching, ignoreConflicting) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } + + return +} + +// Subtract returns all packages in l that are not in r +func (sl *SplitRefList) Subtract(r *SplitRefList) (result *SplitRefList) { + result = NewSplitRefList() + + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result.bucketRefs[idx] = lbucket.Subtract(rbucket) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } else { + result.bucketRefs[idx] = lbucket.Clone() + result.Buckets[idx] = sl.Buckets[idx] + } + } + } + + return +} + +// Diff calculates difference between two reflists +func (sl *SplitRefList) Diff(r *SplitRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result, err = lbucket.Diff(rbucket, packageCollection, result) + } else { + result, err = lbucket.Diff(&empty, packageCollection, result) + } + } else if rbucket != nil { + result, err = empty.Diff(rbucket, packageCollection, result) + } + + if err != nil { + return nil, err + } + } + + sort.Slice(result, func(i, j int) bool { + var ri, rj []byte + if result[i].Left != nil { + ri = result[i].Left.Key("") + } else { + ri = result[i].Right.Key("") + } + if result[j].Left != nil { + rj = result[j].Left.Key("") + } else { + rj = result[j].Right.Key("") + } + + return bytes.Compare(ri, rj) < 0 + }) + + return result, nil +} + +// FilterLatestRefs reduces a reflist to the latest of each package (see PackageRefList.FilterLatestRefs) +func (sl *SplitRefList) FilterLatestRefs() { + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + bucket.FilterLatestRefs() + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Flatten creates a flat PackageRefList containing all the refs in this reflist +func (sl *SplitRefList) Flatten() *PackageRefList { + reflist := NewPackageRefList() + sl.ForEach(func(ref []byte) error { + reflist.Refs = append(reflist.Refs, ref) + return nil + }) + sort.Sort(reflist) + return reflist +} + +// ForEachBucket calls handler for each bucket 
in list +func (sl *SplitRefList) ForEachBucket(handler func(digest []byte, bucket *PackageRefList) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := handler(digest, bucket); err != nil { + return err + } + } + } + + return nil +} + +// ForEach calls handler for each package ref in list +// +// IMPORTANT: unlike PackageRefList.ForEach, the order of handler invocations +// is *not* guaranteed to be sorted. +func (sl *SplitRefList) ForEach(handler func([]byte) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := bucket.ForEach(handler); err != nil { + return err + } + } + } + + return nil +} + +// RefListDigestSet is a set of SplitRefList bucket digests +type RefListDigestSet struct { + items map[reflistDigestArray]struct{} +} + +// NewRefListDigestSet creates empty RefListDigestSet +func NewRefListDigestSet() *RefListDigestSet { + return &RefListDigestSet{items: map[reflistDigestArray]struct{}{}} +} + +// Len returns number of digests in the set +func (set *RefListDigestSet) Len() int { + return len(set.items) +} + +// ForEach calls handler for each digest in the set +func (set *RefListDigestSet) ForEach(handler func(digest []byte) error) error { + for digest := range set.items { + if err := handler(digest[:]); err != nil { + return err + } + } + + return nil +} + +// Add adds digest to set, doing nothing if the digest was already present +func (set *RefListDigestSet) Add(digest []byte) { + set.items[reflistDigestArray(digest)] = struct{}{} +} + +// AddAllInRefList adds all the bucket digests in a SplitRefList to the set +func (set *RefListDigestSet) AddAllInRefList(sl *SplitRefList) { + for _, digest := range sl.Buckets { + if len(digest) > 0 { + set.Add(digest) + } + } +} + +// Has checks whether a digest is part of set +func (set *RefListDigestSet) Has(digest []byte) bool { + _, ok := set.items[reflistDigestArray(digest)] + return ok +} + +// Remove removes a digest from set +func (set *RefListDigestSet) Remove(digest []byte) { + delete(set.items, reflistDigestArray(digest)) +} + +// RemoveAll removes all the digests in other from the current set +func (set *RefListDigestSet) RemoveAll(other *RefListDigestSet) { + for digest := range other.items { + delete(set.items, digest) + } +} + +// RefListCollection does listing, updating/adding/deleting of SplitRefLists +type RefListCollection struct { + db database.Storage + + cache map[reflistDigestArray]*PackageRefList +} + +// NewRefListCollection creates a RefListCollection +func NewRefListCollection(db database.Storage) *RefListCollection { + return &RefListCollection{db: db, cache: make(map[reflistDigestArray]*PackageRefList)} +} + +type reflistStorageFormat int + +const ( + // (legacy format) all the refs are stored inline in a single value + reflistStorageFormatInline reflistStorageFormat = iota + // the refs are split into buckets that are stored externally from the value + reflistStorageFormatSplit +) + +// NoPadding is used because all digests are the same length, so the padding +// is useless and only serves to muddy the output. 
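// Editor's note on the resulting key layout (digest value is illustrative):
// a bucket whose digest base64-encodes to "qUiQT..." is stored as one or more
// segments under the keys
//   FqUiQT...-0000
//   FqUiQT...-0001
//   ...
// so AllBucketDigests can enumerate buckets by scanning the "F" keyspace and
// UnsafeDropBucket can delete a whole bucket via its "F<digest>-" prefix.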
+var bucketDigestEncoding = base64.StdEncoding.WithPadding(base64.NoPadding) + +func segmentPrefix(encodedDigest string) []byte { + return []byte(fmt.Sprintf("F%s-", encodedDigest)) +} + +func segmentIndexKey(prefix []byte, idx int) []byte { + // Assume most buckets won't have more than 0xFFFF = ~65k segments (which + // would be an extremely large bucket!). + return append(bytes.Clone(prefix), []byte(fmt.Sprintf("%04x", idx))...) +} + +// AllBucketDigests returns a set of all the bucket digests in the database +func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, error) { + digests := NewRefListDigestSet() + + err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, value []byte) error { + if !bytes.HasSuffix(key, []byte("-0000")) { + // Ignore additional segments for the same digest. + return nil + } + + encodedDigest, _, foundDash := bytes.Cut(key[1:], []byte("-")) + if !foundDash { + return fmt.Errorf("invalid key: %s", string(key)) + } + digest := make([]byte, bucketDigestEncoding.DecodedLen(len(encodedDigest))) + if _, err := bucketDigestEncoding.Decode(digest, encodedDigest); err != nil { + return fmt.Errorf("decoding key %s: %w", string(key), err) + } + + digests.Add(digest) + return nil + }) + + if err != nil { + return nil, err + } + return digests, nil +} + +// UnsafeDropBucket drops the bucket associated with digest from the database, +// doing so inside batch +// +// This is considered "unsafe" because no checks are performed to ensure that +// the bucket is no longer referenced by any saved reflists. +func (collection *RefListCollection) UnsafeDropBucket(digest []byte, batch database.Batch) error { + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + return collection.db.ProcessByPrefix(prefix, func(key []byte, value []byte) error { + return batch.Delete(key) + }) +} + +func (collection *RefListCollection) load(sl *SplitRefList, key []byte) (reflistStorageFormat, error) { + sl.reset() + + data, err := collection.db.Get(key) + if err != nil { + return 0, err + } + + var splitOrInlineRefList struct { + *SplitRefList + *PackageRefList + } + handle := &codec.MsgpackHandle{} + handle.ZeroCopy = true + decoder := codec.NewDecoderBytes(data, handle) + if err := decoder.Decode(&splitOrInlineRefList); err != nil { + return 0, err + } + + if splitOrInlineRefList.SplitRefList != nil { + sl.Buckets = splitOrInlineRefList.Buckets + } else if splitOrInlineRefList.PackageRefList != nil { + sl.Replace(splitOrInlineRefList.PackageRefList) + return reflistStorageFormatInline, nil + } + + return reflistStorageFormatSplit, nil +} + +func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { + for idx := range sl.Buckets { + if sl.bucketRefs[idx] != nil { + continue + } + + var bucket *PackageRefList + + if digest := sl.Buckets[idx]; len(digest) > 0 { + cacheKey := reflistDigestArray(digest) + bucket = collection.cache[cacheKey] + if bucket == nil { + bucket = NewPackageRefList() + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + err := collection.db.ProcessByPrefix(prefix, func(digest []byte, value []byte) error { + var l PackageRefList + if err := l.Decode(append([]byte{}, value...)); err != nil { + return err + } + + bucket.Refs = append(bucket.Refs, l.Refs...) + return nil + }) + + if err != nil { + return err + } + + // The segments may not have been iterated in order, so make sure to re-sort + // here. 
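				// (Editor's note: reflistDigest hashes refs in sorted order,
				// so the bucket must be re-sorted before the digest check
				// below can succeed.)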
+ sort.Sort(bucket)
+ collection.cache[cacheKey] = bucket
+ }
+
+ actualDigest := reflistDigest(bucket)
+ if !bytes.Equal(actualDigest, digest) {
+ return fmt.Errorf("corrupt reflist bucket %d: expected digest %s, got %s",
+ idx,
+ bucketDigestEncoding.EncodeToString(digest),
+ bucketDigestEncoding.EncodeToString(actualDigest))
+ }
+ }
+
+ sl.bucketRefs[idx] = bucket
+ }
+
+ return nil
+}
+
+// LoadComplete loads the reflist stored at the given key, as well as all the
+// buckets referenced by a split reflist
+func (collection *RefListCollection) LoadComplete(sl *SplitRefList, key []byte) error {
+ if _, err := collection.load(sl, key); err != nil {
+ return err
+ }
+
+ return collection.loadBuckets(sl)
+}
+
+// RefListBatch is a wrapper over a database.Batch that tracks already-written
+// reflists to avoid writing them multiple times
+//
+// It is *not* safe to use the same underlying database.Batch that has already
+// been given to UnsafeDropBucket.
+type RefListBatch struct {
+ batch database.Batch
+
+ alreadyWritten *RefListDigestSet
+}
+
+// NewBatch creates a new RefListBatch wrapping the given database.Batch
+func (collection *RefListCollection) NewBatch(batch database.Batch) *RefListBatch {
+ return &RefListBatch{
+ batch: batch,
+ alreadyWritten: NewRefListDigestSet(),
+ }
+}
+
+type reflistUpdateContext struct {
+ rb *RefListBatch
+ stats *RefListMigrationStats
+}
+
+func clearSegmentRefs(reflist *PackageRefList, recommendedMaxKVSize int) {
+ avgRefsInSegment := recommendedMaxKVSize / 70
+ reflist.Refs = make([][]byte, 0, avgRefsInSegment)
+}
+
+func flushSegmentRefs(uctx *reflistUpdateContext, prefix []byte, segment int, reflist *PackageRefList) error {
+ encoded := reflist.Encode()
+ err := uctx.rb.batch.Put(segmentIndexKey(prefix, segment), encoded)
+ if err == nil && uctx.stats != nil {
+ uctx.stats.Segments++
+ }
+ return err
+}
+
+func (collection *RefListCollection) updateWithContext(sl *SplitRefList, key []byte, uctx *reflistUpdateContext) error {
+ if sl != nil {
+ recommendedMaxKVSize := collection.db.GetRecommendedMaxKVSize()
+
+ for idx, digest := range sl.Buckets {
+ if len(digest) == 0 {
+ continue
+ }
+
+ if uctx.rb.alreadyWritten.Has(digest) {
+ continue
+ }
+
+ prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest))
+ if collection.db.HasPrefix(prefix) {
+ continue
+ }
+
+ // All the sizing information is taken from the msgpack spec:
+ // https://github.com/msgpack/msgpack/blob/master/spec.md
+
+ // Assume that a segment will have [16,2^16) elements, which would
+ // fit into an array 16 and thus have 3 bytes of overhead.
+ // (A database would need a massive recommendedMaxKVSize to pass
+ // that limit.)
+ size := len(segmentIndexKey(prefix, 0)) + 3
+ segment := 0
+
+ var reflist PackageRefList
+ clearSegmentRefs(&reflist, recommendedMaxKVSize)
+ for _, ref := range sl.bucketRefs[idx].Refs {
+ // In order to determine the size of the ref in the database,
+ // we need to know how much overhead will be added by msgpack
+ // encoding. 
+ requiredSize := len(ref) + if requiredSize < 1<<5 { + requiredSize++ + } else if requiredSize < 1<<8 { + requiredSize += 2 + } else if requiredSize < 1<<16 { + requiredSize += 3 + } else { + requiredSize += 4 + } + if size+requiredSize > recommendedMaxKVSize { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + clearSegmentRefs(&reflist, recommendedMaxKVSize) + segment++ + } + + reflist.Refs = append(reflist.Refs, ref) + size += requiredSize + } + + if len(reflist.Refs) > 0 { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + } + + uctx.rb.alreadyWritten.Add(digest) + if uctx.stats != nil { + uctx.stats.Buckets++ + } + } + } + + var buf bytes.Buffer + encoder := codec.NewEncoder(&buf, &codec.MsgpackHandle{}) + encoder.Encode(sl) + err := uctx.rb.batch.Put(key, buf.Bytes()) + if err == nil && uctx.stats != nil { + uctx.stats.Reflists++ + } + return err +} + +// UpdateInBatch will save or update the SplitRefList at key, as well as save the buckets inside, +// as part of the given batch +func (collection *RefListCollection) UpdateInBatch(sl *SplitRefList, key []byte, batch *RefListBatch) error { + return collection.updateWithContext(sl, key, &reflistUpdateContext{rb: batch}) +} + +// Update will save or update the SplitRefList at key, as well as save the buckets inside +func (collection *RefListCollection) Update(sl *SplitRefList, key []byte) error { + rb := collection.NewBatch(collection.db.CreateBatch()) + err := collection.UpdateInBatch(sl, key, rb) + if err == nil { + err = rb.batch.Write() + } + return err +} + +// RefListMigrationStats counts a number of reflists, buckets, and segments +type RefListMigrationStats struct { + Reflists, Buckets, Segments int +} + +// RefListMigration wraps a RefListBatch for the purpose of migrating inline format +// reflists to split reflists +// +// Once the batch gets too large, it will automatically be flushed to the database, +// and a new batch will be created in its place. +type RefListMigration struct { + rb *RefListBatch + + dryRun bool + + // current number of reflists/buckets/segments queued in the current, unwritten batch + batchStats RefListMigrationStats + flushStats RefListMigrationStats +} + +// NewMigration creates an empty RefListMigration +func (collection *RefListCollection) NewMigration() *RefListMigration { + return &RefListMigration{} +} + +// NewMigrationDryRun creates an empty RefListMigration that will track the +// changes to make as usual but avoid actually writing to the db +func (collection *RefListCollection) NewMigrationDryRun() *RefListMigration { + return &RefListMigration{dryRun: true} +} + +// Stats returns statistics on the written values in the current migration +func (migration *RefListMigration) Stats() RefListMigrationStats { + return migration.flushStats +} + +// Flush will flush the current batch in the migration to the database +func (migration *RefListMigration) Flush() error { + if migration.batchStats.Segments > 0 { + if !migration.dryRun { + if err := migration.rb.batch.Write(); err != nil { + return err + } + + // It's important that we don't clear the batch on dry runs, because + // the batch is what contains the list of already-written buckets. + // If we're not writing to the database, and we clear that list, + // duplicate "writes" will occur. 
+ migration.rb = nil + } + + migration.flushStats.Reflists += migration.batchStats.Reflists + migration.flushStats.Buckets += migration.batchStats.Buckets + migration.flushStats.Segments += migration.batchStats.Segments + migration.batchStats = RefListMigrationStats{} + } + + return nil +} + +// LoadCompleteAndMigrate will load the reflist and its buckets as RefListCollection.LoadComplete, +// migrating any inline reflists to split ones along the way +func (collection *RefListCollection) LoadCompleteAndMigrate(sl *SplitRefList, key []byte, migration *RefListMigration) error { + // Given enough reflists, the memory used by a batch starts to become massive, so + // make sure to flush the written segments periodically. Note that this is only + // checked *after* a migration of a full bucket (and all the segments inside) + // takes place, as splitting a single bucket write into multiple batches would + // be unsafe if an interruption occurs midway. + const maxMigratorBatch = 50000 + + format, err := collection.load(sl, key) + if err != nil { + return err + } + + switch format { + case reflistStorageFormatInline: + if migration.rb == nil { + migration.rb = collection.NewBatch(collection.db.CreateBatch()) + } + + collection.updateWithContext(sl, key, &reflistUpdateContext{ + rb: migration.rb, + stats: &migration.batchStats, + }) + + if migration.batchStats.Segments > maxMigratorBatch { + if err := migration.Flush(); err != nil { + return err + } + } + + return nil + case reflistStorageFormatSplit: + return collection.loadBuckets(sl) + default: + panic(fmt.Sprintf("unexpected format %v", format)) + } +} + +// AnyRefList is implemented by both PackageRefList and SplitRefList +type AnyRefList interface { + Has(p *Package) bool + Len() int + ForEach(handler func([]byte) error) error + FilterLatestRefs() +} + +// Check interface +var ( + _ AnyRefList = (*PackageRefList)(nil) + _ AnyRefList = (*SplitRefList)(nil) +) diff --git a/deb/reflist_bench_test.go b/deb/reflist_bench_test.go index b377574ce4..f81a84d0ae 100644 --- a/deb/reflist_bench_test.go +++ b/deb/reflist_bench_test.go @@ -45,3 +45,41 @@ func BenchmarkReflistDecode(b *testing.B) { (&PackageRefList{}).Decode(data) } } + +func BenchmarkSplitRefListCreationSmall(b *testing.B) { + const count = 400 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} + +func BenchmarkSplitRefListCreationLarge(b *testing.B) { + const count = 4096 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} diff --git a/deb/reflist_test.go b/deb/reflist_test.go index bcabec3c21..0582168488 100644 --- a/deb/reflist_test.go +++ b/deb/reflist_test.go @@ -1,7 +1,10 @@ package deb import ( + "bytes" + "encoding/hex" "errors" + "fmt" "github.com/aptly-dev/aptly/database/goleveldb" @@ -9,24 +12,83 @@ import ( ) type PackageRefListSuite struct { - // Simple list with "real" packages from stanzas - list *PackageList p1, p2, p3, p4, p5, p6 *Package } var _ = Suite(&PackageRefListSuite{}) -func toStrSlice(reflist *PackageRefList) (result []string) { +func verifyRefListIntegrity(c *C, rl AnyRefList) AnyRefList { + if rl, ok := rl.(*SplitRefList); ok { + for 
idx, bucket := range rl.bucketRefs { + if bucket == nil { + bucket = NewPackageRefList() + } + c.Check(rl.Buckets[idx], DeepEquals, reflistDigest(bucket)) + } + } + + return rl +} + +func getRefs(rl AnyRefList) (refs [][]byte) { + switch rl := rl.(type) { + case *PackageRefList: + refs = rl.Refs + case *SplitRefList: + refs = rl.Flatten().Refs + default: + panic(fmt.Sprintf("unexpected reflist type %t", rl)) + } + + // Hack so that passing getRefs-returned slices to DeepEquals won't fail given a nil + // slice and an empty slice. + if len(refs) == 0 { + refs = nil + } + return +} + +func toStrSlice(reflist AnyRefList) (result []string) { result = make([]string, reflist.Len()) - for i, r := range reflist.Refs { + for i, r := range getRefs(reflist) { result[i] = string(r) } return } -func (s *PackageRefListSuite) SetUpTest(c *C) { - s.list = NewPackageList() +type reflistFactory struct { + new func() AnyRefList + newFromRefs func(refs ...[]byte) AnyRefList + newFromPackageList func(list *PackageList) AnyRefList +} + +func forEachRefList(test func(f reflistFactory)) { + test(reflistFactory{ + new: func() AnyRefList { + return NewPackageRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return &PackageRefList{Refs: refs} + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewPackageRefListFromPackageList(list) + }, + }) + test(reflistFactory{ + new: func() AnyRefList { + return NewSplitRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return NewSplitRefListFromRefList(&PackageRefList{Refs: refs}) + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewSplitRefListFromPackageList(list) + }, + }) +} + +func (s *PackageRefListSuite) SetUpTest(c *C) { s.p1 = NewPackageFromControlFile(packageStanza.Copy()) s.p2 = NewPackageFromControlFile(packageStanza.Copy()) stanza := packageStanza.Copy() @@ -44,346 +106,600 @@ func (s *PackageRefListSuite) SetUpTest(c *C) { } func (s *PackageRefListSuite) TestNewPackageListFromRefList(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - coll.Update(s.p1) - coll.Update(s.p3) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + coll.Update(s.p1) + coll.Update(s.p3) - reflist := NewPackageRefListFromPackageList(s.list) + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - _, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, ErrorMatches, "unable to load package with key.*") + reflist := f.newFromPackageList(list) - coll.Update(s.p5) - coll.Update(s.p6) + _, err := NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, ErrorMatches, "unable to load package with key.*") - list, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 4) - c.Check(list.Add(s.p4), ErrorMatches, "package already exists and is different: .*") + coll.Update(s.p5) + coll.Update(s.p6) - list, err = NewPackageListFromRefList(nil, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 0) + list, err = NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 4) + c.Check(list.Add(s.p4), ErrorMatches, "package already exists and is different: .*") + + list, err = NewPackageListFromRefList(nil, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 0) + }) } func (s 
*PackageRefListSuite) TestNewPackageRefList(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) - - reflist := NewPackageRefListFromPackageList(s.list) - c.Assert(reflist.Len(), Equals, 4) - c.Check(reflist.Refs[0], DeepEquals, []byte(s.p1.Key(""))) - c.Check(reflist.Refs[1], DeepEquals, []byte(s.p6.Key(""))) - c.Check(reflist.Refs[2], DeepEquals, []byte(s.p5.Key(""))) - c.Check(reflist.Refs[3], DeepEquals, []byte(s.p3.Key(""))) - - reflist = NewPackageRefList() - c.Check(reflist.Len(), Equals, 0) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) + + reflist := f.newFromPackageList(list) + verifyRefListIntegrity(c, reflist) + c.Assert(reflist.Len(), Equals, 4) + refs := getRefs(reflist) + c.Check(refs[0], DeepEquals, []byte(s.p1.Key(""))) + c.Check(refs[1], DeepEquals, []byte(s.p6.Key(""))) + c.Check(refs[2], DeepEquals, []byte(s.p5.Key(""))) + c.Check(refs[3], DeepEquals, []byte(s.p3.Key(""))) + + reflist = f.new() + c.Check(reflist.Len(), Equals, 0) + }) } -func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) +func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - reflist := NewPackageRefListFromPackageList(s.list) + reflist := f.newFromPackageList(list) - reflist2 := &PackageRefList{} - err := reflist2.Decode(reflist.Encode()) - c.Assert(err, IsNil) - c.Check(reflist2.Len(), Equals, reflist.Len()) - c.Check(reflist2.Refs, DeepEquals, reflist.Refs) -} + Len := 0 + err := reflist.ForEach(func([]byte) error { + Len++ + return nil + }) -func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + c.Check(Len, Equals, 4) + c.Check(err, IsNil) - reflist := NewPackageRefListFromPackageList(s.list) + e := errors.New("b") - Len := 0 - err := reflist.ForEach(func([]byte) error { - Len++ - return nil - }) - - c.Check(Len, Equals, 4) - c.Check(err, IsNil) + err = reflist.ForEach(func([]byte) error { + return e + }) - e := errors.New("b") + c.Check(err, Equals, e) + }) +} - err = reflist.ForEach(func([]byte) error { - return e +func (s *PackageRefListSuite) TestHas(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + reflist := f.newFromPackageList(list) + + c.Check(reflist.Has(s.p1), Equals, true) + c.Check(reflist.Has(s.p3), Equals, true) + c.Check(reflist.Has(s.p5), Equals, true) + c.Check(reflist.Has(s.p2), Equals, true) + c.Check(reflist.Has(s.p6), Equals, false) }) +} - c.Check(err, Equals, e) +func subtractRefLists(l, r AnyRefList) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Subtract(r.(*PackageRefList)) + case *SplitRefList: + return l.Subtract(r.(*SplitRefList)) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } } -func (s *PackageRefListSuite) TestHas(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - reflist := NewPackageRefListFromPackageList(s.list) - - c.Check(reflist.Has(s.p1), Equals, true) - c.Check(reflist.Has(s.p3), Equals, true) - c.Check(reflist.Has(s.p5), Equals, true) - c.Check(reflist.Has(s.p2), Equals, true) - c.Check(reflist.Has(s.p6), Equals, false) +func (s *PackageRefListSuite) TestSubtract(c *C) { + 
forEachRefList(func(f reflistFactory) { + r1 := []byte("Pall r1") + r2 := []byte("Pall r2") + r3 := []byte("Pall r3") + r4 := []byte("Pall r4") + r5 := []byte("Pall r5") + + empty := f.newFromRefs() + l1 := f.newFromRefs(r1, r2, r3, r4) + l2 := f.newFromRefs(r1, r3) + l3 := f.newFromRefs(r2, r4) + l4 := f.newFromRefs(r4, r5) + l5 := f.newFromRefs(r1, r2, r3) + + c.Check(getRefs(subtractRefLists(l1, empty)), DeepEquals, getRefs(l1)) + c.Check(getRefs(subtractRefLists(l1, l2)), DeepEquals, getRefs(l3)) + c.Check(getRefs(subtractRefLists(l1, l3)), DeepEquals, getRefs(l2)) + c.Check(getRefs(subtractRefLists(l1, l4)), DeepEquals, getRefs(l5)) + c.Check(getRefs(subtractRefLists(empty, l1)), DeepEquals, getRefs(empty)) + c.Check(getRefs(subtractRefLists(l2, l3)), DeepEquals, getRefs(l2)) + }) } -func (s *PackageRefListSuite) TestSubstract(c *C) { - r1 := []byte("r1") - r2 := []byte("r2") - r3 := []byte("r3") - r4 := []byte("r4") - r5 := []byte("r5") - - empty := &PackageRefList{Refs: [][]byte{}} - l1 := &PackageRefList{Refs: [][]byte{r1, r2, r3, r4}} - l2 := &PackageRefList{Refs: [][]byte{r1, r3}} - l3 := &PackageRefList{Refs: [][]byte{r2, r4}} - l4 := &PackageRefList{Refs: [][]byte{r4, r5}} - l5 := &PackageRefList{Refs: [][]byte{r1, r2, r3}} - - c.Check(l1.Subtract(empty), DeepEquals, l1) - c.Check(l1.Subtract(l2), DeepEquals, l3) - c.Check(l1.Subtract(l3), DeepEquals, l2) - c.Check(l1.Subtract(l4), DeepEquals, l5) - c.Check(empty.Subtract(l1), DeepEquals, empty) - c.Check(l2.Subtract(l3), DeepEquals, l2) +func diffRefLists(l, r AnyRefList, packageCollection *PackageCollection) (PackageDiffs, error) { + switch l := l.(type) { + case *PackageRefList: + return l.Diff(r.(*PackageRefList), packageCollection, nil) + case *SplitRefList: + return l.Diff(r.(*SplitRefList), packageCollection, nil) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } } func (s *PackageRefListSuite) TestDiff(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 - } + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 + } + + for _, p := range packages { + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[6]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + + diffAA, err := diffRefLists(reflistA, reflistA, coll) + c.Check(err, 
IsNil) + c.Check(diffAA, HasLen, 0) + + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 4) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") + c.Check(diffAB[2].Right, IsNil) + + c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") + c.Check(diffAB[3].Right, IsNil) + + diffBA, err := diffRefLists(reflistB, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffBA, HasLen, 4) + + c.Check(diffBA[0].Right, IsNil) + c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") + c.Check(diffBA[2].Left, IsNil) + + c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") + c.Check(diffBA[3].Left, IsNil) + }) +} - for _, p := range packages { - coll.Update(p) - } +func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[6]) + packages := []*Package{ + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 + } - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) + for _, p := range packages { + coll.Update(p) + } - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + listA := NewPackageList() + listA.Add(packages[0]) - diffAA, err := reflistA.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffAA, HasLen, 0) + listB := NewPackageList() + listB.Add(packages[1]) + listB.Add(packages[2]) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 4) + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 2) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + }) +} - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") +func mergeRefLists(l, r AnyRefList, overrideMatching, ignoreConflicting bool) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Merge(r.(*PackageRefList), overrideMatching, ignoreConflicting) + case *SplitRefList: + return l.Merge(r.(*SplitRefList), overrideMatching, ignoreConflicting) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } +} - c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") - c.Check(diffAB[2].Right, IsNil) +func (s *PackageRefListSuite) TestMerge(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ 
:= goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 + {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 + {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 + {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 + } + + for _, p := range packages { + p.V06Plus = true + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[7]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + listB.Add(packages[6]) + + listC := NewPackageList() + listC.Add(packages[0]) + listC.Add(packages[8]) + listC.Add(packages[9]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + reflistC := f.newFromPackageList(listC) + + mergeAB := mergeRefLists(reflistA, reflistB, true, false) + mergeBA := mergeRefLists(reflistB, reflistA, true, false) + mergeAC := mergeRefLists(reflistA, reflistC, true, false) + mergeBC := mergeRefLists(reflistB, reflistC, true, false) + mergeCB := mergeRefLists(reflistC, reflistB, true, false) + + verifyRefListIntegrity(c, mergeAB) + verifyRefListIntegrity(c, mergeBA) + verifyRefListIntegrity(c, mergeAC) + verifyRefListIntegrity(c, mergeBC) + verifyRefListIntegrity(c, mergeCB) + + c.Check(toStrSlice(mergeAB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBA), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeAC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + c.Check(toStrSlice(mergeCB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) + + mergeABall := mergeRefLists(reflistA, reflistB, false, false) + mergeBAall := mergeRefLists(reflistB, reflistA, false, false) + mergeACall := mergeRefLists(reflistA, reflistC, false, false) + mergeBCall := mergeRefLists(reflistB, reflistC, false, false) + mergeCBall := mergeRefLists(reflistC, reflistB, false, false) + + verifyRefListIntegrity(c, mergeABall) + verifyRefListIntegrity(c, mergeBAall) + verifyRefListIntegrity(c, mergeACall) + verifyRefListIntegrity(c, mergeBCall) + verifyRefListIntegrity(c, mergeCBall) + + c.Check(mergeABall, DeepEquals, mergeBAall) + c.Check(toStrSlice(mergeBAall), DeepEquals, + []string{"Pall data 1.1~bp1 
00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + + c.Check(mergeBCall, Not(DeepEquals), mergeCBall) + c.Check(toStrSlice(mergeACall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBCall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 lib 1.0 00000000"}) + + mergeBCwithConflicts := mergeRefLists(reflistB, reflistC, false, true) + c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + }) +} - c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") - c.Check(diffAB[3].Right, IsNil) +func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { + forEachRefList(func(f reflistFactory) { + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, + {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, + {Name: "lib", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.5", Architecture: "i386"}, + {Name: "dpkg", Version: "1.6", Architecture: "i386"}, + } + + rl := NewPackageList() + rl.Add(packages[0]) + rl.Add(packages[1]) + rl.Add(packages[2]) + rl.Add(packages[3]) + rl.Add(packages[4]) + rl.Add(packages[5]) + rl.Add(packages[6]) + rl.Add(packages[7]) + + result := f.newFromPackageList(rl) + result.FilterLatestRefs() + + verifyRefListIntegrity(c, result) + c.Check(toStrSlice(result), DeepEquals, + []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + }) +} - diffBA, err := reflistB.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffBA, HasLen, 4) +func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - c.Check(diffBA[0].Right, IsNil) - c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + reflist := NewPackageRefListFromPackageList(list) - c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + reflist2 := &PackageRefList{} + err := reflist2.Decode(reflist.Encode()) + c.Assert(err, IsNil) + c.Check(reflist2.Len(), Equals, reflist.Len()) + c.Check(reflist2.Refs, DeepEquals, reflist.Refs) +} - c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") - c.Check(diffBA[2].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketPrefix(c *C) { + c.Check(bucketRefPrefix([]byte("Pall abcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pall libabcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pamd64 xy 1.0")), DeepEquals, []byte("xy")) +} - c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") - c.Check(diffBA[3].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketIdx(c *C) { + c.Check(bucketIdxForRef(s.p1.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p2.Key("")), 
Equals, 46) + c.Check(bucketIdxForRef(s.p3.Key("")), Equals, 26) + c.Check(bucketIdxForRef(s.p4.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p5.Key("")), Equals, 4) + c.Check(bucketIdxForRef(s.p6.Key("")), Equals, 46) +} +func (s *PackageRefListSuite) TestSplitRefListBuckets(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) + + sl := NewSplitRefListFromPackageList(list) + verifyRefListIntegrity(c, sl) + + c.Check(hex.EncodeToString(sl.Buckets[4]), Equals, "7287aed32daad5d1aab4e89533bde135381d932e60548cfc00b882ca8858ae07") + c.Check(toStrSlice(sl.bucketRefs[4]), DeepEquals, []string{string(s.p5.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[26]), Equals, "f31fc28e82368b63c8be47eefc64b8e217e2e5349c7e3827b98f80536b956f6e") + c.Check(toStrSlice(sl.bucketRefs[26]), DeepEquals, []string{string(s.p3.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[46]), Equals, "55e70286393afc5da5046d68c632d35f98bec24781ae433bd1a1069b52853367") + c.Check(toStrSlice(sl.bucketRefs[46]), DeepEquals, []string{string(s.p1.Key("")), string(s.p6.Key(""))}) } -func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) +func (s *PackageRefListSuite) TestRefListDigestSet(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) - packages := []*Package{ - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 - } + sl := NewSplitRefListFromPackageList(list) - for _, p := range packages { - coll.Update(p) - } + set := NewRefListDigestSet() + c.Check(set.Len(), Equals, 0) - listA := NewPackageList() - listA.Add(packages[0]) + err := sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, false) + return nil + }) + c.Assert(err, IsNil) - listB := NewPackageList() - listB.Add(packages[1]) - listB.Add(packages[2]) + set.AddAllInRefList(sl) + c.Check(set.Len(), Equals, 3) - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, true) + return nil + }) + c.Assert(err, IsNil) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 2) + firstDigest := sl.Buckets[bucketIdxForRef(s.p1.Key(""))] + set.Remove(firstDigest) + c.Check(set.Len(), Equals, 2) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, !bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + set2 := NewRefListDigestSet() + set2.AddAllInRefList(sl) + set2.RemoveAll(set) + + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set2.Has(digest), Equals, bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) } -func (s *PackageRefListSuite) TestMerge(c *C) { +func (s *PackageRefListSuite) TestRefListCollectionLoadSave(c *C) { db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ 
- {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 - {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 - {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 - {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 - } + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + err := reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(reflist)) - for _, p := range packages { - p.V06Plus = true - coll.Update(p) - } + list.Add(s.p6) + sl = NewSplitRefListFromPackageList(list) + err = reflistCollection.Update(sl, key) + c.Assert(err, IsNil) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[7]) - - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) - listB.Add(packages[6]) - - listC := NewPackageList() - listC.Add(packages[0]) - listC.Add(packages[8]) - listC.Add(packages[9]) - - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) - reflistC := NewPackageRefListFromPackageList(listC) - - mergeAB := reflistA.Merge(reflistB, true, false) - mergeBA := reflistB.Merge(reflistA, true, false) - mergeAC := reflistA.Merge(reflistC, true, false) - mergeBC := reflistB.Merge(reflistC, true, false) - mergeCB := reflistC.Merge(reflistB, true, false) - - c.Check(toStrSlice(mergeAB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBA), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeAC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) - c.Check(toStrSlice(mergeCB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) - - mergeABall := reflistA.Merge(reflistB, false, false) - mergeBAall := reflistB.Merge(reflistA, false, 
false) - mergeACall := reflistA.Merge(reflistC, false, false) - mergeBCall := reflistB.Merge(reflistC, false, false) - mergeCBall := reflistC.Merge(reflistB, false, false) - - c.Check(mergeABall, DeepEquals, mergeBAall) - c.Check(toStrSlice(mergeBAall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - - c.Check(mergeBCall, Not(DeepEquals), mergeCBall) - c.Check(toStrSlice(mergeACall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBCall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 lib 1.0 00000000"}) - - mergeBCwithConflicts := reflistB.Merge(reflistC, false, true) - c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) } -func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, - {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, - {Name: "lib", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.5", Architecture: "i386"}, - {Name: "dpkg", Version: "1.6", Architecture: "i386"}, - } +func (s *PackageRefListSuite) TestRefListCollectionMigrate(c *C) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + format, err := reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatInline) + + migrator := reflistCollection.NewMigration() + err = reflistCollection.LoadCompleteAndMigrate(sl, key, migrator) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) - rl := NewPackageList() - rl.Add(packages[0]) - rl.Add(packages[1]) - rl.Add(packages[2]) - rl.Add(packages[3]) - rl.Add(packages[4]) - rl.Add(packages[5]) - rl.Add(packages[6]) - rl.Add(packages[7]) - - result := NewPackageRefListFromPackageList(rl) - result.FilterLatestRefs() - - c.Check(toStrSlice(result), DeepEquals, - []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + stats := 
migrator.Stats() + c.Check(stats.Reflists, Equals, 0) + c.Check(stats.Buckets, Equals, 0) + c.Check(stats.Segments, Equals, 0) + + err = migrator.Flush() + c.Assert(err, IsNil) + stats = migrator.Stats() + c.Check(stats.Reflists, Equals, 1) + c.Check(stats.Buckets, Not(Equals), 0) + c.Check(stats.Segments, Equals, stats.Segments) + + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) + + format, err = reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatSplit) } diff --git a/deb/remote.go b/deb/remote.go index b10af86550..7dc5eee31b 100644 --- a/deb/remote.go +++ b/deb/remote.go @@ -73,7 +73,7 @@ type RemoteRepo struct { // Packages for json output Packages []string `codec:"-" json:",omitempty"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList // Parsed archived root archiveRootURL *url.URL // Current list of packages (filled while updating mirror) @@ -163,14 +163,11 @@ func (repo *RemoteRepo) IsFlat() bool { // NumPackages return number of packages retrieved from remote repo func (repo *RemoteRepo) NumPackages() int { - if repo.packageRefs == nil { - return 0 - } return repo.packageRefs.Len() } // RefList returns package list for repo -func (repo *RemoteRepo) RefList() *PackageRefList { +func (repo *RemoteRepo) RefList() *SplitRefList { return repo.packageRefs } @@ -686,7 +683,7 @@ func (repo *RemoteRepo) FinalizeDownload(collectionFactory *CollectionFactory, p }) if err == nil { - repo.packageRefs = NewPackageRefListFromPackageList(repo.packageList) + repo.packageRefs = NewSplitRefListFromPackageList(repo.packageList) repo.packageList = nil } @@ -828,14 +825,14 @@ func (collection *RemoteRepoCollection) search(filter func(*RemoteRepo) bool, un } // Add appends new repo to collection and saves it -func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Add(repo *RemoteRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("mirror with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -845,28 +842,26 @@ func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { } // Update stores updated information about repo in DB -func (collection *RemoteRepoCollection) Update(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Update(repo *RemoteRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information for remote repo -func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - 
repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/remote_test.go b/deb/remote_test.go index 1998db7e0f..bf523e5464 100644 --- a/deb/remote_test.go +++ b/deb/remote_test.go @@ -52,7 +52,7 @@ func (n *NullVerifier) IsClearSigned(clearsign io.Reader) (bool, error) { type PackageListMixinSuite struct { p1, p2, p3 *Package list *PackageList - reflist *PackageRefList + reflist *SplitRefList } func (s *PackageListMixinSuite) SetUpPackages() { @@ -72,7 +72,7 @@ func (s *PackageListMixinSuite) SetUpPackages() { s.list.Add(s.p2) s.list.Add(s.p3) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) } type RemoteRepoSuite struct { @@ -290,7 +290,7 @@ func (s *RemoteRepoSuite) TestDownload(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -372,12 +372,12 @@ func (s *RemoteRepoSuite) TestDownloadWithInstaller(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "installer") } @@ -418,12 +418,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSources(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -502,7 +502,7 @@ func (s *RemoteRepoSuite) TestDownloadFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -592,12 +592,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[1]) c.Assert(err, 
IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -657,8 +657,9 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { type RemoteRepoCollectionSuite struct { PackageListMixinSuite - db database.Storage - collection *RemoteRepoCollection + db database.Storage + collection *RemoteRepoCollection + refListCollection *RefListCollection } var _ = Suite(&RemoteRepoCollectionSuite{}) @@ -666,6 +667,7 @@ var _ = Suite(&RemoteRepoCollectionSuite{}) func (s *RemoteRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewRemoteRepoCollection(s.db) + s.refListCollection = NewRefListCollection(s.db) s.SetUpPackages() } @@ -678,8 +680,8 @@ func (s *RemoteRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("yandex") c.Assert(err, IsNil) @@ -696,7 +698,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -710,7 +712,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection := NewRemoteRepoCollection(s.db) r, err := collection.ByName("yandex") @@ -718,20 +720,20 @@ func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection = NewRemoteRepoCollection(s.db) r, err = collection.ByName("yandex") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.refListCollection), IsNil) c.Assert(r.NumPackages(), Equals, 3) } func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo) + s.collection.Add(repo, s.refListCollection) count := 0 err := s.collection.ForEach(func(*RemoteRepo) error { @@ -753,10 +755,10 @@ func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *RemoteRepoCollectionSuite) TestDrop(c *C) { repo1, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo1) + s.collection.Add(repo1, s.refListCollection) repo2, _ := NewRemoteRepo("tyndex", "http://mirror.yandex.ru/debian/", "wheezy", []string{"main"}, []string{}, false, false, false) - 
s.collection.Add(repo2) + s.collection.Add(repo2, s.refListCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/snapshot.go b/deb/snapshot.go index ed8572ad6c..d5f9a1b38a 100644 --- a/deb/snapshot.go +++ b/deb/snapshot.go @@ -40,7 +40,7 @@ type Snapshot struct { NotAutomatic string ButAutomaticUpgrades string - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewSnapshotFromRepository creates snapshot from current state of repository @@ -76,7 +76,7 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { } if snap.packageRefs == nil { - snap.packageRefs = NewPackageRefList() + snap.packageRefs = NewSplitRefList() } return snap, nil @@ -84,11 +84,13 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { // NewSnapshotFromPackageList creates snapshot from PackageList func NewSnapshotFromPackageList(name string, sources []*Snapshot, list *PackageList, description string) *Snapshot { - return NewSnapshotFromRefList(name, sources, NewPackageRefListFromPackageList(list), description) + sl := NewSplitRefList() + sl.Replace(NewPackageRefListFromPackageList(list)) + return NewSnapshotFromRefList(name, sources, sl, description) } -// NewSnapshotFromRefList creates snapshot from PackageRefList -func NewSnapshotFromRefList(name string, sources []*Snapshot, list *PackageRefList, description string) *Snapshot { +// NewSnapshotFromRefList creates snapshot from SplitRefList +func NewSnapshotFromRefList(name string, sources []*Snapshot, list *SplitRefList, description string) *Snapshot { sourceUUIDs := make([]string, len(sources)) for i := range sources { sourceUUIDs[i] = sources[i].UUID @@ -116,7 +118,7 @@ func (s *Snapshot) NumPackages() int { } // RefList returns list of package refs in snapshot -func (s *Snapshot) RefList() *PackageRefList { +func (s *Snapshot) RefList() *SplitRefList { return s.packageRefs } @@ -209,13 +211,13 @@ func NewSnapshotCollection(db database.Storage) *SnapshotCollection { } // Add appends new repo to collection and saves it -func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Add(snapshot *Snapshot, reflistCollection *RefListCollection) error { _, err := collection.ByName(snapshot.Name) if err == nil { return fmt.Errorf("snapshot with name %s already exists", snapshot.Name) } - err = collection.Update(snapshot) + err = collection.Update(snapshot, reflistCollection) if err != nil { return err } @@ -225,26 +227,22 @@ func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { } // Update stores updated information about snapshot in DB -func (collection *SnapshotCollection) Update(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Update(snapshot *Snapshot, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(snapshot.Key(), snapshot.Encode()) if snapshot.packageRefs != nil { - batch.Put(snapshot.RefKey(), snapshot.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(snapshot.packageRefs, snapshot.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information about snapshot -func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot) error { - encoded, err := collection.db.Get(snapshot.RefKey()) - if err != nil { - return err - } - - snapshot.packageRefs = &PackageRefList{} - return snapshot.packageRefs.Decode(encoded) +func (collection *SnapshotCollection) 
LoadComplete(snapshot *Snapshot, reflistCollection *RefListCollection) error { + snapshot.packageRefs = NewSplitRefList() + return reflistCollection.LoadComplete(snapshot.packageRefs, snapshot.RefKey()) } func (collection *SnapshotCollection) search(filter func(*Snapshot) bool, unique bool) []*Snapshot { diff --git a/deb/snapshot_bench_test.go b/deb/snapshot_bench_test.go index c6bb94a2c6..4475ca57b5 100644 --- a/deb/snapshot_bench_test.go +++ b/deb/snapshot_bench_test.go @@ -18,10 +18,11 @@ func BenchmarkSnapshotCollectionForEach(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } @@ -47,11 +48,12 @@ func BenchmarkSnapshotCollectionByUUID(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) uuids := []string{} for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } uuids = append(uuids, snapshot.UUID) @@ -78,10 +80,11 @@ func BenchmarkSnapshotCollectionByName(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } diff --git a/deb/snapshot_test.go b/deb/snapshot_test.go index d27c422690..805ccc8e5b 100644 --- a/deb/snapshot_test.go +++ b/deb/snapshot_test.go @@ -109,6 +109,7 @@ type SnapshotCollectionSuite struct { snapshot1, snapshot2 *Snapshot snapshot3, snapshot4 *Snapshot collection *SnapshotCollection + reflistCollection *RefListCollection } var _ = Suite(&SnapshotCollectionSuite{}) @@ -116,6 +117,7 @@ var _ = Suite(&SnapshotCollectionSuite{}) func (s *SnapshotCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewSnapshotCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.SetUpPackages() s.repo1, _ = NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) @@ -143,10 +145,10 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { _, err := s.collection.ByName("snap1") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), ErrorMatches, ".*already exists") - 
c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) snapshot, err := s.collection.ByName("snap1") c.Assert(err, IsNil) @@ -167,20 +169,20 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { } func (s *SnapshotCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.snapshot1), IsNil) + c.Assert(s.collection.Update(s.snapshot1, s.reflistCollection), IsNil) collection := NewSnapshotCollection(s.db) snapshot, err := collection.ByName("snap1") c.Assert(err, IsNil) c.Assert(snapshot.packageRefs, IsNil) - c.Assert(s.collection.LoadComplete(snapshot), IsNil) + c.Assert(s.collection.LoadComplete(snapshot, s.reflistCollection), IsNil) c.Assert(snapshot.NumPackages(), Equals, 3) } func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*Snapshot) error { @@ -200,10 +202,10 @@ func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { } func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { - s.collection.Add(s.snapshot2) - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot4) - s.collection.Add(s.snapshot3) + s.collection.Add(s.snapshot2, s.reflistCollection) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot4, s.reflistCollection) + s.collection.Add(s.snapshot3, s.reflistCollection) names := []string{} @@ -217,8 +219,8 @@ func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { } func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) c.Check(s.collection.ByRemoteRepoSource(s.repo1), DeepEquals, []*Snapshot{s.snapshot1}) c.Check(s.collection.ByRemoteRepoSource(s.repo2), DeepEquals, []*Snapshot{s.snapshot2}) @@ -229,10 +231,10 @@ func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { } func (s *SnapshotCollectionSuite) TestFindByLocalRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(s.snapshot3), IsNil) - c.Assert(s.collection.Add(s.snapshot4), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot4, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepoSource(s.lrepo1), DeepEquals, []*Snapshot{s.snapshot3}) c.Check(s.collection.ByLocalRepoSource(s.lrepo2), DeepEquals, []*Snapshot{s.snapshot4}) @@ -247,11 +249,11 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { snapshot4 := NewSnapshotFromRefList("snap4", []*Snapshot{s.snapshot1}, s.reflist, "desc2") snapshot5 := NewSnapshotFromRefList("snap5", []*Snapshot{snapshot3}, s.reflist, "desc3") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(snapshot3), IsNil) - c.Assert(s.collection.Add(snapshot4), IsNil) - c.Assert(s.collection.Add(snapshot5), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + 
c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot5, s.reflistCollection), IsNil) list := s.collection.BySnapshotSource(s.snapshot1) sorter, _ := newSnapshotSorter("name", list) @@ -263,8 +265,8 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { } func (s *SnapshotCollectionSuite) TestDrop(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) snap, _ := s.collection.ByUUID(s.snapshot1.UUID) c.Check(snap, Equals, s.snapshot1) diff --git a/system/t08_db/CleanupDB10Test_gold b/system/t08_db/CleanupDB10Test_gold index 138adc2947..faa25944e3 100644 --- a/system/t08_db/CleanupDB10Test_gold +++ b/system/t08_db/CleanupDB10Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB11Test_gold b/system/t08_db/CleanupDB11Test_gold index be3767e9cf..33be8b2a4e 100644 --- a/system/t08_db/CleanupDB11Test_gold +++ b/system/t08_db/CleanupDB11Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Split 11 reflist(s) into 510 bucket(s) (123181 segment(s)) Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -24,6 +25,7 @@ List of package keys to delete: - Pi386 gnuplot-nox 4.6.1-1~maverick2 17785995cf0f815 - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB12Test_gold b/system/t08_db/CleanupDB12Test_gold index 31da9b23c4..4e88abc212 100644 --- a/system/t08_db/CleanupDB12Test_gold +++ b/system/t08_db/CleanupDB12Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Skipped splitting 11 reflist(s) into 510 bucket(s) (123181 segment(s)), as -dry-run has been requested. Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -25,6 +26,7 @@ List of package keys to delete: - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f Skipped deletion, as -dry-run has been requested. +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB1Test_gold b/system/t08_db/CleanupDB1Test_gold index 138adc2947..faa25944e3 100644 --- a/system/t08_db/CleanupDB1Test_gold +++ b/system/t08_db/CleanupDB1Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... 
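
Note on the gold output above: the new "Deleting unreferenced reflist buckets (N)..." lines come from a mark-and-sweep pass over bucket digests during `aptly db cleanup`. The sketch below is only a rough illustration of that idea, not the cleanup code in this series; it assumes it sits in package deb (module path github.com/aptly-dev/aptly), and the RemoveAll signature is guessed from its doc comment, while AllBucketDigests, AddAllInRefList, UnsafeDropBucket and the database.Batch usage follow the deb/reflist.go diffs elsewhere in this series.

    package deb

    import "github.com/aptly-dev/aptly/database"

    // dropUnreferencedBuckets is an illustrative mark-and-sweep over reflist
    // buckets: start from every bucket digest in the database, remove the
    // digests still referenced by a repository or snapshot from the candidate
    // set, and delete whatever remains. The returned count is what a cleanup
    // task would report as "Deleting unreferenced reflist buckets (N)...".
    func dropUnreferencedBuckets(db database.Storage, reflists *RefListCollection, stillUsed []*SplitRefList) (int, error) {
    	candidates, err := reflists.AllBucketDigests()
    	if err != nil {
    		return 0, err
    	}

    	// Mark: digests referenced by any loaded repo/snapshot reflist are kept.
    	used := NewRefListDigestSet()
    	for _, sl := range stillUsed {
    		used.AddAllInRefList(sl)
    	}
    	candidates.RemoveAll(used) // assumed signature: RemoveAll(other *RefListDigestSet)

    	// Sweep: queue deletion of every segment of the remaining buckets.
    	batch := db.CreateBatch()
    	count := 0
    	err = candidates.ForEach(func(digest []byte) error {
    		count++
    		return reflists.UnsafeDropBucket(digest, batch)
    	})
    	if err != nil {
    		return 0, err
    	}
    	return count, batch.Write()
    }
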
diff --git a/system/t08_db/CleanupDB2Test_gold b/system/t08_db/CleanupDB2Test_gold index 1f289e677b..4e84de6a82 100644 --- a/system/t08_db/CleanupDB2Test_gold +++ b/system/t08_db/CleanupDB2Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (73270)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB3Test_gold b/system/t08_db/CleanupDB3Test_gold index 73279e145f..73c82f87d1 100644 --- a/system/t08_db/CleanupDB3Test_gold +++ b/system/t08_db/CleanupDB3Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB4Test_gold b/system/t08_db/CleanupDB4Test_gold index 138adc2947..faa25944e3 100644 --- a/system/t08_db/CleanupDB4Test_gold +++ b/system/t08_db/CleanupDB4Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB5Test_gold b/system/t08_db/CleanupDB5Test_gold index 73279e145f..47bba4e2ba 100644 --- a/system/t08_db/CleanupDB5Test_gold +++ b/system/t08_db/CleanupDB5Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (1)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB6Test_gold b/system/t08_db/CleanupDB6Test_gold index 138adc2947..faa25944e3 100644 --- a/system/t08_db/CleanupDB6Test_gold +++ b/system/t08_db/CleanupDB6Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB7Test_gold b/system/t08_db/CleanupDB7Test_gold index 138adc2947..faa25944e3 100644 --- a/system/t08_db/CleanupDB7Test_gold +++ b/system/t08_db/CleanupDB7Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB8Test_gold b/system/t08_db/CleanupDB8Test_gold index f769f203f7..43ebe9aaa5 100644 --- a/system/t08_db/CleanupDB8Test_gold +++ b/system/t08_db/CleanupDB8Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (4)... +Deleting unreferenced reflist buckets (1)... 
Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (6)... diff --git a/system/t08_db/CleanupDB9Test_gold b/system/t08_db/CleanupDB9Test_gold index 138adc2947..faa25944e3 100644 --- a/system/t08_db/CleanupDB9Test_gold +++ b/system/t08_db/CleanupDB9Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... From 1a3346a8fa8405d39b0f159846075f7b5ea2ff0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Sun, 21 Apr 2024 12:17:08 +0200 Subject: [PATCH 3/7] fix golangci-lint error --- api/files.go | 2 +- api/snapshot.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/files.go b/api/files.go index f848d8882c..3fde21dbfb 100644 --- a/api/files.go +++ b/api/files.go @@ -170,7 +170,7 @@ func apiFilesListFiles(c *gin.Context) { listLock := &sync.Mutex{} root := filepath.Join(context.UploadPath(), utils.SanitizePath(c.Params.ByName("dir"))) - err := walker.Walk(root, func(path string, info os.FileInfo) error { + err := walker.Walk(root, func(path string, _ os.FileInfo) error { if path == root { return nil } diff --git a/api/snapshot.go b/api/snapshot.go index 4adf255b22..55cbf15e31 100644 --- a/api/snapshot.go +++ b/api/snapshot.go @@ -254,7 +254,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { // including snapshot resource key resources := []string{string(repo.Key()), "S" + b.Name} taskName := fmt.Sprintf("Create snapshot of repo %s", name) - maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { + maybeRunTaskInBackground(c, taskName, resources, func(_ aptly.Progress, _ *task.Detail) (*task.ProcessReturnValue, error) { err := collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err From 174bdc2b5e2b70c82ebedb0f22fb4d749649dd73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Sun, 21 Apr 2024 15:30:48 +0200 Subject: [PATCH 4/7] fix golangci-lint errors --- deb/reflist.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deb/reflist.go b/deb/reflist.go index e039de1274..55588a998c 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -786,7 +786,7 @@ func segmentIndexKey(prefix []byte, idx int) []byte { func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, error) { digests := NewRefListDigestSet() - err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, value []byte) error { + err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, _ []byte) error { if !bytes.HasSuffix(key, []byte("-0000")) { // Ignore additional segments for the same digest. return nil @@ -818,7 +818,7 @@ func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, erro // the bucket is no longer referenced by any saved reflists. 
func (collection *RefListCollection) UnsafeDropBucket(digest []byte, batch database.Batch) error { prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) - return collection.db.ProcessByPrefix(prefix, func(key []byte, value []byte) error { + return collection.db.ProcessByPrefix(prefix, func(key []byte, _ []byte) error { return batch.Delete(key) }) } @@ -866,7 +866,7 @@ func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { if bucket == nil { bucket = NewPackageRefList() prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) - err := collection.db.ProcessByPrefix(prefix, func(digest []byte, value []byte) error { + err := collection.db.ProcessByPrefix(prefix, func(_ []byte, value []byte) error { var l PackageRefList if err := l.Decode(append([]byte{}, value...)); err != nil { return err From 2a07494910ff07390874448a50eb68d3499a94de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Mon, 17 Jun 2024 13:50:49 +0200 Subject: [PATCH 5/7] fix unit tests --- deb/local.go | 3 +++ deb/remote.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/deb/local.go b/deb/local.go index 7d134efe8d..18cddbf346 100644 --- a/deb/local.go +++ b/deb/local.go @@ -48,6 +48,9 @@ func (repo *LocalRepo) String() string { // NumPackages return number of packages in local repo func (repo *LocalRepo) NumPackages() int { + if repo.packageRefs == nil { + return 0 + } return repo.packageRefs.Len() } diff --git a/deb/remote.go b/deb/remote.go index 7dc5eee31b..febf62f8d4 100644 --- a/deb/remote.go +++ b/deb/remote.go @@ -163,6 +163,9 @@ func (repo *RemoteRepo) IsFlat() bool { // NumPackages return number of packages retrieved from remote repo func (repo *RemoteRepo) NumPackages() int { + if repo.packageRefs == nil { + return 0 + } return repo.packageRefs.Len() } From 483dcf19b3d0d500ac5163c31de6b5813243f2c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Mon, 17 Jun 2024 15:18:23 +0200 Subject: [PATCH 6/7] make compatible with go 1.19 --- deb/reflist.go | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/deb/reflist.go b/deb/reflist.go index 55588a998c..df1b9b2e6e 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -428,7 +428,10 @@ func bucketRefPrefix(ref []byte) []byte { ref = ref[len(libPrefix):] } - prefixLen := min(maxPrefixLen, len(ref)) + prefixLen := len(ref) + if maxPrefixLen < prefixLen { + prefixLen = maxPrefixLen + } prefix, _, _ := bytes.Cut(ref[:prefixLen], []byte{' '}) return prefix } @@ -715,9 +718,16 @@ func (set *RefListDigestSet) ForEach(handler func(digest []byte) error) error { return nil } +// workaround for: conversion of slices to arrays requires go1.20 or later +func newRefListArray(digest []byte) reflistDigestArray { + var array reflistDigestArray + copy(array[:], digest) + return array +} + // Add adds digest to set, doing nothing if the digest was already present func (set *RefListDigestSet) Add(digest []byte) { - set.items[reflistDigestArray(digest)] = struct{}{} + set.items[newRefListArray(digest)] = struct{}{} } // AddAllInRefList adds all the bucket digests in a SplitRefList to the set @@ -731,13 +741,13 @@ func (set *RefListDigestSet) AddAllInRefList(sl *SplitRefList) { // Has checks whether a digest is part of set func (set *RefListDigestSet) Has(digest []byte) bool { - _, ok := set.items[reflistDigestArray(digest)] + _, ok := set.items[newRefListArray(digest)] return ok } // Remove removes a digest from set func (set *RefListDigestSet) Remove(digest []byte) 
{ - delete(set.items, reflistDigestArray(digest)) + delete(set.items, newRefListArray(digest)) } // RemoveAll removes all the digests in other from the current set @@ -776,10 +786,20 @@ func segmentPrefix(encodedDigest string) []byte { return []byte(fmt.Sprintf("F%s-", encodedDigest)) } +// workaround for go 1.19 instead of bytes.Clone +func cloneBytes(b []byte) []byte { + if b == nil { + return nil + } + cloned := make([]byte, len(b)) + copy(cloned, b) + return cloned +} + func segmentIndexKey(prefix []byte, idx int) []byte { // Assume most buckets won't have more than 0xFFFF = ~65k segments (which // would be an extremely large bucket!). - return append(bytes.Clone(prefix), []byte(fmt.Sprintf("%04x", idx))...) + return append(cloneBytes(prefix), []byte(fmt.Sprintf("%04x", idx))...) } // AllBucketDigests returns a set of all the bucket digests in the database @@ -861,7 +881,7 @@ func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { var bucket *PackageRefList if digest := sl.Buckets[idx]; len(digest) > 0 { - cacheKey := reflistDigestArray(digest) + cacheKey := newRefListArray(digest) bucket = collection.cache[cacheKey] if bucket == nil { bucket = NewPackageRefList() From 3e99e37cc7b9c6e9da4070d70629f77520ba4814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Roth?= Date: Thu, 20 Jun 2024 09:39:21 +0200 Subject: [PATCH 7/7] handle packageRefs == nil --- deb/snapshot.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deb/snapshot.go b/deb/snapshot.go index d5f9a1b38a..7ea4493fe3 100644 --- a/deb/snapshot.go +++ b/deb/snapshot.go @@ -114,6 +114,9 @@ func (s *Snapshot) String() string { // NumPackages returns number of packages in snapshot func (s *Snapshot) NumPackages() int { + if s.packageRefs == nil { + return 0 + } return s.packageRefs.Len() }
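
Taken together, these patches change the calling convention around snapshots: the ref contents live in a shared RefListCollection, and SnapshotCollection.Add/Update/LoadComplete take it as an extra argument. Below is a minimal sketch of the resulting call pattern, assuming package deb and the module path github.com/aptly-dev/aptly; the function itself is invented for illustration, while the types and methods are the ones shown in the diffs above.

    package deb

    import (
    	"fmt"

    	"github.com/aptly-dev/aptly/database"
    )

    // exampleSnapshotRoundTrip creates a snapshot from a package list and
    // reloads it, threading the shared RefListCollection through Add and
    // LoadComplete so the reflist buckets are written and read once.
    func exampleSnapshotRoundTrip(db database.Storage, packages *PackageList) error {
    	snapshots := NewSnapshotCollection(db)
    	reflists := NewRefListCollection(db)

    	// NewSnapshotFromPackageList does this wrapping internally: build a
    	// flat ref list from the packages, then fan it out into split buckets.
    	sl := NewSplitRefList()
    	sl.Replace(NewPackageRefListFromPackageList(packages))

    	snapshot := NewSnapshotFromRefList("example-snap", nil, sl, "illustrative snapshot")
    	if err := snapshots.Add(snapshot, reflists); err != nil {
    		return err
    	}

    	// Reload through a fresh collection, as the tests do: the listed
    	// snapshot carries no refs until LoadComplete pulls the shared
    	// buckets back in.
    	reopened := NewSnapshotCollection(db)
    	loaded, err := reopened.ByName("example-snap")
    	if err != nil {
    		return err
    	}
    	if err := reopened.LoadComplete(loaded, reflists); err != nil {
    		return err
    	}

    	fmt.Printf("snapshot has %d package(s)\n", loaded.NumPackages())
    	return nil
    }

The same pattern applies to the local and remote repo collections, whose Add/Update/LoadComplete calls gain the same RefListCollection parameter in the earlier diffs of this series.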