The {@code Last-Modified} value is the HTTP response header from the
+ * upstream registry, which is the artifact publish date — the source of
+ * {@code release_date} in production (via
+ * {@code MavenProxyPackageProcessor.releaseMillis()}).
+ *
+ * @since 1.20.13
+ */
+final class ArtipieMetaSidecar {
+
+ /**
+ * Sidecar file suffix appended to the artifact path.
+ */
+ static final String SUFFIX = ".artipie-meta.json";
+
+ /**
+ * Logger.
+ */
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ArtipieMetaSidecar.class);
+
+ /**
+ * Private ctor — utility class, not instantiated.
+ */
+ private ArtipieMetaSidecar() {
+ }
+
+ /**
+ * Read the release date (epoch millis) from the
+ * {@code .artipie-meta.json} sidecar alongside the given artifact.
+ *
+     *
+     * <p>Returns empty if the sidecar is absent, the {@code headers}
+     * array is missing, no {@code Last-Modified} entry is present, or
+     * the date value cannot be parsed as RFC 1123.
+ * <p>Bulk: {@code --config-dir}, {@code --storage-root} — reads all
+ * {@code *.yaml} Artipie repo configs and scans each repo automatically
+ *
+ *
+ * @since 1.20.13
+ */
+public final class BackfillCli {
+
+ /**
+ * SLF4J logger.
+ */
+ private static final Logger LOG =
+ LoggerFactory.getLogger(BackfillCli.class);
+
+ /**
+ * Default batch size for inserts.
+ */
+ private static final int DEFAULT_BATCH_SIZE = 1000;
+
+ /**
+ * Default progress log interval.
+ */
+ private static final int DEFAULT_LOG_INTERVAL = 10000;
+
+ /**
+ * Default database user.
+ */
+ private static final String DEFAULT_DB_USER = "artipie";
+
+ /**
+ * Default database password.
+ */
+ private static final String DEFAULT_DB_PASSWORD = "artipie";
+
+ /**
+ * Default owner.
+ */
+ private static final String DEFAULT_OWNER = "system";
+
+ /**
+ * HikariCP maximum pool size.
+ */
+ private static final int POOL_MAX_SIZE = 5;
+
+ /**
+ * HikariCP minimum idle connections.
+ */
+ private static final int POOL_MIN_IDLE = 1;
+
+ /**
+ * HikariCP connection timeout in millis.
+ */
+ private static final long POOL_CONN_TIMEOUT = 5000L;
+
+ /**
+ * HikariCP idle timeout in millis.
+ */
+ private static final long POOL_IDLE_TIMEOUT = 30000L;
+
+ /**
+ * Private ctor to prevent instantiation.
+ */
+ private BackfillCli() {
+ }
+
+ /**
+ * CLI entry point.
+ *
+ * @param args Command-line arguments
+ */
+ public static void main(final String... args) {
+ System.exit(run(args));
+ }
+
+ /**
+ * Core logic extracted for testability. Returns an exit code
+ * (0 = success, 1 = error).
+ *
+ * @param args Command-line arguments
+ * @return Exit code
+ */
+ @SuppressWarnings("PMD.CyclomaticComplexity")
+ static int run(final String... args) {
+ final Options options = buildOptions();
+ for (final String arg : args) {
+ if ("--help".equals(arg) || "-h".equals(arg)) {
+ printHelp(options);
+ return 0;
+ }
+ }
+ final CommandLine cmd;
+ try {
+ cmd = new DefaultParser().parse(options, args);
+ } catch (final ParseException ex) {
+ LOG.error("Failed to parse arguments: {}", ex.getMessage());
+ printHelp(options);
+ return 1;
+ }
+ final boolean hasBulkFlags =
+ cmd.hasOption("config-dir") || cmd.hasOption("storage-root");
+ final boolean hasSingleFlags =
+ cmd.hasOption("type") || cmd.hasOption("path")
+ || cmd.hasOption("repo-name");
+ if (hasBulkFlags && hasSingleFlags) {
+ LOG.error(
+ "--config-dir/--storage-root cannot be combined with "
+ + "--type/--path/--repo-name"
+ );
+ return 1;
+ }
+ if (cmd.hasOption("config-dir") && !cmd.hasOption("storage-root")) {
+ LOG.error("--config-dir requires --storage-root");
+ return 1;
+ }
+ if (cmd.hasOption("storage-root") && !cmd.hasOption("config-dir")) {
+ LOG.error("--storage-root requires --config-dir");
+ return 1;
+ }
+ if (!hasBulkFlags && !hasSingleFlags) {
+ LOG.error(
+ "Either --type/--path/--repo-name or "
+ + "--config-dir/--storage-root must be provided"
+ );
+ printHelp(options);
+ return 1;
+ }
+ final boolean dryRun = cmd.hasOption("dry-run");
+ final String dbUrl = cmd.getOptionValue("db-url");
+ final String dbUser = cmd.getOptionValue("db-user", DEFAULT_DB_USER);
+ final String dbPassword =
+ cmd.getOptionValue("db-password", DEFAULT_DB_PASSWORD);
+ final int batchSize = Integer.parseInt(
+ cmd.getOptionValue(
+ "batch-size", String.valueOf(DEFAULT_BATCH_SIZE)
+ )
+ );
+ final String owner = cmd.getOptionValue("owner", DEFAULT_OWNER);
+ final int logInterval = Integer.parseInt(
+ cmd.getOptionValue(
+ "log-interval", String.valueOf(DEFAULT_LOG_INTERVAL)
+ )
+ );
+ if (cmd.hasOption("config-dir")) {
+ return runBulk(
+ cmd.getOptionValue("config-dir"),
+ cmd.getOptionValue("storage-root"),
+ dryRun, dbUrl, dbUser, dbPassword,
+ batchSize, owner, logInterval
+ );
+ }
+ return runSingle(
+ cmd.getOptionValue("type"),
+ cmd.getOptionValue("path"),
+ cmd.getOptionValue("repo-name"),
+ dryRun, dbUrl, dbUser, dbPassword,
+ batchSize, owner, logInterval
+ );
+ }
+
+ /**
+ * Run bulk mode: scan all repos from the config directory.
+ *
+ * @param configDirStr Config directory path string
+ * @param storageRootStr Storage root path string
+ * @param dryRun Dry run flag
+ * @param dbUrl JDBC URL (may be null if dryRun)
+ * @param dbUser DB user
+ * @param dbPassword DB password
+ * @param batchSize Batch insert size
+ * @param owner Artifact owner
+ * @param logInterval Progress log interval
+ * @return Exit code
+ * @checkstyle ParameterNumberCheck (15 lines)
+ */
+ @SuppressWarnings("PMD.ExcessiveParameterList")
+ private static int runBulk(
+ final String configDirStr,
+ final String storageRootStr,
+ final boolean dryRun,
+ final String dbUrl,
+ final String dbUser,
+ final String dbPassword,
+ final int batchSize,
+ final String owner,
+ final int logInterval
+ ) {
+ final Path configDir = Paths.get(configDirStr);
+ final Path storageRoot = Paths.get(storageRootStr);
+ if (!Files.isDirectory(configDir)) {
+ LOG.error("--config-dir is not a directory: {}", configDirStr);
+ return 1;
+ }
+ if (!Files.isDirectory(storageRoot)) {
+ LOG.error("--storage-root is not a directory: {}", storageRootStr);
+ return 1;
+ }
+ if (!dryRun && (dbUrl == null || dbUrl.isEmpty())) {
+ LOG.error("--db-url is required unless --dry-run is set");
+ return 1;
+ }
+ DataSource dataSource = null;
+ if (!dryRun) {
+ dataSource = buildDataSource(dbUrl, dbUser, dbPassword);
+ }
+ try {
+ return new BulkBackfillRunner(
+ configDir, storageRoot, dataSource,
+ owner, batchSize, dryRun, logInterval, System.err
+ ).run();
+ } catch (final IOException ex) {
+ LOG.error("Bulk backfill failed: {}", ex.getMessage(), ex);
+ return 1;
+ } finally {
+ closeDataSource(dataSource);
+ }
+ }
+
+ /**
+ * Run single-repo mode (original behaviour).
+ *
+ * @param type Scanner type
+ * @param pathStr Path string
+ * @param repoName Repo name
+ * @param dryRun Dry run flag
+ * @param dbUrl JDBC URL
+ * @param dbUser DB user
+ * @param dbPassword DB password
+ * @param batchSize Batch size
+ * @param owner Artifact owner
+ * @param logInterval Progress interval
+ * @return Exit code
+ * @checkstyle ParameterNumberCheck (15 lines)
+ */
+ @SuppressWarnings("PMD.ExcessiveParameterList")
+ private static int runSingle(
+ final String type,
+ final String pathStr,
+ final String repoName,
+ final boolean dryRun,
+ final String dbUrl,
+ final String dbUser,
+ final String dbPassword,
+ final int batchSize,
+ final String owner,
+ final int logInterval
+ ) {
+ if (type == null || pathStr == null || repoName == null) {
+ LOG.error(
+ "--type, --path, and --repo-name are all required in single-repo mode"
+ );
+ return 1;
+ }
+ final Path root = Paths.get(pathStr);
+ if (!Files.exists(root) || !Files.isDirectory(root)) {
+ LOG.error(
+ "Path does not exist or is not a directory: {}", pathStr
+ );
+ return 1;
+ }
+ if (!dryRun && (dbUrl == null || dbUrl.isEmpty())) {
+ LOG.error("--db-url is required unless --dry-run is set");
+ return 1;
+ }
+ final Scanner scanner;
+ try {
+ scanner = ScannerFactory.create(type);
+ } catch (final IllegalArgumentException ex) {
+ LOG.error(
+ "Invalid scanner type '{}': {}", type, ex.getMessage()
+ );
+ return 1;
+ }
+ LOG.info(
+ "Backfill starting: type={}, path={}, repo-name={}, "
+ + "batch-size={}, dry-run={}",
+ type, root, repoName, batchSize, dryRun
+ );
+ DataSource dataSource = null;
+ if (!dryRun) {
+ dataSource = buildDataSource(dbUrl, dbUser, dbPassword);
+ }
+ final ProgressReporter progress =
+ new ProgressReporter(logInterval);
+ try (BatchInserter inserter =
+ new BatchInserter(dataSource, batchSize, dryRun)) {
+ try (Stream stream =
+ scanner.scan(root, repoName)) {
+ stream
+ .map(rec -> new ArtifactRecord(
+ rec.repoType(), rec.repoName(), rec.name(),
+ rec.version(), rec.size(), rec.createdDate(),
+ rec.releaseDate(), owner, rec.pathPrefix()
+ ))
+ .forEach(record -> {
+ inserter.accept(record);
+ progress.increment();
+ });
+ }
+ } catch (final Exception ex) {
+ LOG.error("Backfill failed: {}", ex.getMessage(), ex);
+ return 1;
+ } finally {
+ closeDataSource(dataSource);
+ }
+ progress.printFinalSummary();
+ LOG.info("Backfill completed successfully");
+ return 0;
+ }
+
+ /**
+ * Build a HikariCP datasource.
+ *
+ * @param dbUrl JDBC URL
+ * @param dbUser DB user
+ * @param dbPassword DB password
+ * @return DataSource
+ */
+ private static DataSource buildDataSource(
+ final String dbUrl,
+ final String dbUser,
+ final String dbPassword
+ ) {
+ final HikariConfig config = new HikariConfig();
+ config.setJdbcUrl(dbUrl);
+ config.setUsername(dbUser);
+ config.setPassword(dbPassword);
+ config.setMaximumPoolSize(POOL_MAX_SIZE);
+ config.setMinimumIdle(POOL_MIN_IDLE);
+ config.setConnectionTimeout(POOL_CONN_TIMEOUT);
+ config.setIdleTimeout(POOL_IDLE_TIMEOUT);
+ config.setPoolName("Backfill-Pool");
+ return new HikariDataSource(config);
+ }
+
+ /**
+ * Close a HikariDataSource if non-null.
+ *
+ * @param dataSource DataSource to close (may be null)
+ */
+ private static void closeDataSource(final DataSource dataSource) {
+ if (dataSource instanceof HikariDataSource) {
+ ((HikariDataSource) dataSource).close();
+ }
+ }
+
+ /**
+ * Build the CLI option definitions.
+ *
+ * @return Options instance
+ */
+ private static Options buildOptions() {
+ final Options options = new Options();
+ options.addOption(
+ Option.builder("t").longOpt("type")
+ .hasArg().argName("TYPE")
+ .desc("Scanner type — single-repo mode (maven, docker, npm, "
+ + "pypi, go, helm, composer, file, etc.)")
+ .build()
+ );
+ options.addOption(
+ Option.builder("p").longOpt("path")
+ .hasArg().argName("PATH")
+ .desc("Root directory path to scan — single-repo mode")
+ .build()
+ );
+ options.addOption(
+ Option.builder("r").longOpt("repo-name")
+ .hasArg().argName("NAME")
+ .desc("Repository name — single-repo mode")
+ .build()
+ );
+ options.addOption(
+ Option.builder("C").longOpt("config-dir")
+ .hasArg().argName("DIR")
+ .desc("Directory of Artipie *.yaml repo configs — bulk mode")
+ .build()
+ );
+ options.addOption(
+ Option.builder("R").longOpt("storage-root")
+ .hasArg().argName("DIR")
+ .desc("Storage root; each repo lives at // "
+ + "— bulk mode")
+ .build()
+ );
+ options.addOption(
+ Option.builder().longOpt("db-url")
+ .hasArg().argName("URL")
+ .desc("JDBC PostgreSQL URL (required unless --dry-run)")
+ .build()
+ );
+ options.addOption(
+ Option.builder().longOpt("db-user")
+ .hasArg().argName("USER")
+ .desc("Database user (default: artipie)")
+ .build()
+ );
+ options.addOption(
+ Option.builder().longOpt("db-password")
+ .hasArg().argName("PASS")
+ .desc("Database password (default: artipie)")
+ .build()
+ );
+ options.addOption(
+ Option.builder("b").longOpt("batch-size")
+ .hasArg().argName("SIZE")
+ .desc("Batch insert size (default: 1000)")
+ .build()
+ );
+ options.addOption(
+ Option.builder().longOpt("owner")
+ .hasArg().argName("OWNER")
+ .desc("Default owner (default: system)")
+ .build()
+ );
+ options.addOption(
+ Option.builder().longOpt("log-interval")
+ .hasArg().argName("N")
+ .desc("Progress log interval (default: 10000)")
+ .build()
+ );
+ options.addOption(
+ Option.builder().longOpt("dry-run")
+ .desc("Scan only, do not write to database")
+ .build()
+ );
+ options.addOption(
+ Option.builder("h").longOpt("help")
+ .desc("Print help and exit")
+ .build()
+ );
+ return options;
+ }
+
+ /**
+ * Print usage help to stdout.
+ *
+ * @param options CLI options
+ */
+ private static void printHelp(final Options options) {
+ new HelpFormatter().printHelp(
+ "backfill-cli",
+ "Backfill the PostgreSQL artifacts table from disk storage",
+ options,
+ "",
+ true
+ );
+ }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/BatchInserter.java b/artipie-backfill/src/main/java/com/artipie/backfill/BatchInserter.java
new file mode 100644
index 000000000..695d31805
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/BatchInserter.java
@@ -0,0 +1,304 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+import javax.sql.DataSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Batches artifact records and inserts them into PostgreSQL using JDBC batch
+ * operations. Supports dry-run mode where records are counted but not
+ * persisted to the database.
+ *
+ * <p>On first call the {@code artifacts} table and its indexes are created
+ * if they do not already exist.
+ *
+ * <p>When a batch commit fails the inserter falls back to individual inserts
+ * so that a single bad record does not block the entire batch.
+ *
+ * @since 1.20.13
+ */
+public final class BatchInserter implements AutoCloseable {
+
+ /**
+ * SLF4J logger.
+ */
+ private static final Logger LOG =
+ LoggerFactory.getLogger(BatchInserter.class);
+
+ /**
+ * UPSERT SQL — must match DbConsumer parameter binding order exactly.
+ */
+ private static final String UPSERT_SQL = String.join(
+ " ",
+ "INSERT INTO artifacts",
+ "(repo_type, repo_name, name, version, size,",
+ "created_date, release_date, owner, path_prefix)",
+ "VALUES (?,?,?,?,?,?,?,?,?)",
+ "ON CONFLICT (repo_name, name, version)",
+ "DO UPDATE SET repo_type = EXCLUDED.repo_type,",
+ "size = EXCLUDED.size,",
+ "created_date = EXCLUDED.created_date,",
+ "release_date = EXCLUDED.release_date,",
+ "owner = EXCLUDED.owner,",
+ "path_prefix = COALESCE(EXCLUDED.path_prefix, artifacts.path_prefix)"
+ );
+
+ /**
+ * JDBC data source.
+ */
+ private final DataSource source;
+
+ /**
+ * Maximum number of records per batch.
+ */
+ private final int batchSize;
+
+ /**
+ * When {@code true} records are counted but not written to the database.
+ */
+ private final boolean dryRun;
+
+ /**
+ * Buffer of records awaiting the next flush.
+ */
+ private final List buffer;
+
+ /**
+ * Total records successfully inserted (or counted in dry-run mode).
+ */
+ private final AtomicLong insertedCount;
+
+ /**
+ * Total records that could not be inserted.
+ */
+ private final AtomicLong skippedCount;
+
+ /**
+ * Whether the table DDL has already been executed in this session.
+ */
+ private boolean tableCreated;
+
+ /**
+ * Ctor.
+ *
+ * @param source JDBC data source
+ * @param batchSize Maximum records per batch flush
+ * @param dryRun If {@code true}, count only — no DB writes
+ */
+ public BatchInserter(final DataSource source, final int batchSize,
+ final boolean dryRun) {
+ this.source = source;
+ this.batchSize = batchSize;
+ this.dryRun = dryRun;
+ this.buffer = new ArrayList<>(batchSize);
+ this.insertedCount = new AtomicLong(0L);
+ this.skippedCount = new AtomicLong(0L);
+ this.tableCreated = false;
+ }
+
+ /**
+ * Accept a single artifact record. The record is buffered internally
+ * and flushed automatically when the buffer reaches {@code batchSize}.
+ *
+ * @param record Artifact record to insert
+ */
+ public void accept(final ArtifactRecord record) {
+ this.buffer.add(record);
+ if (this.buffer.size() >= this.batchSize) {
+ this.flush();
+ }
+ }
+
+ /**
+ * Flush all buffered records to the database (or count them in dry-run).
+ */
+ public void flush() {
+ if (this.buffer.isEmpty()) {
+ return;
+ }
+ if (this.dryRun) {
+ this.insertedCount.addAndGet(this.buffer.size());
+ LOG.info("[dry-run] Would insert {} records (total: {})",
+ this.buffer.size(), this.insertedCount.get());
+ this.buffer.clear();
+ return;
+ }
+ this.ensureTable();
+ final List batch = new ArrayList<>(this.buffer);
+ this.buffer.clear();
+ try (Connection conn = this.source.getConnection()) {
+ conn.setAutoCommit(false);
+ try (PreparedStatement stmt = conn.prepareStatement(UPSERT_SQL)) {
+ for (final ArtifactRecord rec : batch) {
+ bindRecord(stmt, rec);
+ stmt.addBatch();
+ }
+ stmt.executeBatch();
+ conn.commit();
+ this.insertedCount.addAndGet(batch.size());
+ } catch (final SQLException ex) {
+ rollback(conn);
+ LOG.warn("Batch insert of {} records failed, falling back to "
+ + "individual inserts: {}", batch.size(), ex.getMessage());
+ this.insertIndividually(batch);
+ }
+ } catch (final SQLException ex) {
+ LOG.warn("Failed to obtain DB connection for batch of {} records: {}",
+ batch.size(), ex.getMessage());
+ this.skippedCount.addAndGet(batch.size());
+ }
+ }
+
+ /**
+ * Return total number of successfully inserted records.
+ *
+ * @return Inserted count
+ */
+ public long getInsertedCount() {
+ return this.insertedCount.get();
+ }
+
+ /**
+ * Return total number of records that were skipped due to errors.
+ *
+ * @return Skipped count
+ */
+ public long getSkippedCount() {
+ return this.skippedCount.get();
+ }
+
+ @Override
+ public void close() {
+ this.flush();
+ LOG.info("BatchInserter closed — inserted: {}, skipped: {}",
+ this.insertedCount.get(), this.skippedCount.get());
+ }
+
+ /**
+ * Ensure the artifacts table and performance indexes exist.
+ * Called once per session on the first real flush.
+ */
+ private void ensureTable() {
+ if (this.tableCreated) {
+ return;
+ }
+ try (Connection conn = this.source.getConnection();
+ Statement stmt = conn.createStatement()) {
+ stmt.executeUpdate(
+ String.join(
+ "\n",
+ "CREATE TABLE IF NOT EXISTS artifacts(",
+ " id BIGSERIAL PRIMARY KEY,",
+ " repo_type VARCHAR NOT NULL,",
+ " repo_name VARCHAR NOT NULL,",
+ " name VARCHAR NOT NULL,",
+ " version VARCHAR NOT NULL,",
+ " size BIGINT NOT NULL,",
+ " created_date BIGINT NOT NULL,",
+ " release_date BIGINT,",
+ " owner VARCHAR NOT NULL,",
+ " UNIQUE (repo_name, name, version)",
+ ");"
+ )
+ );
+ stmt.executeUpdate(
+ "CREATE INDEX IF NOT EXISTS idx_artifacts_repo_lookup "
+ + "ON artifacts(repo_name, name, version)"
+ );
+ stmt.executeUpdate(
+ "CREATE INDEX IF NOT EXISTS idx_artifacts_repo_type_name "
+ + "ON artifacts(repo_type, repo_name, name)"
+ );
+ stmt.executeUpdate(
+ "CREATE INDEX IF NOT EXISTS idx_artifacts_created_date "
+ + "ON artifacts(created_date)"
+ );
+ stmt.executeUpdate(
+ "CREATE INDEX IF NOT EXISTS idx_artifacts_owner "
+ + "ON artifacts(owner)"
+ );
+ this.tableCreated = true;
+ LOG.info("Artifacts table and indexes verified/created");
+ } catch (final SQLException ex) {
+ LOG.warn("Failed to create artifacts table: {}", ex.getMessage());
+ }
+ }
+
+ /**
+ * Fall back to inserting records one by one after a batch failure.
+ *
+ * @param records Records to insert individually
+ */
+ private void insertIndividually(final List records) {
+ for (final ArtifactRecord rec : records) {
+ try (Connection conn = this.source.getConnection();
+ PreparedStatement stmt = conn.prepareStatement(UPSERT_SQL)) {
+ conn.setAutoCommit(false);
+ bindRecord(stmt, rec);
+ stmt.executeUpdate();
+ conn.commit();
+ this.insertedCount.incrementAndGet();
+ } catch (final SQLException ex) {
+ LOG.warn("Individual insert failed for {}/{}:{} — {}",
+ rec.repoName(), rec.name(), rec.version(),
+ ex.getMessage());
+ this.skippedCount.incrementAndGet();
+ }
+ }
+ }
+
+ /**
+ * Bind an {@link ArtifactRecord} to a {@link PreparedStatement}.
+ * Parameter order must match the UPSERT_SQL and DbConsumer exactly.
+ *
+ * @param stmt Prepared statement
+ * @param rec Artifact record
+ * @throws SQLException On binding error
+ */
+ private static void bindRecord(final PreparedStatement stmt,
+ final ArtifactRecord rec) throws SQLException {
+ stmt.setString(1, rec.repoType());
+ stmt.setString(2, rec.repoName() == null
+ ? null : rec.repoName().trim());
+ stmt.setString(3, rec.name());
+ stmt.setString(4, rec.version());
+ stmt.setLong(5, rec.size());
+ stmt.setLong(6, rec.createdDate());
+ if (rec.releaseDate() == null) {
+ stmt.setNull(7, Types.BIGINT);
+ } else {
+ stmt.setLong(7, rec.releaseDate());
+ }
+ stmt.setString(8, rec.owner());
+ if (rec.pathPrefix() == null) {
+ stmt.setNull(9, Types.VARCHAR);
+ } else {
+ stmt.setString(9, rec.pathPrefix());
+ }
+ }
+
+ /**
+ * Attempt to rollback the current transaction, logging any failure.
+ *
+ * @param conn JDBC connection
+ */
+ private static void rollback(final Connection conn) {
+ try {
+ conn.rollback();
+ } catch (final SQLException ex) {
+ LOG.warn("Rollback failed: {}", ex.getMessage());
+ }
+ }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/BulkBackfillRunner.java b/artipie-backfill/src/main/java/com/artipie/backfill/BulkBackfillRunner.java
new file mode 100644
index 000000000..26258c500
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/BulkBackfillRunner.java
@@ -0,0 +1,338 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Stream;
+import javax.sql.DataSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Orchestrates a bulk backfill run over a directory of Artipie repo configs.
+ *
+ * <p>For each {@code *.yaml} file found (non-recursively, sorted alphabetically)
+ * in the config directory, derives the repo name from the filename stem and the
+ * scanner type from {@code repo.type}, then runs the appropriate {@link Scanner}
+ * against {@code storageRoot/<repo-name>/}.
+ *
+ * <p>Per-repo failures (parse errors, unknown types, missing storage, scan
+ * exceptions) are all non-fatal: they are logged, recorded in the summary,
+ * and the next repo is processed. Only a {@code FAILED} status (scan exception)
+ * contributes to a non-zero exit code.
+ *
+ * @since 1.20.13
+ */
+@SuppressWarnings("PMD.ExcessiveImports")
+final class BulkBackfillRunner {
+
+    /**
+     * SLF4J logger.
+     */
+    private static final Logger LOG =
+        LoggerFactory.getLogger(BulkBackfillRunner.class);
+
+    /**
+     * {@code .yaml} file extension constant.
+     */
+    private static final String YAML_EXT = ".yaml";
+
+    /**
+     * Directory containing {@code *.yaml} Artipie repo config files.
+     */
+    private final Path configDir;
+
+    /**
+     * Root directory under which each repo's data lives at
+     * {@code <storage-root>/<repo-name>/}.
+     */
+    private final Path storageRoot;
+
+    /**
+     * Shared JDBC data source. May be {@code null} when {@code dryRun} is
+     * {@code true}.
+     */
+    private final DataSource dataSource;
+
+    /**
+     * Owner string applied to all inserted artifact records.
+     */
+    private final String owner;
+
+    /**
+     * Batch insert size.
+     */
+    private final int batchSize;
+
+    /**
+     * If {@code true} count records but do not write to the database.
+     */
+    private final boolean dryRun;
+
+    /**
+     * Progress log interval (log every N records per repo).
+     */
+    private final int logInterval;
+
+    /**
+     * Print stream for the summary table (typically {@code System.err}).
+     */
+    private final PrintStream out;
+
+    /**
+     * Ctor.
+     *
+     * @param configDir Directory of repo YAML configs
+     * @param storageRoot Root for repo storage directories
+     * @param dataSource JDBC data source (may be null when dryRun=true)
+     * @param owner Owner string for artifact records
+     * @param batchSize JDBC batch insert size
+     * @param dryRun If true, count only, no DB writes
+     * @param logInterval Progress log every N records
+     * @param out Stream for summary output (typically System.err)
+     * @checkstyle ParameterNumberCheck (10 lines)
+     */
+    @SuppressWarnings("PMD.ExcessiveParameterList")
+    BulkBackfillRunner(
+        final Path configDir,
+        final Path storageRoot,
+        final DataSource dataSource,
+        final String owner,
+        final int batchSize,
+        final boolean dryRun,
+        final int logInterval,
+        final PrintStream out
+    ) {
+        this.configDir = configDir;
+        this.storageRoot = storageRoot;
+        this.dataSource = dataSource;
+        this.owner = owner;
+        this.batchSize = batchSize;
+        this.dryRun = dryRun;
+        this.logInterval = logInterval;
+        this.out = out;
+    }
+
+    /**
+     * Run the bulk backfill over all {@code *.yaml} files in the config
+     * directory.
+     *
+     * @return Exit code: {@code 0} if all repos succeeded or were
+     *  skipped/parse-errored, {@code 1} if any repo had a scan failure
+     * @throws IOException if the config directory cannot be listed
+     */
+    int run() throws IOException {
+        final List<RepoResult> results = new ArrayList<>();
+        final Set<String> seenNames = new HashSet<>();
+        final List<Path> yamlFiles = new ArrayList<>();
+        try (Stream<Path> listing = Files.list(this.configDir)) {
+            listing
+                .filter(Files::isRegularFile)
+                .forEach(p -> {
+                    final String name = p.getFileName().toString();
+                    if (name.endsWith(YAML_EXT)) {
+                        yamlFiles.add(p);
+                    } else if (name.endsWith(".yml")) {
+                        LOG.debug(
+                            "Skipping '{}' — use .yaml extension, not .yml",
+                            p.getFileName()
+                        );
+                    }
+                });
+        }
+        yamlFiles.sort(Path::compareTo);
+        for (final Path file : yamlFiles) {
+            results.add(this.processFile(file, seenNames));
+        }
+        this.printSummary(results);
+        return results.stream()
+            .anyMatch(r -> r.status().startsWith("FAILED")) ? 1 : 0;
+    }
+
+    /**
+     * Process one YAML file and return a result row.
+     *
+     * @param file Path to the {@code .yaml} file
+     * @param seenNames Set of repo name stems already processed
+     * @return Result row for the summary table
+     */
+    private RepoResult processFile(
+        final Path file,
+        final Set<String> seenNames
+    ) {
+        final String fileName = file.getFileName().toString();
+        final String stem = fileName.endsWith(YAML_EXT)
+            ? fileName.substring(0, fileName.length() - YAML_EXT.length())
+            : fileName;
+        if (!seenNames.add(stem)) {
+            LOG.warn(
+                "Duplicate repo name '{}' (from '{}'), skipping", stem, fileName
+            );
+            return new RepoResult(
+                stem, "-", -1L, -1L, "SKIPPED (duplicate repo name)"
+            );
+        }
+        final RepoEntry entry;
+        try {
+            entry = RepoConfigYaml.parse(file);
+        } catch (final IOException ex) {
+            LOG.warn("PARSE_ERROR for '{}': {}", fileName, ex.getMessage());
+            return new RepoResult(
+                stem, "-", -1L, -1L,
+                "PARSE_ERROR (" + ex.getMessage() + ")"
+            );
+        }
+        final String rawType = entry.rawType();
+        final Scanner scanner;
+        try {
+            scanner = ScannerFactory.create(rawType);
+        } catch (final IllegalArgumentException ex) {
+            LOG.warn(
+                "Unknown type '{}' for repo '{}', skipping",
+                rawType, stem
+            );
+            return new RepoResult(
+                stem, "[UNKNOWN]", -1L, -1L,
+                "SKIPPED (unknown type: " + rawType + ")"
+            );
+        }
+        final Path storagePath = this.storageRoot.resolve(stem);
+        if (!Files.exists(storagePath)) {
+            LOG.warn(
+                "Storage path missing for repo '{}': {}", stem, storagePath
+            );
+            return new RepoResult(
+                stem, rawType, -1L, -1L, "SKIPPED (storage path missing)"
+            );
+        }
+        return this.scanRepo(stem, rawType, scanner, storagePath);
+    }
+
+    /**
+     * Scan one repo directory and return a result row.
+     *
+     * @param repoName Repo name (for logging and record insertion)
+     * @param scannerType Normalised scanner type string (for display)
+     * @param scanner Scanner instance
+     * @param storagePath Root directory to scan
+     * @return Result row
+     */
+    private RepoResult scanRepo(
+        final String repoName,
+        final String scannerType,
+        final Scanner scanner,
+        final Path storagePath
+    ) {
+        LOG.info(
+            "Scanning repo '{}' (type={}) at {}",
+            repoName, scannerType, storagePath
+        );
+        final ProgressReporter reporter =
+            new ProgressReporter(this.logInterval);
+        long inserted = -1L;
+        long dbSkipped = -1L;
+        boolean failed = false;
+        String failMsg = null;
+        final BatchInserter inserter = new BatchInserter(
+            this.dataSource, this.batchSize, this.dryRun
+        );
+        // Raw Stream: the element type of Scanner.scan is declared
+        // elsewhere in the project — TODO confirm and add the generic.
+        try (
+            inserter;
+            Stream stream =
+                scanner.scan(storagePath, repoName)
+        ) {
+            stream
+                .map(r -> new ArtifactRecord(
+                    r.repoType(), r.repoName(), r.name(),
+                    r.version(), r.size(), r.createdDate(),
+                    r.releaseDate(), this.owner, r.pathPrefix()
+                ))
+                .forEach(rec -> {
+                    inserter.accept(rec);
+                    reporter.increment();
+                });
+        } catch (final Exception ex) {
+            // inserter.close() was called by try-with-resources before this catch block.
+            // For FAILED rows, use -1L sentinel per design.
+            failed = true;
+            failMsg = ex.getMessage();
+            LOG.error(
+                "Scan FAILED for repo '{}': {}", repoName, ex.getMessage(), ex
+            );
+        }
+        // inserter.close() has been called (flushed remaining batch). Read final counts.
+        if (!failed) {
+            inserted = inserter.getInsertedCount();
+            dbSkipped = inserter.getSkippedCount();
+        }
+        reporter.printFinalSummary();
+        if (failed) {
+            return new RepoResult(
+                repoName, scannerType, -1L, -1L,
+                "FAILED (" + failMsg + ")"
+            );
+        }
+        return new RepoResult(repoName, scannerType, inserted, dbSkipped, "OK");
+    }
+
+    /**
+     * Print the summary table to the output stream.
+     *
+     * @param results List of result rows
+     */
+    private void printSummary(final List<RepoResult> results) {
+        this.out.printf(
+            "%nBulk backfill complete — %d repos processed%n",
+            results.size()
+        );
+        for (final RepoResult row : results) {
+            final String counts;
+            if (row.inserted() < 0) {
+                counts = String.format("%-30s", "-");
+            } else {
+                counts = String.format(
+                    "inserted=%-10d skipped=%-6d",
+                    row.inserted(), row.dbSkipped()
+                );
+            }
+            this.out.printf(
+                "  %-20s [%-12s] %s %s%n",
+                row.repoName(), row.displayType(), counts, row.status()
+            );
+        }
+        final long failCount = results.stream()
+            .filter(r -> r.status().startsWith("FAILED")).count();
+        if (failCount > 0) {
+            this.out.printf("%nExit code: 1 (%d repo(s) failed)%n", failCount);
+        } else {
+            this.out.println("\nExit code: 0");
+        }
+    }
+
+    /**
+     * One row in the bulk run summary.
+     *
+     * @param repoName Repo name
+     * @param displayType Type string for display
+     * @param inserted Records inserted (or -1 if not applicable)
+     * @param dbSkipped Records skipped at DB level (or -1 if not applicable)
+     * @param status Status string
+     */
+    private record RepoResult(
+        String repoName,
+        String displayType,
+        long inserted,
+        long dbSkipped,
+        String status
+    ) {
+    }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/ComposerScanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/ComposerScanner.java
new file mode 100644
index 000000000..01f86ee93
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/ComposerScanner.java
@@ -0,0 +1,373 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import javax.json.Json;
+import javax.json.JsonException;
+import javax.json.JsonObject;
+import javax.json.JsonReader;
+import javax.json.JsonValue;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Scanner for Composer (PHP) repositories.
+ *
+ *
Supports two layouts:
+ *
+ *
p2 (Satis-style): per-package JSON files under
+ * {@code p2/{vendor}/{package}.json}. Files ending with {@code ~dev.json}
+ * are skipped.
+ *
packages.json: a single root-level file containing
+ * all package metadata.
+ *
+ *
+ *
The p2 layout is checked first; if the {@code p2/} directory exists,
+ * {@code packages.json} is ignored even if present.
+ *
+ * @since 1.20.13
+ */
+final class ComposerScanner implements Scanner {
+
+ /**
+ * Logger.
+ */
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ComposerScanner.class);
+
+ /**
+ * Repository type string stored in every produced artifact record
+ * (e.g. {@code "composer"} or {@code "php"}).
+ */
+ private final String repoType;
+
+ /**
+ * Ctor with default repo type {@code "composer"}.
+ */
+ ComposerScanner() {
+ this("composer");
+ }
+
+ /**
+ * Ctor.
+ *
+ * @param repoType Repository type string for artifact records
+ */
+ ComposerScanner(final String repoType) {
+ this.repoType = repoType;
+ }
+
+ @Override
+ public Stream scan(final Path root, final String repoName)
+ throws IOException {
+ final Path p2dir = root.resolve("p2");
+ if (Files.isDirectory(p2dir)) {
+ return this.scanP2(root, repoName, p2dir);
+ }
+ final Path packagesJson = root.resolve("packages.json");
+ if (Files.isRegularFile(packagesJson) && Files.size(packagesJson) > 0) {
+ final List from =
+ this.parseJsonFile(root, repoName, packagesJson)
+ .collect(Collectors.toList());
+ if (!from.isEmpty()) {
+ return from.stream();
+ }
+ LOG.debug(
+ "packages.json has no packages, trying vendor-dir layout"
+ );
+ }
+ return this.scanVendorDirs(root, repoName);
+ }
+
+ /**
+ * Scan the p2 directory layout. Walks all {@code .json} files under
+ * {@code p2/}, skipping any that end with {@code ~dev.json}.
+ *
+ * @param root Repository root directory
+ * @param repoName Logical repository name
+ * @param p2dir Path to the p2 directory
+ * @return Stream of artifact records
+ * @throws IOException If an I/O error occurs
+ */
+ private Stream scanP2(final Path root,
+ final String repoName, final Path p2dir) throws IOException {
+ return Files.walk(p2dir)
+ .filter(Files::isRegularFile)
+ .filter(path -> path.toString().endsWith(".json"))
+ .filter(path -> !path.getFileName().toString().endsWith("~dev.json"))
+ .flatMap(path -> this.parseJsonFile(root, repoName, path));
+ }
+
+ /**
+ * Scan the Artipie Composer proxy layout.
+ *
+ *
The Artipie Composer proxy caches per-package metadata as
+ * {@code {vendor}/{package}.json} files directly under the repository
+ * root (no {@code p2/} prefix). Each file uses the standard Composer
+ * {@code {"packages":{...}}} JSON format.
+ *
+ *
Files ending with {@code ~dev.json} and 0-byte files are skipped.
+ *
+ * @param root Repository root directory
+ * @param repoName Logical repository name
+ * @return Stream of artifact records
+ * @throws IOException If an I/O error occurs
+ */
+ private Stream scanVendorDirs(final Path root,
+ final String repoName) throws IOException {
+ return Files.list(root)
+ .filter(Files::isDirectory)
+ .filter(dir -> !dir.getFileName().toString().startsWith("."))
+ .flatMap(
+ vendorDir -> {
+ try {
+ return Files.list(vendorDir)
+ .filter(Files::isRegularFile)
+ .filter(path -> path.toString().endsWith(".json"))
+ .filter(
+ path -> !path.getFileName().toString().endsWith("~dev.json")
+ )
+ .filter(
+ path -> {
+ try {
+ return Files.size(path) > 0L;
+ } catch (final IOException ex) {
+ LOG.debug("Cannot stat {}, skipping: {}", path, ex.getMessage());
+ return false;
+ }
+ }
+ )
+ .flatMap(path -> this.parseJsonFile(root, repoName, path));
+ } catch (final IOException ex) {
+ throw new UncheckedIOException(ex);
+ }
+ }
+ );
+ }
+
+ /**
+ * Parse a single Composer JSON file and produce artifact records.
+ *
+ * @param root Repository root directory
+ * @param repoName Logical repository name
+ * @param jsonPath Path to the JSON file
+ * @return Stream of artifact records
+ */
+ private Stream parseJsonFile(final Path root,
+ final String repoName, final Path jsonPath) {
+ final JsonObject json;
+ try (InputStream input = Files.newInputStream(jsonPath);
+ JsonReader reader = Json.createReader(input)) {
+ json = reader.readObject();
+ } catch (final JsonException ex) {
+ LOG.warn("Malformed JSON in {}: {}", jsonPath, ex.getMessage());
+ return Stream.empty();
+ } catch (final IOException ex) {
+ throw new UncheckedIOException(ex);
+ }
+ if (!json.containsKey("packages")
+ || json.isNull("packages")
+ || json.get("packages").getValueType() != JsonValue.ValueType.OBJECT) {
+ LOG.debug("Missing or invalid 'packages' key in {}", jsonPath);
+ return Stream.empty();
+ }
+ final JsonObject packages = json.getJsonObject("packages");
+ final long mtime;
+ try {
+ mtime = Files.readAttributes(jsonPath, BasicFileAttributes.class)
+ .lastModifiedTime().toMillis();
+ } catch (final IOException ex) {
+ throw new UncheckedIOException(ex);
+ }
+ final boolean proxyMode = this.repoType.endsWith("-proxy");
+ final List records = new ArrayList<>();
+ for (final String packageName : packages.keySet()) {
+ if (packages.isNull(packageName)
+ || packages.get(packageName).getValueType()
+ != JsonValue.ValueType.OBJECT) {
+ LOG.debug("Skipping non-object package entry: {}", packageName);
+ continue;
+ }
+ final JsonObject versions = packages.getJsonObject(packageName);
+ for (final String version : versions.keySet()) {
+ if (versions.isNull(version)
+ || versions.get(version).getValueType()
+ != JsonValue.ValueType.OBJECT) {
+ LOG.debug(
+ "Skipping non-object version entry: {} {}",
+ packageName, version
+ );
+ continue;
+ }
+ final JsonObject versionObj = versions.getJsonObject(version);
+ // For proxy repos, only record versions that have cached
+ // dist artifacts on disk. The metadata JSON lists all upstream
+ // versions but only downloaded ones have actual files.
+ // Check both .zip (new format) and plain (legacy).
+ if (proxyMode) {
+ final Path distDir = root.resolve("dist")
+ .resolve(packageName);
+ final Path zipFile = distDir.resolve(version + ".zip");
+ final Path legacyFile = distDir.resolve(version);
+ if (!Files.exists(zipFile) && !Files.exists(legacyFile)) {
+ continue;
+ }
+ }
+ long size = ComposerScanner.resolveDistSize(
+ root, versionObj
+ );
+ // For proxy repos, if dist URL resolution failed, read size
+ // directly from the cached file on disk
+ if (size == 0L && proxyMode) {
+ final Path distDir = root.resolve("dist")
+ .resolve(packageName);
+ final Path zipFile = distDir.resolve(version + ".zip");
+ final Path legacyFile = distDir.resolve(version);
+ try {
+ if (Files.isRegularFile(zipFile)) {
+ size = Files.size(zipFile);
+ } else if (Files.isRegularFile(legacyFile)) {
+ size = Files.size(legacyFile);
+ }
+ } catch (final IOException ignored) {
+ // keep size = 0
+ }
+ }
+ final String pathPrefix = proxyMode
+ ? packageName + "/" + version : null;
+ records.add(
+ new ArtifactRecord(
+ this.repoType,
+ repoName,
+ packageName,
+ version,
+ size,
+ mtime,
+ null,
+ "system",
+ pathPrefix
+ )
+ );
+ }
+ }
+ return records.stream();
+ }
+
+ /**
+ * Resolve the dist artifact size for a version entry.
+ *
+ *
Tries to extract the {@code dist.url} field and resolve it as a
+ * local file path. For HTTP URLs the path component is extracted and
+ * attempted relative to the repository root. If the file cannot be
+ * found the size is 0.
+ *
+ * @param root Repository root directory
+ * @param versionObj Version metadata JSON object
+ * @return Size in bytes, or 0 if the artifact cannot be found
+ */
+ private static long resolveDistSize(final Path root,
+ final JsonObject versionObj) {
+ if (!versionObj.containsKey("dist")
+ || versionObj.isNull("dist")
+ || versionObj.get("dist").getValueType()
+ != JsonValue.ValueType.OBJECT) {
+ return 0L;
+ }
+ final JsonObject dist = versionObj.getJsonObject("dist");
+ if (!dist.containsKey("url")
+ || dist.isNull("url")
+ || dist.get("url").getValueType() != JsonValue.ValueType.STRING) {
+ return 0L;
+ }
+ final String url = dist.getString("url");
+ return ComposerScanner.sizeFromUrl(root, url);
+ }
+
+ /**
+ * Attempt to resolve a dist URL to a local file and return its size.
+ *
+ * @param root Repository root directory
+ * @param url The dist URL string
+ * @return File size in bytes, or 0 if the file is not found
+ */
+ private static long sizeFromUrl(final Path root, final String url) {
+ String localPath;
+ if (url.startsWith("http://") || url.startsWith("https://")) {
+ try {
+ localPath = URI.create(url).getPath();
+ } catch (final IllegalArgumentException ex) {
+ LOG.debug("Cannot parse dist URL '{}': {}", url, ex.getMessage());
+ return 0L;
+ }
+ } else {
+ localPath = url;
+ }
+ if (localPath == null || localPath.isEmpty()) {
+ return 0L;
+ }
+ if (localPath.startsWith("/")) {
+ localPath = localPath.substring(1);
+ }
+ final Path resolved = root.resolve(localPath);
+ if (Files.isRegularFile(resolved)) {
+ try {
+ return Files.size(resolved);
+ } catch (final IOException ex) {
+ LOG.debug("Cannot stat {}: {}", resolved, ex.getMessage());
+ return 0L;
+ }
+ }
+ final int lastSlash = localPath.lastIndexOf('/');
+ if (lastSlash >= 0) {
+ final String filename = localPath.substring(lastSlash + 1);
+ final Path fallback = root.resolve(filename);
+ if (Files.isRegularFile(fallback)) {
+ try {
+ return Files.size(fallback);
+ } catch (final IOException ex) {
+ LOG.debug(
+ "Cannot stat fallback {}: {}",
+ fallback, ex.getMessage()
+ );
+ return 0L;
+ }
+ }
+ }
+ // Final fallback: progressively strip leading path segments.
+ // Handles Artipie local PHP repos where the dist URL contains
+ // a full HTTP path like "/prefix/api/composer/repo/artifacts/...".
+ String stripped = localPath;
+ while (stripped.contains("/")) {
+ stripped = stripped.substring(stripped.indexOf('/') + 1);
+ if (stripped.isEmpty()) {
+ break;
+ }
+ final Path candidate = root.resolve(stripped);
+ if (Files.isRegularFile(candidate)) {
+ try {
+ return Files.size(candidate);
+ } catch (final IOException ex) {
+ LOG.debug(
+ "Cannot stat candidate {}: {}",
+ candidate, ex.getMessage()
+ );
+ return 0L;
+ }
+ }
+ }
+ return 0L;
+ }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/DebianScanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/DebianScanner.java
new file mode 100644
index 000000000..9634b6b3e
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/DebianScanner.java
@@ -0,0 +1,213 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.zip.GZIPInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Scanner for Debian repositories.
+ *
+ *
Walks the repository directory tree to find {@code Packages} and
+ * {@code Packages.gz} index files under the standard Debian layout
+ * ({@code dists/{codename}/{component}/binary-{arch}/}). Each stanza
+ * in a Packages file describes one {@code .deb} package. The scanner
+ * extracts the {@code Package}, {@code Version}, and {@code Size}
+ * fields from each stanza.
+ *
+ *
When both {@code Packages} and {@code Packages.gz} exist in the
+ * same directory, only {@code Packages.gz} is used to avoid
+ * double-counting.
+ *
+ * @since 1.20.13
+ */
+final class DebianScanner implements Scanner {
+
+ /**
+ * Logger.
+ */
+ private static final Logger LOG =
+ LoggerFactory.getLogger(DebianScanner.class);
+
+ /**
+ * Name of the uncompressed Packages index file.
+ */
+ private static final String PACKAGES = "Packages";
+
+ /**
+ * Name of the gzip-compressed Packages index file.
+ */
+ private static final String PACKAGES_GZ = "Packages.gz";
+
+ @Override
+ public Stream scan(final Path root, final String repoName)
+ throws IOException {
+ final List indexFiles = Files.walk(root)
+ .filter(Files::isRegularFile)
+ .filter(DebianScanner::isPackagesFile)
+ .collect(Collectors.toList());
+ final List deduped = DebianScanner.dedup(indexFiles);
+ return deduped.stream()
+ .flatMap(path -> DebianScanner.parseIndex(path, repoName));
+ }
+
+ /**
+ * Check whether a file is a Packages or Packages.gz index file.
+ *
+ * @param path File path to check
+ * @return True if the filename is "Packages" or "Packages.gz"
+ */
+ private static boolean isPackagesFile(final Path path) {
+ final String name = path.getFileName().toString();
+ return PACKAGES.equals(name) || PACKAGES_GZ.equals(name);
+ }
+
+ /**
+ * Deduplicate index files by parent directory.
+ * When both Packages and Packages.gz exist in the same directory,
+ * prefer Packages.gz.
+ *
+ * @param files List of discovered index files
+ * @return Deduplicated list preferring .gz files
+ */
+ private static List dedup(final List files) {
+ final Map byParent = new HashMap<>();
+ for (final Path file : files) {
+ final Path parent = file.getParent();
+ final Path existing = byParent.get(parent);
+ if (existing == null) {
+ byParent.put(parent, file);
+ } else if (file.getFileName().toString().equals(PACKAGES_GZ)) {
+ byParent.put(parent, file);
+ }
+ }
+ return new ArrayList<>(byParent.values());
+ }
+
+ /**
+ * Parse a single Packages or Packages.gz file into artifact records.
+ *
+ * @param path Path to the index file
+ * @param repoName Logical repository name
+ * @return Stream of artifact records parsed from the index
+ */
+ private static Stream parseIndex(final Path path,
+ final String repoName) {
+ try {
+ final long mtime = Files.getLastModifiedTime(path).toMillis();
+ final List records = new ArrayList<>();
+ try (
+ InputStream fis = Files.newInputStream(path);
+ InputStream input = path.getFileName().toString().equals(PACKAGES_GZ)
+ ? new GZIPInputStream(fis) : fis;
+ BufferedReader reader = new BufferedReader(
+ new InputStreamReader(input, StandardCharsets.UTF_8)
+ )
+ ) {
+ String pkg = null;
+ String version = null;
+ String arch = null;
+ long size = 0L;
+ String line = reader.readLine();
+ while (line != null) {
+ if (line.isEmpty()) {
+ if (pkg != null && version != null) {
+ records.add(
+ new ArtifactRecord(
+ "deb",
+ repoName,
+ DebianScanner.formatName(pkg, arch),
+ version,
+ size,
+ mtime,
+ null,
+ "system",
+ null
+ )
+ );
+ } else if (pkg != null || version != null) {
+ LOG.debug(
+ "Skipping incomplete stanza (Package={}, Version={}) in {}",
+ pkg, version, path
+ );
+ }
+ pkg = null;
+ version = null;
+ arch = null;
+ size = 0L;
+ } else if (line.startsWith("Package:")) {
+ pkg = line.substring("Package:".length()).trim();
+ } else if (line.startsWith("Version:")) {
+ version = line.substring("Version:".length()).trim();
+ } else if (line.startsWith("Architecture:")) {
+ arch = line.substring("Architecture:".length()).trim();
+ } else if (line.startsWith("Size:")) {
+ try {
+ size = Long.parseLong(
+ line.substring("Size:".length()).trim()
+ );
+ } catch (final NumberFormatException ex) {
+ LOG.debug(
+ "Invalid Size value in {}: {}",
+ path, line
+ );
+ size = 0L;
+ }
+ }
+ line = reader.readLine();
+ }
+ if (pkg != null && version != null) {
+ records.add(
+ new ArtifactRecord(
+ "deb",
+ repoName,
+ DebianScanner.formatName(pkg, arch),
+ version,
+ size,
+ mtime,
+ null,
+ "system",
+ null
+ )
+ );
+ }
+ }
+ return records.stream();
+ } catch (final IOException ex) {
+ throw new UncheckedIOException(ex);
+ }
+ }
+
+ /**
+ * Format the artifact name. The Debian adapter stores artifact names
+ * as {@code package_architecture} (e.g. {@code curl_amd64}).
+ * If architecture is missing, uses just the package name.
+ *
+ * @param pkg Package name
+ * @param arch Architecture string, or null if not present
+ * @return Formatted name
+ */
+ private static String formatName(final String pkg, final String arch) {
+ if (arch != null && !arch.isEmpty()) {
+ return String.join("_", pkg, arch);
+ }
+ return pkg;
+ }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/DockerScanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/DockerScanner.java
new file mode 100644
index 000000000..ba677626b
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/DockerScanner.java
@@ -0,0 +1,386 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+import javax.json.Json;
+import javax.json.JsonArray;
+import javax.json.JsonException;
+import javax.json.JsonObject;
+import javax.json.JsonReader;
+import javax.json.JsonValue;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Scanner for Docker v2 registry repositories.
+ *
+ *
Walks the Docker registry storage layout looking for image repositories
+ * under {@code repositories/}, reads tag link files to resolve manifest
+ * digests, and parses manifest JSON to compute artifact sizes.
+ *
+ * @since 1.20.13
+ */
+final class DockerScanner implements Scanner {
+
+ /**
+ * Logger.
+ */
+ private static final Logger LOG =
+ LoggerFactory.getLogger(DockerScanner.class);
+
+ /**
+ * Name of the manifests metadata directory.
+ */
+ private static final String MANIFESTS_DIR = "_manifests";
+
+ /**
+ * Name of the tags subdirectory.
+ */
+ private static final String TAGS_DIR = "tags";
+
+ /**
+ * Repository type string stored in every produced artifact record
+ * (e.g. {@code "docker"} or {@code "docker-proxy"}).
+ */
+ private final String repoType;
+
+ /**
+ * When {@code true} this is a proxy repo — image names match the
+ * upstream pull path with no prefix. When {@code false} (local/hosted)
+ * the Artipie Docker push path includes the registry name in the image
+ * path, so we prepend {@code repoName + "/"} to match production.
+ */
+ private final boolean isProxy;
+
+ /**
+ * Ctor for local (hosted) Docker repos.
+ */
+ DockerScanner() {
+ this("docker", false);
+ }
+
+ /**
+ * Ctor.
+ *
+ * @param isProxy {@code true} for proxy repos, {@code false} for local
+ */
+ DockerScanner(final boolean isProxy) {
+ this(isProxy ? "docker-proxy" : "docker", isProxy);
+ }
+
+ /**
+ * Ctor.
+ *
+ * @param repoType Repository type string for artifact records
+ * @param isProxy {@code true} for proxy repos, {@code false} for local
+ */
+ DockerScanner(final String repoType, final boolean isProxy) {
+ this.repoType = repoType;
+ this.isProxy = isProxy;
+ }
+
+ @Override
+ public Stream scan(final Path root, final String repoName)
+ throws IOException {
+ final Path reposDir = DockerScanner.resolveReposDir(root);
+ if (reposDir == null) {
+ LOG.warn("No repositories directory found under {}", root);
+ return Stream.empty();
+ }
+ final Path blobsRoot = reposDir.getParent().resolve("blobs");
+ final List images = DockerScanner.findImages(reposDir);
+ final List records = new ArrayList<>();
+ for (final Path imageDir : images) {
+ final String rawImageName =
+ reposDir.relativize(imageDir).toString();
+ final String imageName = this.isProxy
+ ? rawImageName
+ : repoName + "/" + rawImageName;
+ final Path tagsDir = imageDir
+ .resolve(DockerScanner.MANIFESTS_DIR)
+ .resolve(DockerScanner.TAGS_DIR);
+ if (!Files.isDirectory(tagsDir)) {
+ continue;
+ }
+ try (Stream tagDirs = Files.list(tagsDir)) {
+ final List tagList = tagDirs
+ .filter(Files::isDirectory)
+ .toList();
+ for (final Path tagDir : tagList) {
+ final ArtifactRecord record = this.processTag(
+ blobsRoot, repoName, imageName, tagDir
+ );
+ if (record != null) {
+ records.add(record);
+ }
+ }
+ }
+ }
+ return records.stream();
+ }
+
+ /**
+ * Resolve the repositories directory. Checks common Docker registry
+ * v2 layouts:
+ *
+ *
{@code root/repositories/}
+ *
{@code root/docker/registry/v2/repositories/}
+ *
+ * Falls back to walking for a directory named {@code repositories}
+ * that contains image dirs with {@code _manifests/}.
+ *
+ * @param root Registry root path
+ * @return Path to the repositories directory, or null if not found
+ * @throws IOException If an I/O error occurs during directory walk
+ */
+ private static Path resolveReposDir(final Path root) throws IOException {
+ final Path direct = root.resolve("repositories");
+ if (Files.isDirectory(direct)) {
+ return direct;
+ }
+ final Path v2 = root.resolve("docker/registry/v2/repositories");
+ if (Files.isDirectory(v2)) {
+ return v2;
+ }
+ try (Stream walk = Files.walk(root)) {
+ return walk.filter(Files::isDirectory)
+ .filter(
+ p -> "repositories".equals(p.getFileName().toString())
+ )
+ .findFirst()
+ .orElse(null);
+ }
+ }
+
+ /**
+ * Walk the repositories directory to find all image directories.
+ * An image directory is one that contains {@code _manifests/tags/}.
+ *
+ * @param reposDir The repositories root directory
+ * @return List of image directory paths
+ * @throws IOException If an I/O error occurs
+ */
+ private static List findImages(final Path reposDir)
+ throws IOException {
+ final List images = new ArrayList<>();
+ try (Stream walker = Files.walk(reposDir)) {
+ walker.filter(Files::isDirectory)
+ .filter(
+ dir -> {
+ final Path manifests = dir
+ .resolve(DockerScanner.MANIFESTS_DIR)
+ .resolve(DockerScanner.TAGS_DIR);
+ return Files.isDirectory(manifests);
+ }
+ )
+ .forEach(images::add);
+ }
+ return images;
+ }
+
+ /**
+ * Process a single tag directory and produce an artifact record.
+ *
+ * @param blobsRoot Path to the blobs directory
+ * @param repoName Logical repository name
+ * @param imageName Image name (relative path from repositories dir)
+ * @param tagDir Tag directory path
+ * @return ArtifactRecord, or null if tag should be skipped
+ */
+ private ArtifactRecord processTag(final Path blobsRoot,
+ final String repoName, final String imageName, final Path tagDir) {
+ final String tag = tagDir.getFileName().toString();
+ final Path linkFile = tagDir.resolve("current").resolve("link");
+ if (!Files.isRegularFile(linkFile)) {
+ LOG.debug("No link file at {}", linkFile);
+ return null;
+ }
+ final String digest;
+ try {
+ digest = Files.readString(linkFile, StandardCharsets.UTF_8).trim();
+ } catch (final IOException ex) {
+ LOG.warn("Cannot read link file {}: {}", linkFile, ex.getMessage());
+ return null;
+ }
+ if (digest.isEmpty()) {
+ LOG.debug("Empty link file at {}", linkFile);
+ return null;
+ }
+ final long createdDate = DockerScanner.linkMtime(linkFile);
+ final long size = DockerScanner.resolveSize(blobsRoot, digest);
+ return new ArtifactRecord(
+ this.repoType,
+ repoName,
+ imageName,
+ tag,
+ size,
+ createdDate,
+ null,
+ "system",
+ null
+ );
+ }
+
+ /**
+ * Resolve the total size of an artifact from its manifest digest.
+ * For image manifests with layers, sums config.size + layers[].size.
+ * For manifest lists, uses the manifest blob file's own size.
+ * Returns 0 if the blob is missing or manifest is corrupt.
+ *
+ * @param blobsRoot Path to the blobs directory
+ * @param digest Digest string like "sha256:abc123..."
+ * @return Total size in bytes
+ */
+ private static long resolveSize(final Path blobsRoot,
+ final String digest) {
+ final Path blobPath = DockerScanner.digestToPath(blobsRoot, digest);
+ if (blobPath == null || !Files.isRegularFile(blobPath)) {
+ LOG.debug("Blob not found for digest {}", digest);
+ return 0L;
+ }
+ final JsonObject manifest;
+ try (InputStream input = Files.newInputStream(blobPath);
+ JsonReader reader = Json.createReader(input)) {
+ manifest = reader.readObject();
+ } catch (final JsonException ex) {
+ LOG.warn(
+ "Corrupted manifest JSON for digest {}: {}",
+ digest, ex.getMessage()
+ );
+ return 0L;
+ } catch (final IOException ex) {
+ LOG.warn("Cannot read blob {}: {}", blobPath, ex.getMessage());
+ return 0L;
+ }
+ if (manifest.containsKey("manifests")
+ && manifest.get("manifests").getValueType()
+ == JsonValue.ValueType.ARRAY) {
+ return DockerScanner.resolveManifestListSize(
+ blobsRoot, manifest.getJsonArray("manifests")
+ );
+ }
+ return DockerScanner.sumLayersAndConfig(manifest);
+ }
+
+ /**
+ * Sum config.size and all layers[].size from an image manifest.
+ *
+ * @param manifest Parsed manifest JSON object
+ * @return Total size in bytes, or 0 if fields are missing
+ */
+ private static long sumLayersAndConfig(final JsonObject manifest) {
+ long total = 0L;
+ if (manifest.containsKey("config")
+ && manifest.get("config").getValueType()
+ == JsonValue.ValueType.OBJECT) {
+ final JsonObject config = manifest.getJsonObject("config");
+ if (config.containsKey("size")) {
+ total += config.getJsonNumber("size").longValue();
+ }
+ }
+ if (manifest.containsKey("layers")
+ && manifest.get("layers").getValueType()
+ == JsonValue.ValueType.ARRAY) {
+ final JsonArray layers = manifest.getJsonArray("layers");
+ for (final JsonValue layer : layers) {
+ if (layer.getValueType() == JsonValue.ValueType.OBJECT) {
+ final JsonObject layerObj = layer.asJsonObject();
+ if (layerObj.containsKey("size")) {
+ total += layerObj.getJsonNumber("size").longValue();
+ }
+ }
+ }
+ }
+ return total;
+ }
+
+ /**
+ * Resolve the total size of a manifest list by summing the sizes
+ * of all child image manifests' layers and configs.
+ *
+ * @param blobsRoot Path to the blobs directory
+ * @param children The "manifests" JSON array from the manifest list
+ * @return Total size in bytes across all child manifests
+ */
+ private static long resolveManifestListSize(final Path blobsRoot,
+ final JsonArray children) {
+ long total = 0L;
+ for (final JsonValue entry : children) {
+ if (entry.getValueType() != JsonValue.ValueType.OBJECT) {
+ continue;
+ }
+ final JsonObject child = entry.asJsonObject();
+ final String childDigest = child.getString("digest", null);
+ if (childDigest == null || childDigest.isEmpty()) {
+ continue;
+ }
+ final Path childPath =
+ DockerScanner.digestToPath(blobsRoot, childDigest);
+ if (childPath == null || !Files.isRegularFile(childPath)) {
+ LOG.debug("Child manifest blob not found: {}", childDigest);
+ continue;
+ }
+ try (InputStream input = Files.newInputStream(childPath);
+ JsonReader reader = Json.createReader(input)) {
+ final JsonObject childManifest = reader.readObject();
+ total += DockerScanner.sumLayersAndConfig(childManifest);
+ } catch (final JsonException | IOException ex) {
+ LOG.warn("Cannot read child manifest {}: {}",
+ childDigest, ex.getMessage());
+ }
+ }
+ return total;
+ }
+
+ /**
+ * Convert a digest string to a blob file path.
+ *
+ * @param blobsRoot Root blobs directory
+ * @param digest Digest like "sha256:abc123def..."
+ * @return Path to the data file, or null if digest format is invalid
+ */
+ private static Path digestToPath(final Path blobsRoot,
+ final String digest) {
+ final String[] parts = digest.split(":", 2);
+ if (parts.length != 2 || parts[1].length() < 2) {
+ LOG.warn("Invalid digest format: {}", digest);
+ return null;
+ }
+ final String algorithm = parts[0];
+ final String hex = parts[1];
+ return blobsRoot
+ .resolve(algorithm)
+ .resolve(hex.substring(0, 2))
+ .resolve(hex)
+ .resolve("data");
+ }
+
+ /**
+ * Get the last-modified time of the link file as epoch millis.
+ *
+ * @param linkFile Path to the link file
+ * @return Epoch millis
+ */
+ private static long linkMtime(final Path linkFile) {
+ try {
+ return Files.readAttributes(linkFile, BasicFileAttributes.class)
+ .lastModifiedTime().toMillis();
+ } catch (final IOException ex) {
+ LOG.debug(
+ "Cannot read mtime of {}: {}", linkFile, ex.getMessage()
+ );
+ return System.currentTimeMillis();
+ }
+ }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/FileScanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/FileScanner.java
new file mode 100644
index 000000000..9781ddaa9
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/FileScanner.java
@@ -0,0 +1,101 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.stream.Stream;
+
+/**
+ * Scanner for generic file repositories.
+ *
+ *
Walks the directory tree rooted at the given path, filters out
+ * hidden files (names starting with {@code .}), and maps every
+ * regular file to an {@link ArtifactRecord} with {@code repoType="file"},
+ * an empty version string, the file size, and the last-modified time
+ * as the creation date.
+ *
+ * @since 1.20.13
+ */
+final class FileScanner implements Scanner {
+
+ /**
+ * Repository type string stored in every produced artifact record
+ * (e.g. {@code "file"} or {@code "file-proxy"}).
+ */
+ private final String repoType;
+
+ /**
+ * Owner string to set on every produced record.
+ */
+ private final String owner;
+
+ /**
+ * Ctor with default repo type {@code "file"} and owner {@code "system"}.
+ */
+ FileScanner() {
+ this("file", "system");
+ }
+
+ /**
+ * Ctor with given repo type and default owner {@code "system"}.
+ *
+ * @param repoType Repository type string for artifact records
+ */
+ FileScanner(final String repoType) {
+ this(repoType, "system");
+ }
+
+ /**
+ * Ctor.
+ *
+ * @param repoType Repository type string for artifact records
+ * @param owner Owner identifier for produced records
+ */
+ FileScanner(final String repoType, final String owner) {
+ this.repoType = repoType;
+ this.owner = owner;
+ }
+
+ @Override
+ public Stream scan(final Path root, final String repoName)
+ throws IOException {
+ return Files.walk(root)
+ .filter(Files::isRegularFile)
+ .filter(path -> !path.getFileName().toString().startsWith("."))
+ .map(path -> this.toRecord(root, repoName, path));
+ }
+
+ /**
+ * Convert a file path to an artifact record.
+ *
+ * @param root Repository root directory
+ * @param repoName Logical repository name
+ * @param path File path
+ * @return Artifact record
+ */
+ private ArtifactRecord toRecord(final Path root, final String repoName,
+ final Path path) {
+ try {
+ final String relative = root.relativize(path)
+ .toString().replace('\\', '/').replace('/', '.');
+ return new ArtifactRecord(
+ this.repoType,
+ repoName,
+ relative,
+ "UNKNOWN",
+ Files.size(path),
+ Files.getLastModifiedTime(path).toMillis(),
+ null,
+ this.owner,
+ null
+ );
+ } catch (final IOException ex) {
+ throw new UncheckedIOException(ex);
+ }
+ }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/GemScanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/GemScanner.java
new file mode 100644
index 000000000..bcd971d0e
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/GemScanner.java
@@ -0,0 +1,122 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Scanner for Ruby gem repositories.
+ *
+ * <p>Walks the repository directory tree looking for {@code .gem} files.
+ * If a {@code gems/} subdirectory exists under the root, only that
+ * subdirectory is scanned; otherwise the root itself is scanned
+ * (flat layout). Each {@code .gem} filename is parsed with a regex
+ * to extract the gem name and version.
+ *
+ * <p>The filename convention is
+ * {@code {name}-{version}(-{platform}).gem}. Gem names may contain
+ * hyphens (e.g. {@code net-http}, {@code ruby-ole}), so the version
+ * is identified as the first hyphen-separated segment that starts
+ * with a digit.
+ *
+ * @since 1.20.13
+ */
+final class GemScanner implements Scanner {
+
+    /**
+     * Logger.
+     */
+    private static final Logger LOG =
+        LoggerFactory.getLogger(GemScanner.class);
+
+    /**
+     * Pattern for gem filenames.
+     * Captures the gem name (which may contain hyphens) and the
+     * version (which starts with a digit). An optional platform
+     * suffix (e.g. {@code -x86_64-linux}) is allowed but not
+     * captured.
+     * Examples: {@code rake-13.0.6.gem}, {@code net-http-0.3.2.gem},
+     * {@code nokogiri-1.15.4-x86_64-linux.gem}.
+     */
+    private static final Pattern GEM_PATTERN = Pattern.compile(
+        "^(?<name>.+?)-(?<version>\\d[A-Za-z0-9._]*)(?:-[A-Za-z0-9_]+(?:-[A-Za-z0-9_]+)*)?[.]gem$"
+    );
+
+    /**
+     * Name of the standard gems subdirectory.
+     */
+    private static final String GEMS_DIR = "gems";
+
+    @Override
+    public Stream<ArtifactRecord> scan(final Path root, final String repoName)
+        throws IOException {
+        final Path base;
+        if (Files.isDirectory(root.resolve(GemScanner.GEMS_DIR))) {
+            base = root.resolve(GemScanner.GEMS_DIR);
+        } else {
+            base = root;
+        }
+        // Depth 1: gem files live directly in the base directory.
+        return Files.walk(base, 1)
+            .filter(Files::isRegularFile)
+            .filter(path -> !path.getFileName().toString().startsWith("."))
+            .filter(path -> path.getFileName().toString().endsWith(".gem"))
+            .flatMap(path -> this.tryParse(repoName, path));
+    }
+
+    /**
+     * Attempt to parse a gem file path into an artifact record.
+     *
+     * @param repoName Logical repository name
+     * @param path File path to parse
+     * @return Stream with a single record, or empty if filename does not match
+     */
+    private Stream<ArtifactRecord> tryParse(final String repoName,
+        final Path path) {
+        final String filename = path.getFileName().toString();
+        final Matcher matcher = GEM_PATTERN.matcher(filename);
+        if (!matcher.matches()) {
+            LOG.debug(
+                "Skipping non-conforming gem filename: {}", filename
+            );
+            return Stream.empty();
+        }
+        final String name = matcher.group("name");
+        final String version = matcher.group("version");
+        try {
+            final BasicFileAttributes attrs = Files.readAttributes(
+                path, BasicFileAttributes.class
+            );
+            return Stream.of(
+                new ArtifactRecord(
+                    "gem",
+                    repoName,
+                    name,
+                    version,
+                    attrs.size(),
+                    attrs.lastModifiedTime().toMillis(),
+                    null,
+                    "system",
+                    null
+                )
+            );
+        } catch (final IOException ex) {
+            // Stream mapping cannot throw checked exceptions.
+            throw new UncheckedIOException(ex);
+        }
+    }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/GoScanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/GoScanner.java
new file mode 100644
index 000000000..10b088322
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/GoScanner.java
@@ -0,0 +1,331 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+import javax.json.Json;
+import javax.json.JsonException;
+import javax.json.JsonObject;
+import javax.json.JsonReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Scanner for Go module repositories.
+ *
+ * <p>Walks every {@code @v} directory in the tree. For each one:
+ * <ul>
+ * <li>If a {@code list} file is present, versions are read from it
+ * and the corresponding {@code .zip} files are resolved.</li>
+ * <li>Otherwise, all {@code .zip} files in the directory are
+ * enumerated directly. The paired {@code .info} file is used
+ * for date resolution when available.</li>
+ * </ul>
+ *
+ * <p>This per-directory dispatch ensures proxy repos where some modules
+ * have a {@code list} file and others do not are both captured.
+ *
+ * @since 1.20.13
+ */
+final class GoScanner implements Scanner {
+
+    /**
+     * Logger.
+     */
+    private static final Logger LOG =
+        LoggerFactory.getLogger(GoScanner.class);
+
+    /**
+     * Repository type string stored in every produced artifact record
+     * (e.g. {@code "go"} or {@code "go-proxy"}).
+     */
+    private final String repoType;
+
+    /**
+     * Ctor with default repo type {@code "go"}.
+     */
+    GoScanner() {
+        this("go");
+    }
+
+    /**
+     * Ctor.
+     *
+     * @param repoType Repository type string for artifact records
+     */
+    GoScanner(final String repoType) {
+        this.repoType = repoType;
+    }
+
+    @Override
+    public Stream<ArtifactRecord> scan(final Path root, final String repoName)
+        throws IOException {
+        final List<ArtifactRecord> records = new ArrayList<>();
+        try (Stream<Path> walk = Files.walk(root)) {
+            walk.filter(Files::isDirectory)
+                .filter(p -> "@v".equals(p.getFileName().toString()))
+                .forEach(atVDir -> {
+                    final Path listFile = atVDir.resolve("list");
+                    if (Files.isRegularFile(listFile)) {
+                        this.processListFile(root, repoName, listFile)
+                            .forEach(records::add);
+                    } else {
+                        this.processZipDir(root, repoName, atVDir)
+                            .forEach(records::add);
+                    }
+                });
+        }
+        return records.stream();
+    }
+
+    /**
+     * Enumerate {@code .zip} files in an {@code @v} directory that has no
+     * {@code list} file (proxy-cached module with no version list).
+     *
+     * <p>The paired {@code .info} file is used for date resolution when
+     * present; falls back to the zip file mtime.
+     *
+     * @param root Repository root
+     * @param repoName Logical repository name
+     * @param atVDir The {@code @v} directory to scan
+     * @return Stream of artifact records
+     */
+    private Stream<ArtifactRecord> processZipDir(final Path root,
+        final String repoName, final Path atVDir) {
+        final Path moduleDir = atVDir.getParent();
+        final String modulePath = root.relativize(moduleDir)
+            .toString().replace('\\', '/');
+        final List<ArtifactRecord> records = new ArrayList<>();
+        try (Stream<Path> dirStream = Files.list(atVDir)) {
+            dirStream.filter(Files::isRegularFile)
+                .filter(p -> p.getFileName().toString().endsWith(".zip"))
+                .forEach(zipFile -> {
+                    final String fname = zipFile.getFileName().toString();
+                    final String version = fname.substring(
+                        0, fname.length() - ".zip".length()
+                    );
+                    if (version.isEmpty()) {
+                        return;
+                    }
+                    final long createdDate = GoScanner.resolveCreatedDate(
+                        atVDir, version, GoScanner.fileMtime(zipFile)
+                    );
+                    final long size = GoScanner.resolveZipSize(atVDir, version);
+                    final String stripped = GoScanner.stripV(version);
+                    // Proxy repos record the storage path prefix for dedup.
+                    final String pathPrefix = this.repoType.endsWith("-proxy")
+                        ? modulePath + "/@v/" + stripped : null;
+                    records.add(new ArtifactRecord(
+                        this.repoType, repoName, modulePath, stripped,
+                        size, createdDate, null, "system", pathPrefix
+                    ));
+                });
+        } catch (final IOException ex) {
+            LOG.debug("Cannot list @v dir {}: {}", atVDir, ex.getMessage());
+        }
+        return records.stream();
+    }
+
+    /**
+     * Process a single {@code @v/list} file and produce artifact records
+     * for every version listed inside it.
+     *
+     * @param root Repository root directory
+     * @param repoName Logical repository name
+     * @param listFile Path to the {@code @v/list} file
+     * @return Stream of artifact records, one per version
+     */
+    private Stream<ArtifactRecord> processListFile(final Path root,
+        final String repoName, final Path listFile) {
+        final Path atVDir = listFile.getParent();
+        final Path moduleDir = atVDir.getParent();
+        final String modulePath = root.relativize(moduleDir).toString()
+            .replace('\\', '/');
+        final List<String> lines;
+        try {
+            lines = Files.readAllLines(listFile);
+        } catch (final IOException ex) {
+            throw new UncheckedIOException(ex);
+        }
+        final long listMtime = GoScanner.fileMtime(listFile);
+        final List<ArtifactRecord> records = new ArrayList<>();
+        final boolean hasVersions =
+            lines.stream().anyMatch(l -> !l.trim().isEmpty());
+        if (hasVersions) {
+            for (final String line : lines) {
+                final String version = line.trim();
+                if (version.isEmpty()) {
+                    continue;
+                }
+                final Path zipFile = atVDir.resolve(
+                    String.format("%s.zip", version)
+                );
+                if (!Files.isRegularFile(zipFile)) {
+                    LOG.debug(
+                        "Skipping {} {} — zip not cached", modulePath, version
+                    );
+                    continue;
+                }
+                final long createdDate = GoScanner.resolveCreatedDate(
+                    atVDir, version, listMtime
+                );
+                final long size = GoScanner.resolveZipSize(atVDir, version);
+                final String stripped = GoScanner.stripV(version);
+                final String pathPrefix = this.repoType.endsWith("-proxy")
+                    ? modulePath + "/@v/" + stripped : null;
+                records.add(
+                    new ArtifactRecord(
+                        this.repoType,
+                        repoName,
+                        modulePath,
+                        stripped,
+                        size,
+                        createdDate,
+                        null,
+                        "system",
+                        pathPrefix
+                    )
+                );
+            }
+        } else {
+            // Empty list file — scan @v directory directly for .zip files.
+            // Proxy-cached modules where only a specific version was fetched
+            // (no list request) will have an empty list but a present .zip.
+            try (Stream<Path> dirStream = Files.list(atVDir)) {
+                dirStream.filter(Files::isRegularFile)
+                    .filter(p -> p.getFileName().toString().endsWith(".zip"))
+                    .forEach(zipFile -> {
+                        final String fname = zipFile.getFileName().toString();
+                        final String ver = fname.substring(
+                            0, fname.length() - ".zip".length()
+                        );
+                        if (ver.isEmpty()) {
+                            return;
+                        }
+                        final long createdDate = GoScanner.resolveCreatedDate(
+                            atVDir, ver, listMtime
+                        );
+                        final long size =
+                            GoScanner.resolveZipSize(atVDir, ver);
+                        final String stripped = GoScanner.stripV(ver);
+                        final String pathPrefix = this.repoType.endsWith("-proxy")
+                            ? modulePath + "/@v/" + stripped : null;
+                        records.add(new ArtifactRecord(
+                            this.repoType, repoName, modulePath, stripped,
+                            size, createdDate, null, "system", pathPrefix
+                        ));
+                    });
+            } catch (final IOException ex) {
+                LOG.debug(
+                    "Cannot list @v dir {}: {}", atVDir, ex.getMessage()
+                );
+            }
+        }
+        return records.stream();
+    }
+
+    /**
+     * Resolve the creation date for a version. Reads the {@code .info} JSON
+     * file and parses the {@code "Time"} field. Falls back to the list file
+     * mtime if the {@code .info} file is missing or cannot be parsed.
+     *
+     * @param atVDir Path to the {@code @v} directory
+     * @param version Version string (e.g. {@code v1.0.0})
+     * @param fallback Fallback epoch millis (list file mtime)
+     * @return Epoch millis
+     */
+    private static long resolveCreatedDate(final Path atVDir,
+        final String version, final long fallback) {
+        final Path infoFile = atVDir.resolve(
+            String.format("%s.info", version)
+        );
+        if (!Files.isRegularFile(infoFile)) {
+            return fallback;
+        }
+        try (InputStream input = Files.newInputStream(infoFile);
+            JsonReader reader = Json.createReader(input)) {
+            final JsonObject json = reader.readObject();
+            if (json.containsKey("Time") && !json.isNull("Time")) {
+                final String time = json.getString("Time");
+                return Instant.parse(time).toEpochMilli();
+            }
+        } catch (final JsonException ex) {
+            LOG.warn(
+                "Invalid JSON in {}: {}", infoFile, ex.getMessage()
+            );
+        } catch (final Exception ex) {
+            // Broad catch: covers DateTimeParseException from Instant.parse
+            // and ClassCastException from a non-string "Time" value.
+            LOG.warn(
+                "Cannot parse .info file {}: {}", infoFile, ex.getMessage()
+            );
+        }
+        return fallback;
+    }
+
+    /**
+     * Resolve the zip file size for a version. Returns 0 if the zip
+     * file does not exist.
+     *
+     * @param atVDir Path to the {@code @v} directory
+     * @param version Version string (e.g. {@code v1.0.0})
+     * @return File size in bytes, or 0 if not found
+     */
+    private static long resolveZipSize(final Path atVDir,
+        final String version) {
+        final Path zipFile = atVDir.resolve(
+            String.format("%s.zip", version)
+        );
+        if (Files.isRegularFile(zipFile)) {
+            try {
+                return Files.size(zipFile);
+            } catch (final IOException ex) {
+                LOG.debug(
+                    "Cannot stat zip file {}: {}", zipFile, ex.getMessage()
+                );
+                return 0L;
+            }
+        }
+        return 0L;
+    }
+
+    /**
+     * Strip the leading {@code v} prefix from a Go version string.
+     * The Go adapter stores versions without the {@code v} prefix
+     * (e.g. {@code 1.0.0} instead of {@code v1.0.0}).
+     *
+     * @param version Version string, possibly starting with "v"
+     * @return Version without "v" prefix
+     */
+    private static String stripV(final String version) {
+        if (version.startsWith("v") || version.startsWith("V")) {
+            return version.substring(1);
+        }
+        return version;
+    }
+
+    /**
+     * Get the last-modified time of a file as epoch millis.
+     *
+     * @param path Path to the file
+     * @return Epoch millis; current time if the attributes are unreadable
+     */
+    private static long fileMtime(final Path path) {
+        try {
+            return Files.readAttributes(path, BasicFileAttributes.class)
+                .lastModifiedTime().toMillis();
+        } catch (final IOException ex) {
+            LOG.debug(
+                "Cannot read mtime of {}: {}", path, ex.getMessage()
+            );
+            return System.currentTimeMillis();
+        }
+    }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/HelmScanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/HelmScanner.java
new file mode 100644
index 000000000..b5682f7d4
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/HelmScanner.java
@@ -0,0 +1,194 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.yaml.snakeyaml.Yaml;
+
+/**
+ * Scanner for Helm chart repositories.
+ *
+ *
+ * <p>Reads {@code index.yaml} from the repository root, parses it with
+ * SnakeYAML, and emits one {@link ArtifactRecord} per chart version.
+ * The {@code .tgz} file referenced in the {@code urls} list is resolved
+ * relative to the root directory to determine artifact size.
+ *
+ * @since 1.20.13
+ */
+final class HelmScanner implements Scanner {
+
+ /**
+ * Logger.
+ */
+ private static final Logger LOG =
+ LoggerFactory.getLogger(HelmScanner.class);
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public Stream scan(final Path root, final String repoName)
+ throws IOException {
+ final Path indexPath = root.resolve("index.yaml");
+ if (!Files.isRegularFile(indexPath)) {
+ LOG.debug("No index.yaml found in {}", root);
+ return Stream.empty();
+ }
+ final Map index;
+ try (InputStream input = Files.newInputStream(indexPath)) {
+ index = new Yaml().load(input);
+ }
+ if (index == null || !index.containsKey("entries")) {
+ LOG.debug("No 'entries' key in index.yaml at {}", indexPath);
+ return Stream.empty();
+ }
+ final Object entriesObj = index.get("entries");
+ if (!(entriesObj instanceof Map)) {
+ LOG.warn("'entries' is not a map in {}", indexPath);
+ return Stream.empty();
+ }
+ final Map entries = (Map) entriesObj;
+ final long indexMtime = HelmScanner.indexMtime(indexPath);
+ final List records = new ArrayList<>();
+ for (final Map.Entry entry : entries.entrySet()) {
+ final String chartName = entry.getKey();
+ final Object versionsObj = entry.getValue();
+ if (!(versionsObj instanceof List)) {
+ LOG.debug("Skipping chart {} with non-list versions", chartName);
+ continue;
+ }
+ final List
+ *
+ * @since 1.20.13
+ */
+final class RepoConfigYaml {
+
+    /**
+     * Private ctor — utility class, not instantiable.
+     */
+    private RepoConfigYaml() {
+    }
+
+    /**
+     * Parse a single {@code .yaml} Artipie repo config file.
+     *
+     * @param file Path to the {@code .yaml} file
+     * @return Parsed {@link RepoEntry} with repo name (filename stem) and raw type
+     * @throws IOException if the file is unreadable, YAML is malformed,
+     *  or {@code repo.type} is missing
+     */
+    @SuppressWarnings("unchecked")
+    static RepoEntry parse(final Path file) throws IOException {
+        final String filename = file.getFileName().toString();
+        final String repoName;
+        if (filename.endsWith(".yaml")) {
+            repoName = filename.substring(0, filename.length() - ".yaml".length());
+        } else {
+            repoName = filename;
+        }
+        final Map<String, Object> doc;
+        try (InputStream in = Files.newInputStream(file)) {
+            doc = new Yaml().load(in);
+        } catch (final Exception ex) {
+            throw new IOException(
+                String.format("Failed to parse YAML in '%s': %s", filename, ex.getMessage()),
+                ex
+            );
+        }
+        if (doc == null) {
+            throw new IOException(
+                String.format("Empty YAML file: '%s'", filename)
+            );
+        }
+        final Object repoObj = doc.get("repo");
+        if (!(repoObj instanceof Map)) {
+            throw new IOException(
+                String.format("Missing or invalid 'repo' key in '%s'", filename)
+            );
+        }
+        final Map<String, Object> repo = (Map<String, Object>) repoObj;
+        final Object typeObj = repo.get("type");
+        if (typeObj == null) {
+            throw new IOException(
+                String.format("Missing 'repo.type' in '%s'", filename)
+            );
+        }
+        return new RepoEntry(repoName, typeObj.toString());
+    }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/RepoEntry.java b/artipie-backfill/src/main/java/com/artipie/backfill/RepoEntry.java
new file mode 100644
index 000000000..867eadcd4
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/RepoEntry.java
@@ -0,0 +1,15 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+/**
+ * Parsed result of one Artipie repo YAML config file.
+ * Immutable value carrier produced by {@code RepoConfigYaml.parse}.
+ *
+ * @param repoName Repo name derived from the YAML filename stem (e.g. {@code go.yaml} → {@code go})
+ * @param rawType Raw {@code repo.type} string from the YAML (e.g. {@code docker-proxy})
+ * @since 1.20.13
+ */
+record RepoEntry(String repoName, String rawType) {
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/RepoTypeNormalizer.java b/artipie-backfill/src/main/java/com/artipie/backfill/RepoTypeNormalizer.java
new file mode 100644
index 000000000..db980841c
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/RepoTypeNormalizer.java
@@ -0,0 +1,39 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+/**
+ * Normalises raw Artipie repo type strings to scanner type keys
+ * understood by {@link ScannerFactory}.
+ *
+ * <p>Currently only strips the {@code -proxy} suffix
+ * (e.g. {@code docker-proxy} → {@code docker}).
+ * Other compound suffixes (e.g. {@code -hosted}, {@code -group}) are out of
+ * scope and will surface as unknown types in {@link ScannerFactory}.
+ *
+ * @since 1.20.13
+ */
+final class RepoTypeNormalizer {
+
+ /**
+ * Private ctor — utility class, not instantiable.
+ */
+ private RepoTypeNormalizer() {
+ }
+
+ /**
+ * Normalize a raw repo type by stripping the {@code -proxy} suffix.
+ * The input is returned unchanged when no such suffix is present.
+ *
+ * @param rawType Raw {@code repo.type} value from the YAML config
+ * @return Normalised scanner type string
+ */
+ static String normalize(final String rawType) {
+ final String suffix = "-proxy";
+ if (rawType.endsWith(suffix)) {
+ return rawType.substring(0, rawType.length() - suffix.length());
+ }
+ return rawType;
+ }
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/Scanner.java b/artipie-backfill/src/main/java/com/artipie/backfill/Scanner.java
new file mode 100644
index 000000000..bc528f9f5
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/Scanner.java
@@ -0,0 +1,31 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.stream.Stream;
+
+/**
+ * Scans a repository root directory and produces a lazy stream of
+ * {@link ArtifactRecord} instances. Implementations must ensure the
+ * returned stream is lazy so that arbitrarily large repositories can
+ * be processed with constant memory.
+ *
+ * @since 1.20.13
+ */
+@FunctionalInterface
+public interface Scanner {
+
+    /**
+     * Scan the given repository root and produce artifact records.
+     *
+     * @param root Path to the repository root directory on disk
+     * @param repoName Logical repository name
+     * @return Lazy stream of artifact records; callers should close it
+     * @throws IOException If an I/O error occurs while scanning
+     */
+    Stream<ArtifactRecord> scan(Path root, String repoName) throws IOException;
+}
diff --git a/artipie-backfill/src/main/java/com/artipie/backfill/ScannerFactory.java b/artipie-backfill/src/main/java/com/artipie/backfill/ScannerFactory.java
new file mode 100644
index 000000000..99b74cc62
--- /dev/null
+++ b/artipie-backfill/src/main/java/com/artipie/backfill/ScannerFactory.java
@@ -0,0 +1,97 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+/**
+ * Factory that maps repository type strings to {@link Scanner} implementations.
+ *
+ * @since 1.20.13
+ */
+public final class ScannerFactory {
+
+    /**
+     * Private ctor to prevent instantiation.
+     */
+    private ScannerFactory() {
+    }
+
+    /**
+     * Create a scanner for the given repository type.
+     *
+     * <p>Accepts both plain types (e.g. {@code "maven"}) and proxy variants
+     * (e.g. {@code "maven-proxy"}). The raw type string is passed through to
+     * the scanner so that the correct {@code repo_type} value is stored in
+     * the database (matching production).
+     *
+     * @param type Repository type string, raw from YAML
+     *  (e.g. "maven", "docker-proxy", "php")
+     * @return Scanner implementation for the given type
+     * @throws IllegalArgumentException If the type is not recognized
+     */
+    public static Scanner create(final String type) {
+        final String lower = type.toLowerCase(java.util.Locale.ROOT);
+        final Scanner scanner;
+        switch (lower) {
+            case "maven":
+            case "maven-proxy":
+            case "gradle":
+            case "gradle-proxy":
+                // Gradle repos share the Maven layout, so both type families
+                // use the same scanner; the raw type is kept for storage.
+                scanner = new MavenScanner(lower);
+                break;
+            case "docker":
+                scanner = new DockerScanner(lower, false);
+                break;
+            case "docker-proxy":
+                scanner = new DockerScanner(lower, true);
+                break;
+            case "npm":
+                scanner = new NpmScanner(false);
+                break;
+            case "npm-proxy":
+                scanner = new NpmScanner(true);
+                break;
+            case "pypi":
+            case "pypi-proxy":
+                scanner = new PypiScanner(lower);
+                break;
+            case "go":
+            case "go-proxy":
+                scanner = new GoScanner(lower);
+                break;
+            case "helm":
+            case "helm-proxy":
+                scanner = new HelmScanner();
+                break;
+            case "composer":
+            case "composer-proxy":
+            case "php":
+            case "php-proxy":
+                scanner = new ComposerScanner(lower);
+                break;
+            case "file":
+            case "file-proxy":
+                scanner = new FileScanner(lower);
+                break;
+            case "deb":
+            case "deb-proxy":
+            case "debian":
+            case "debian-proxy":
+                scanner = new DebianScanner();
+                break;
+            case "gem":
+            case "gem-proxy":
+            case "gems":
+                scanner = new GemScanner();
+                break;
+            default:
+                throw new IllegalArgumentException(
+                    String.format("Unknown repository type: %s", type)
+                );
+        }
+        return scanner;
+    }
+}
diff --git a/artipie-backfill/src/main/resources/META-INF/services/org.apache.logging.log4j.spi.Provider b/artipie-backfill/src/main/resources/META-INF/services/org.apache.logging.log4j.spi.Provider
new file mode 100644
index 000000000..f2a6da017
--- /dev/null
+++ b/artipie-backfill/src/main/resources/META-INF/services/org.apache.logging.log4j.spi.Provider
@@ -0,0 +1 @@
+org.apache.logging.log4j.core.impl.Log4jProvider
diff --git a/artipie-backfill/src/main/resources/log4j2.xml b/artipie-backfill/src/main/resources/log4j2.xml
new file mode 100644
index 000000000..9a4a05422
--- /dev/null
+++ b/artipie-backfill/src/main/resources/log4j2.xml
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/BackfillCliTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/BackfillCliTest.java
new file mode 100644
index 000000000..86a3cd95f
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/BackfillCliTest.java
@@ -0,0 +1,273 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link BackfillCli}.
+ *
+ * <p>All tests exercise the {@code run()} method which returns an
+ * exit code (0 = success, 1 = error) instead of calling
+ * {@code System.exit()}.
+ *
+ * @since 1.20.13
+ */
+final class BackfillCliTest {
+
+ /**
+ * Dry-run with a file scanner should succeed (exit code 0) and
+ * process all non-hidden regular files in the temp directory.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ void dryRunWithFileScanner(@TempDir final Path tmp) throws IOException {
+ Files.createFile(tmp.resolve("file1.txt"));
+ Files.write(tmp.resolve("file2.dat"), new byte[]{1, 2, 3});
+ Files.createFile(tmp.resolve(".hidden"));
+ final int code = BackfillCli.run(
+ "--type", "file",
+ "--path", tmp.toString(),
+ "--repo-name", "test",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Dry-run with file scanner should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Running with no arguments should fail (exit code 1) because
+ * required options are missing.
+ */
+ @Test
+ void missingRequiredArgs() {
+ final int code = BackfillCli.run();
+ MatcherAssert.assertThat(
+ "Missing required args should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * Running with a non-existent path should fail (exit code 1).
+ */
+ @Test
+ void invalidPath() {
+ final int code = BackfillCli.run(
+ "--type", "file",
+ "--path", "/nonexistent/directory/that/does/not/exist",
+ "--repo-name", "test",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Non-existent path should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * Running with an unknown scanner type should fail (exit code 1).
+ *
+ * @param tmp Temporary directory created by JUnit
+ */
+ @Test
+ void invalidType(@TempDir final Path tmp) {
+ final int code = BackfillCli.run(
+ "--type", "unknown_type_xyz",
+ "--path", tmp.toString(),
+ "--repo-name", "test",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Unknown scanner type should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * Running with --help should succeed (exit code 0).
+ */
+ @Test
+ void helpFlag() {
+ final int code = BackfillCli.run("--help");
+ MatcherAssert.assertThat(
+ "Help flag should return exit code 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Running without --db-url and without --dry-run should fail
+ * (exit code 1) because the database URL is required for real runs.
+ *
+ * @param tmp Temporary directory created by JUnit
+ */
+ @Test
+ void dbUrlRequiredWithoutDryRun(@TempDir final Path tmp) {
+ final int code = BackfillCli.run(
+ "--type", "file",
+ "--path", tmp.toString(),
+ "--repo-name", "test"
+ );
+ MatcherAssert.assertThat(
+ "Missing --db-url without --dry-run should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * Dry-run with nested directories should process files recursively
+ * and skip hidden files.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ void dryRunWithNestedDirectories(@TempDir final Path tmp)
+ throws IOException {
+ final Path sub = tmp.resolve("subdir");
+ Files.createDirectory(sub);
+ Files.createFile(tmp.resolve("root-file.txt"));
+ Files.createFile(sub.resolve("nested-file.txt"));
+ Files.createFile(sub.resolve(".hidden-nested"));
+ final int code = BackfillCli.run(
+ "--type", "file",
+ "--path", tmp.toString(),
+ "--repo-name", "nested-test",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Dry-run with nested directories should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * --config-dir without --storage-root should fail (exit code 1).
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if directory setup fails
+ */
+ @Test
+ void configDirWithoutStorageRootFails(@TempDir final Path tmp)
+ throws IOException {
+ Files.createDirectories(tmp);
+ final int code = BackfillCli.run(
+ "--config-dir", tmp.toString(),
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "--config-dir without --storage-root should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * --storage-root without --config-dir should fail (exit code 1).
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if directory setup fails
+ */
+ @Test
+ void storageRootWithoutConfigDirFails(@TempDir final Path tmp)
+ throws IOException {
+ Files.createDirectories(tmp);
+ final int code = BackfillCli.run(
+ "--storage-root", tmp.toString(),
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "--storage-root without --config-dir should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * --config-dir combined with --type should fail (mutually exclusive).
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if directory setup fails
+ */
+ @Test
+ void configDirAndTypeTogether(@TempDir final Path tmp) throws IOException {
+ Files.createDirectories(tmp);
+ final int code = BackfillCli.run(
+ "--config-dir", tmp.toString(),
+ "--storage-root", tmp.toString(),
+ "--type", "file",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "--config-dir and --type together should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * Valid --config-dir + --storage-root in dry-run mode → exit code 0.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void bulkModeWithConfigDirSucceeds(@TempDir final Path tmp)
+ throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ Files.createDirectories(storageRoot);
+ Files.writeString(configDir.resolve("myrepo.yaml"), "repo:\n  type: file\n");
+ Files.createDirectories(storageRoot.resolve("myrepo"));
+ Files.writeString(storageRoot.resolve("myrepo").resolve("f.txt"), "hi");
+ final int code = BackfillCli.run(
+ "--config-dir", configDir.toString(),
+ "--storage-root", storageRoot.toString(),
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Valid bulk mode dry-run should return exit code 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * --type alone without --path and --repo-name should fail (exit code 1).
+ *
+ * @throws IOException if test setup fails
+ */
+ @Test
+ void typeWithoutPathAndRepoNameFails() throws IOException {
+ final int code = BackfillCli.run(
+ "--type", "file",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "--type without --path and --repo-name should return exit code 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/BackfillIntegrationTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/BackfillIntegrationTest.java
new file mode 100644
index 000000000..786940e98
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/BackfillIntegrationTest.java
@@ -0,0 +1,533 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.Statement;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.MethodOrderer;
+import org.junit.jupiter.api.Order;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestMethodOrder;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Integration tests for the backfill CLI pipeline.
+ *
+ * <p>Dry-run tests (always run) exercise the full pipeline
+ * {@code BackfillCli -> ScannerFactory -> Scanner -> BatchInserter(dry-run)}
+ * for every supported scanner type with minimal but valid sample data.
+ *
+ * <p>PostgreSQL tests (gated behind the {@code BACKFILL_IT_DB_URL}
+ * environment variable) verify actual database inserts and
+ * UPSERT idempotency against a real PostgreSQL instance.
+ *
+ * @since 1.20.13
+ */
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
+final class BackfillIntegrationTest {
+
+ // ---------------------------------------------------------------
+ // Dry-run tests (always run)
+ // ---------------------------------------------------------------
+
+ /**
+ * Maven scanner dry-run: creates a minimal maven-metadata.xml with
+ * one version directory containing a JAR and verifies exit code 0.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(1)
+ void dryRunMavenScanner(@TempDir final Path tmp) throws IOException {
+ final Path artifact = tmp.resolve("com/example/mylib");
+ Files.createDirectories(artifact);
+ // Minimal but well-formed maven-metadata.xml: groupId, artifactId
+ // and a versions list, which is what the scanner reads.
+ Files.writeString(
+ artifact.resolve("maven-metadata.xml"),
+ String.join(
+ "\n",
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
+ "<metadata>",
+ " <groupId>com.example</groupId>",
+ " <artifactId>mylib</artifactId>",
+ " <versioning>",
+ " <versions>",
+ " <version>1.0.0</version>",
+ " </versions>",
+ " </versioning>",
+ "</metadata>"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final Path ver = artifact.resolve("1.0.0");
+ Files.createDirectories(ver);
+ Files.write(ver.resolve("mylib-1.0.0.jar"), new byte[64]);
+ final int code = BackfillCli.run(
+ "--type", "maven",
+ "--path", tmp.toString(),
+ "--repo-name", "it-maven",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Maven dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Docker scanner dry-run: creates a minimal Docker registry layout
+ * with one image, one tag, and a manifest blob.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(2)
+ void dryRunDockerScanner(@TempDir final Path tmp) throws IOException {
+ final String digest = "sha256:aabbccdd11223344";
+ // Registry-v2-style layout: the tag's "current/link" file holds the
+ // digest of the manifest blob written below.
+ final Path linkDir = tmp
+ .resolve("repositories")
+ .resolve("alpine")
+ .resolve("_manifests")
+ .resolve("tags")
+ .resolve("3.18")
+ .resolve("current");
+ Files.createDirectories(linkDir);
+ Files.writeString(
+ linkDir.resolve("link"), digest, StandardCharsets.UTF_8
+ );
+ // Blob path is blobs/sha256/<first two hex chars>/<full hex>/data.
+ final String hex = digest.split(":", 2)[1];
+ final Path blobDir = tmp.resolve("blobs")
+ .resolve("sha256")
+ .resolve(hex.substring(0, 2))
+ .resolve(hex);
+ Files.createDirectories(blobDir);
+ Files.writeString(
+ blobDir.resolve("data"),
+ String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"config\": { \"size\": 100, \"digest\": \"sha256:cfg\" },",
+ " \"layers\": [",
+ " { \"size\": 500, \"digest\": \"sha256:l1\" }",
+ " ]",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final int code = BackfillCli.run(
+ "--type", "docker",
+ "--path", tmp.toString(),
+ "--repo-name", "it-docker",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Docker dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * NPM scanner dry-run: creates a meta.json with one scoped package
+ * and one version entry.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(3)
+ void dryRunNpmScanner(@TempDir final Path tmp) throws IOException {
+ final Path pkgDir = tmp.resolve("@scope/widget");
+ Files.createDirectories(pkgDir);
+ Files.writeString(
+ pkgDir.resolve("meta.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"name\": \"@scope/widget\",",
+ " \"versions\": {",
+ " \"2.0.0\": {",
+ " \"name\": \"@scope/widget\",",
+ " \"version\": \"2.0.0\",",
+ " \"dist\": {",
+ " \"tarball\": \"/@scope/widget/-/"
+ + "@scope/widget-2.0.0.tgz\"",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final int code = BackfillCli.run(
+ "--type", "npm",
+ "--path", tmp.toString(),
+ "--repo-name", "it-npm",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "NPM dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * PyPI scanner dry-run: creates a wheel file in a package directory.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(4)
+ void dryRunPypiScanner(@TempDir final Path tmp) throws IOException {
+ final Path pkgDir = tmp.resolve("requests");
+ Files.createDirectories(pkgDir);
+ // A single wheel; name/version presumably parsed from the filename
+ // — verify against PypiScanner if this ever changes.
+ Files.write(
+ pkgDir.resolve("requests-2.31.0-py3-none-any.whl"),
+ new byte[80]
+ );
+ final int code = BackfillCli.run(
+ "--type", "pypi",
+ "--path", tmp.toString(),
+ "--repo-name", "it-pypi",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "PyPI dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Go scanner dry-run: creates a module {@code @v} directory with
+ * a version list file and a .info JSON file.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(5)
+ void dryRunGoScanner(@TempDir final Path tmp) throws IOException {
+ // Go proxy layout: <module>/@v/ holds the "list" file plus
+ // per-version .info and .zip files.
+ final Path atv = tmp.resolve("example.com/mod/@v");
+ Files.createDirectories(atv);
+ Files.writeString(
+ atv.resolve("list"),
+ "v1.0.0\n",
+ StandardCharsets.UTF_8
+ );
+ Files.writeString(
+ atv.resolve("v1.0.0.info"),
+ "{\"Version\":\"v1.0.0\","
+ + "\"Time\":\"2024-01-01T00:00:00Z\"}",
+ StandardCharsets.UTF_8
+ );
+ Files.write(atv.resolve("v1.0.0.zip"), new byte[128]);
+ final int code = BackfillCli.run(
+ "--type", "go",
+ "--path", tmp.toString(),
+ "--repo-name", "it-go",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Go dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Helm scanner dry-run: creates an index.yaml with one chart entry
+ * and a corresponding .tgz file.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(6)
+ void dryRunHelmScanner(@TempDir final Path tmp) throws IOException {
+ Files.writeString(
+ tmp.resolve("index.yaml"),
+ String.join(
+ "\n",
+ "apiVersion: v1",
+ "entries:",
+ " mychart:",
+ " - name: mychart",
+ " version: 0.1.0",
+ " urls:",
+ " - mychart-0.1.0.tgz",
+ " created: '2024-06-01T00:00:00+00:00'"
+ ),
+ StandardCharsets.UTF_8
+ );
+ // The chart archive referenced by the index entry's urls list.
+ Files.write(tmp.resolve("mychart-0.1.0.tgz"), new byte[256]);
+ final int code = BackfillCli.run(
+ "--type", "helm",
+ "--path", tmp.toString(),
+ "--repo-name", "it-helm",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Helm dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Composer scanner dry-run: creates a p2 layout with one package
+ * JSON file containing one vendor/package with one version.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(7)
+ void dryRunComposerScanner(@TempDir final Path tmp) throws IOException {
+ final Path vendorDir = tmp.resolve("p2").resolve("vendor");
+ Files.createDirectories(vendorDir);
+ Files.writeString(
+ vendorDir.resolve("lib.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"vendor/lib\": {",
+ " \"1.0.0\": {",
+ " \"name\": \"vendor/lib\",",
+ " \"version\": \"1.0.0\",",
+ " \"dist\": {",
+ " \"url\": \"https://example.com/lib.zip\",",
+ " \"type\": \"zip\"",
+ " }",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final int code = BackfillCli.run(
+ "--type", "composer",
+ "--path", tmp.toString(),
+ "--repo-name", "it-composer",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "Composer dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * File scanner dry-run: creates a couple of plain files and one
+ * hidden file that should be skipped.
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws IOException If temp file creation fails
+ */
+ @Test
+ @Order(8)
+ void dryRunFileScanner(@TempDir final Path tmp) throws IOException {
+ Files.createFile(tmp.resolve("readme.txt"));
+ Files.write(tmp.resolve("data.bin"), new byte[32]);
+ // Dot-prefixed file: per the Javadoc above, expected to be skipped.
+ Files.createFile(tmp.resolve(".hidden"));
+ final int code = BackfillCli.run(
+ "--type", "file",
+ "--path", tmp.toString(),
+ "--repo-name", "it-file",
+ "--dry-run"
+ );
+ MatcherAssert.assertThat(
+ "File dry-run should succeed",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ // ---------------------------------------------------------------
+ // PostgreSQL tests (gated behind BACKFILL_IT_DB_URL)
+ // ---------------------------------------------------------------
+
+ /**
+ * Insert records into a real PostgreSQL instance via the CLI pipeline
+ * and verify the row count matches the expected number.
+ *
+ * <p>Requires the following environment variables:
+ *
+ * <ul>
+ * <li>{@code BACKFILL_IT_DB_URL} - JDBC URL, e.g.
+ * {@code jdbc:postgresql://localhost:5432/artipie}</li>
+ * </ul>
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws Exception If I/O or SQL operations fail
+ */
+ @Test
+ @Order(10)
+ @EnabledIfEnvironmentVariable(named = "BACKFILL_IT_DB_URL", matches = ".+")
+ void insertsRecordsIntoPostgres(@TempDir final Path tmp) throws Exception {
+ final String dbUrl = System.getenv("BACKFILL_IT_DB_URL");
+ final String dbUser = System.getenv().getOrDefault(
+ "BACKFILL_IT_DB_USER", "artipie"
+ );
+ final String dbPassword = System.getenv().getOrDefault(
+ "BACKFILL_IT_DB_PASSWORD", "artipie"
+ );
+ // Unique repo name per run so repeated executions against the same
+ // database cannot skew the row-count assertion.
+ final String repoName = "it-pg-maven-" + System.nanoTime();
+ final Path artifact = tmp.resolve("org/test/pglib");
+ Files.createDirectories(artifact);
+ // Well-formed maven-metadata.xml with two versions → two rows.
+ Files.writeString(
+ artifact.resolve("maven-metadata.xml"),
+ String.join(
+ "\n",
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
+ "<metadata>",
+ " <groupId>org.test</groupId>",
+ " <artifactId>pglib</artifactId>",
+ " <versioning>",
+ " <versions>",
+ " <version>1.0.0</version>",
+ " <version>2.0.0</version>",
+ " </versions>",
+ " </versioning>",
+ "</metadata>"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final Path ver1 = artifact.resolve("1.0.0");
+ Files.createDirectories(ver1);
+ Files.write(ver1.resolve("pglib-1.0.0.jar"), new byte[100]);
+ final Path ver2 = artifact.resolve("2.0.0");
+ Files.createDirectories(ver2);
+ Files.write(ver2.resolve("pglib-2.0.0.jar"), new byte[200]);
+ final int code = BackfillCli.run(
+ "--type", "maven",
+ "--path", tmp.toString(),
+ "--repo-name", repoName,
+ "--db-url", dbUrl,
+ "--db-user", dbUser,
+ "--db-password", dbPassword,
+ "--batch-size", "10"
+ );
+ MatcherAssert.assertThat(
+ "CLI should succeed inserting into PostgreSQL",
+ code,
+ Matchers.is(0)
+ );
+ final long count;
+ // Parameter binding instead of string concatenation: avoids any
+ // quoting pitfalls and follows PreparedStatement best practice.
+ try (Connection conn =
+ DriverManager.getConnection(dbUrl, dbUser, dbPassword);
+ PreparedStatement stmt = conn.prepareStatement(
+ "SELECT count(*) FROM artifacts WHERE repo_name = ?"
+ )) {
+ stmt.setString(1, repoName);
+ try (ResultSet rset = stmt.executeQuery()) {
+ rset.next();
+ count = rset.getLong(1);
+ }
+ }
+ MatcherAssert.assertThat(
+ "Should have inserted exactly 2 records",
+ count,
+ Matchers.is(2L)
+ );
+ }
+
+ /**
+ * Run the same backfill again and verify the UPSERT does not
+ * duplicate rows (idempotency check).
+ *
+ * @param tmp Temporary directory created by JUnit
+ * @throws Exception If I/O or SQL operations fail
+ */
+ @Test
+ @Order(11)
+ @EnabledIfEnvironmentVariable(named = "BACKFILL_IT_DB_URL", matches = ".+")
+ void upsertIsIdempotent(@TempDir final Path tmp) throws Exception {
+ final String dbUrl = System.getenv("BACKFILL_IT_DB_URL");
+ final String dbUser = System.getenv().getOrDefault(
+ "BACKFILL_IT_DB_USER", "artipie"
+ );
+ final String dbPassword = System.getenv().getOrDefault(
+ "BACKFILL_IT_DB_PASSWORD", "artipie"
+ );
+ final String repoName = "it-pg-idempotent-" + System.nanoTime();
+ final Path artifact = tmp.resolve("org/test/idem");
+ Files.createDirectories(artifact);
+ // Single version → exactly one row expected after both runs.
+ Files.writeString(
+ artifact.resolve("maven-metadata.xml"),
+ String.join(
+ "\n",
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
+ "<metadata>",
+ " <groupId>org.test</groupId>",
+ " <artifactId>idem</artifactId>",
+ " <versioning>",
+ " <versions>",
+ " <version>1.0.0</version>",
+ " </versions>",
+ " </versioning>",
+ "</metadata>"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final Path ver = artifact.resolve("1.0.0");
+ Files.createDirectories(ver);
+ Files.write(ver.resolve("idem-1.0.0.jar"), new byte[50]);
+ final String[] args = {
+ "--type", "maven",
+ "--path", tmp.toString(),
+ "--repo-name", repoName,
+ "--db-url", dbUrl,
+ "--db-user", dbUser,
+ "--db-password", dbPassword,
+ "--batch-size", "10",
+ };
+ final int firstRun = BackfillCli.run(args);
+ MatcherAssert.assertThat(
+ "First run should succeed",
+ firstRun,
+ Matchers.is(0)
+ );
+ final int secondRun = BackfillCli.run(args);
+ MatcherAssert.assertThat(
+ "Second run (upsert) should succeed",
+ secondRun,
+ Matchers.is(0)
+ );
+ final long count;
+ // Parameterized query instead of string concatenation.
+ try (Connection conn =
+ DriverManager.getConnection(dbUrl, dbUser, dbPassword);
+ PreparedStatement stmt = conn.prepareStatement(
+ "SELECT count(*) FROM artifacts WHERE repo_name = ?"
+ )) {
+ stmt.setString(1, repoName);
+ try (ResultSet rset = stmt.executeQuery()) {
+ rset.next();
+ count = rset.getLong(1);
+ }
+ }
+ MatcherAssert.assertThat(
+ "UPSERT should not duplicate; count should still be 1",
+ count,
+ Matchers.is(1L)
+ );
+ }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/BatchInserterTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/BatchInserterTest.java
new file mode 100644
index 000000000..ff5048fa9
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/BatchInserterTest.java
@@ -0,0 +1,174 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for {@link BatchInserter}.
+ *
+ * <p>These tests exercise dry-run counting, flush-threshold logic, and
+ * close-flushes-remaining behavior. Full database integration tests
+ * (PostgreSQL upsert, parameter binding, error fall-back) are deferred
+ * to Task 12.
+ *
+ * @since 1.20.13
+ */
+final class BatchInserterTest {
+
+ /**
+ * In dry-run mode, records are counted but nothing is written to the
+ * database. The {@code insertedCount} reflects the number of records
+ * that would have been inserted.
+ */
+ @Test
+ void dryRunCountsWithoutDbInteraction() {
+ try (BatchInserter inserter = new BatchInserter(null, 100, true)) {
+ for (int idx = 0; idx < 5; idx++) {
+ inserter.accept(sampleRecord(idx));
+ }
+ // Explicit flush: batchSize 100 is never reached by 5 records.
+ inserter.flush();
+ MatcherAssert.assertThat(
+ "Dry-run should count all accepted records",
+ inserter.getInsertedCount(),
+ Matchers.is(5L)
+ );
+ MatcherAssert.assertThat(
+ "Dry-run should have zero skipped",
+ inserter.getSkippedCount(),
+ Matchers.is(0L)
+ );
+ }
+ }
+
+ /**
+ * Verify that dry-run auto-flushes when the buffer reaches batchSize.
+ */
+ @Test
+ void dryRunAutoFlushesAtBatchSize() {
+ try (BatchInserter inserter = new BatchInserter(null, 3, true)) {
+ inserter.accept(sampleRecord(1));
+ inserter.accept(sampleRecord(2));
+ MatcherAssert.assertThat(
+ "Before reaching batchSize, insertedCount should be 0",
+ inserter.getInsertedCount(),
+ Matchers.is(0L)
+ );
+ // The third accept reaches batchSize=3 and triggers the flush.
+ inserter.accept(sampleRecord(3));
+ MatcherAssert.assertThat(
+ "After reaching batchSize, auto-flush should have counted 3",
+ inserter.getInsertedCount(),
+ Matchers.is(3L)
+ );
+ }
+ }
+
+ /**
+ * Verify that close() flushes remaining records that haven't reached
+ * batchSize yet.
+ */
+ @Test
+ void closeFlushesRemainingRecords() {
+ // Deliberately no try-with-resources: the count is asserted both
+ // before and after the explicit close().
+ final BatchInserter inserter = new BatchInserter(null, 100, true);
+ inserter.accept(sampleRecord(1));
+ inserter.accept(sampleRecord(2));
+ MatcherAssert.assertThat(
+ "Before close, records should still be buffered",
+ inserter.getInsertedCount(),
+ Matchers.is(0L)
+ );
+ inserter.close();
+ MatcherAssert.assertThat(
+ "After close, remaining records should be flushed",
+ inserter.getInsertedCount(),
+ Matchers.is(2L)
+ );
+ }
+
+ /**
+ * Verify that multiple flushes accumulate the inserted count.
+ */
+ @Test
+ void multipleFlushesAccumulateCount() {
+ try (BatchInserter inserter = new BatchInserter(null, 2, true)) {
+ inserter.accept(sampleRecord(1));
+ inserter.accept(sampleRecord(2));
+ MatcherAssert.assertThat(
+ "First flush should count 2",
+ inserter.getInsertedCount(),
+ Matchers.is(2L)
+ );
+ inserter.accept(sampleRecord(3));
+ inserter.accept(sampleRecord(4));
+ MatcherAssert.assertThat(
+ "Second flush should bring total to 4",
+ inserter.getInsertedCount(),
+ Matchers.is(4L)
+ );
+ }
+ }
+
+ /**
+ * Verify that flushing an empty buffer does nothing.
+ */
+ @Test
+ void flushEmptyBufferIsNoop() {
+ try (BatchInserter inserter = new BatchInserter(null, 10, true)) {
+ inserter.flush();
+ MatcherAssert.assertThat(
+ "Flushing empty buffer should leave count at 0",
+ inserter.getInsertedCount(),
+ Matchers.is(0L)
+ );
+ }
+ }
+
+ /**
+ * Verify that in dry-run mode, DataSource is never touched (null is
+ * safe), even across multiple auto-flushes.
+ */
+ @Test
+ void dryRunAcceptsNullDataSource() {
+ try (BatchInserter inserter = new BatchInserter(null, 5, true)) {
+ for (int idx = 0; idx < 12; idx++) {
+ inserter.accept(sampleRecord(idx));
+ }
+ // 12 records at batchSize 5 → two auto-flushes (10 counted)
+ // must have happened without any DataSource interaction; the
+ // remaining 2 are flushed by close() on scope exit.
+ MatcherAssert.assertThat(
+ "Auto-flushes should count 10 records with null DataSource",
+ inserter.getInsertedCount(),
+ Matchers.is(10L)
+ );
+ }
+ }
+
+ /**
+ * Verify counters start at zero.
+ */
+ @Test
+ void countersStartAtZero() {
+ try (BatchInserter inserter = new BatchInserter(null, 10, true)) {
+ MatcherAssert.assertThat(
+ "Initial insertedCount should be 0",
+ inserter.getInsertedCount(),
+ Matchers.is(0L)
+ );
+ MatcherAssert.assertThat(
+ "Initial skippedCount should be 0",
+ inserter.getSkippedCount(),
+ Matchers.is(0L)
+ );
+ }
+ }
+
+ /**
+ * Create a sample ArtifactRecord for testing.
+ *
+ * @param idx Unique index to distinguish records
+ * @return Sample record
+ */
+ private static ArtifactRecord sampleRecord(final int idx) {
+ // Positional args appear to be: repo type, repo name, artifact
+ // name, version, size, release millis, then three nullable/owner
+ // fields — confirm against ArtifactRecord's declaration.
+ return new ArtifactRecord(
+ "maven", "repo", "art-" + idx, "1.0." + idx,
+ 1024L, 1700000000L + idx, null, "system", null
+ );
+ }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/BulkBackfillRunnerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/BulkBackfillRunnerTest.java
new file mode 100644
index 000000000..6ea248353
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/BulkBackfillRunnerTest.java
@@ -0,0 +1,354 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link BulkBackfillRunner}.
+ *
+ * <p>All tests use {@code dryRun=true} and a null datasource unless testing
+ * the FAILED path, which deliberately uses {@code dryRun=false} and a null
+ * datasource to trigger a NullPointerException in BatchInserter.
+ *
+ * @since 1.20.13
+ */
+final class BulkBackfillRunnerTest {
+
+ /**
+ * Null print stream for suppressing summary output during tests.
+ */
+ private static final PrintStream DEV_NULL =
+ new PrintStream(OutputStream.nullOutputStream());
+
+ // ── Happy path ───────────────────────────────────────────────────────────
+
+ /**
+ * Empty config dir → exit code 0, zero repos processed.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if directory setup fails
+ */
+ @Test
+ void emptyConfigDirSucceeds(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ Files.createDirectories(storageRoot);
+ // No *.yaml configs at all: the run has nothing to do and succeeds.
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "Empty config dir should return exit code 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Two valid repos with file scanner → both succeed, exit code 0.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void twoValidReposSucceed(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ // Repo 1: "myfiles" type file
+ Files.writeString(configDir.resolve("myfiles.yaml"), "repo:\n type: file\n");
+ final Path repo1 = storageRoot.resolve("myfiles");
+ Files.createDirectories(repo1);
+ Files.writeString(repo1.resolve("artifact.txt"), "content");
+ // Repo 2: "otherfiles" type file
+ Files.writeString(configDir.resolve("otherfiles.yaml"), "repo:\n type: file\n");
+ final Path repo2 = storageRoot.resolve("otherfiles");
+ Files.createDirectories(repo2);
+ Files.writeString(repo2.resolve("pkg.dat"), "data");
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "Two valid repos should return exit code 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ // ── SKIPPED paths ────────────────────────────────────────────────────────
+
+ /**
+ * Repo with unknown type → SKIPPED, rest continue, exit code 0.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void unknownTypeIsSkipped(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ // Unknown type: no scanner is registered for "weird-hosted".
+ Files.writeString(configDir.resolve("weird.yaml"), "repo:\n type: weird-hosted\n");
+ // Valid repo that should still run
+ Files.writeString(configDir.resolve("myfiles.yaml"), "repo:\n type: file\n");
+ Files.createDirectories(storageRoot.resolve("myfiles"));
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "Unknown type should be SKIPPED, run exits 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Repo with missing storage path → SKIPPED, rest continue, exit code 0.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void missingStoragePathIsSkipped(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ Files.createDirectories(storageRoot);
+ // This repo has a valid YAML but no matching storage directory
+ Files.writeString(configDir.resolve("ghost.yaml"), "repo:\n type: file\n");
+ // Valid repo
+ Files.writeString(configDir.resolve("real.yaml"), "repo:\n type: file\n");
+ Files.createDirectories(storageRoot.resolve("real"));
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "Missing storage path should be SKIPPED, run exits 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Proxy type is normalised before lookup: docker-proxy → docker scanner is used.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void proxyTypeIsNormalised(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ // docker-proxy should normalise to docker
+ Files.writeString(
+ configDir.resolve("docker_cache.yaml"),
+ "repo:\n type: docker-proxy\n"
+ );
+ // Create minimal docker v2 storage layout so DockerScanner doesn't fail on missing dirs
+ final Path dockerRepo = storageRoot.resolve("docker_cache");
+ Files.createDirectories(dockerRepo.resolve("repositories"));
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "docker-proxy should normalise to docker scanner, exit 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ // ── PARSE_ERROR paths ────────────────────────────────────────────────────
+
+ /**
+ * Malformed YAML → PARSE_ERROR, rest continue, exit code 0.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void parseErrorContinuesRun(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ // Unclosed flow sequence: guaranteed YAML parse failure.
+ Files.writeString(configDir.resolve("bad.yaml"), "repo: [\nunclosed\n");
+ Files.writeString(configDir.resolve("good.yaml"), "repo:\n type: file\n");
+ Files.createDirectories(storageRoot.resolve("good"));
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "PARSE_ERROR should not set exit code to 1",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * PARSE_ERROR only run → exit code 0.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void parseErrorOnlyExitsZero(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ Files.createDirectories(configDir);
+ Files.writeString(configDir.resolve("bad.yaml"), "not: valid: yaml: content\n broken");
+ final int code = runner(configDir, tmp, true).run();
+ MatcherAssert.assertThat(
+ "PARSE_ERROR only should exit 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ // ── FAILED paths ─────────────────────────────────────────────────────────
+
+ /**
+ * Scanner throws (triggered by null datasource + dryRun=false) → FAILED,
+ * rest continue, exit code 1.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void failedRepoExitsOne(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ // This repo will FAIL: dryRun=false, dataSource=null → NPE in BatchInserter
+ Files.writeString(configDir.resolve("willbreak.yaml"), "repo:\n type: file\n");
+ final Path breakRepo = storageRoot.resolve("willbreak");
+ Files.createDirectories(breakRepo);
+ Files.writeString(breakRepo.resolve("a.txt"), "x");
+ // dryRun=false, dataSource=null triggers failure
+ final int code = new BulkBackfillRunner(
+ configDir, storageRoot, null, "system", 100, false, 10000, DEV_NULL
+ ).run();
+ MatcherAssert.assertThat(
+ "FAILED repo should set exit code to 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ /**
+ * PARSE_ERROR + FAILED in same run → exit code 1 (FAILED dominates).
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void parseErrorPlusFailed(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ Files.writeString(configDir.resolve("bad.yaml"), "not: valid\n broken: [");
+ // FAILED path again: non-dry-run with a null datasource.
+ Files.writeString(configDir.resolve("willbreak.yaml"), "repo:\n type: file\n");
+ final Path breakRepo = storageRoot.resolve("willbreak");
+ Files.createDirectories(breakRepo);
+ Files.writeString(breakRepo.resolve("a.txt"), "x");
+ final int code = new BulkBackfillRunner(
+ configDir, storageRoot, null, "system", 100, false, 10000, DEV_NULL
+ ).run();
+ MatcherAssert.assertThat(
+ "PARSE_ERROR + FAILED should exit 1",
+ code,
+ Matchers.is(1)
+ );
+ }
+
+ // ── Edge cases ───────────────────────────────────────────────────────────
+
+ /**
+ * Subdirectories in config dir are ignored (non-recursive).
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void subdirectoriesAreIgnored(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ // Subdirectory with a yaml inside — should not be processed
+ final Path subdir = configDir.resolve("subgroup");
+ Files.createDirectories(subdir);
+ Files.writeString(subdir.resolve("inner.yaml"), "repo:\n type: file\n");
+ // Valid top-level repo
+ Files.writeString(configDir.resolve("top.yaml"), "repo:\n type: file\n");
+ Files.createDirectories(storageRoot.resolve("top"));
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "Subdirectories should be ignored, run exits 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * A .yml file (wrong extension) is skipped — not processed, run still succeeds.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void ymlExtensionIsSkipped(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ // .yml file should be silently skipped
+ Files.writeString(configDir.resolve("repo.yml"), "repo:\n type: file\n");
+ // Valid .yaml file
+ Files.writeString(configDir.resolve("valid.yaml"), "repo:\n type: file\n");
+ Files.createDirectories(storageRoot.resolve("valid"));
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ ".yml file should be skipped, run exits 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ /**
+ * Two repos with different names both succeed — verifies the seenNames set
+ * does not produce false-positive duplicate collisions.
+ *
+ * <p>Note: the filesystem guarantees unique filenames within a directory,
+ * so a true stem collision (two files producing the same stem) cannot
+ * occur in practice. The {@code seenNames} guard is a defensive measure.
+ * This test verifies the guard does not interfere with normal operation.
+ *
+ * @param tmp JUnit temp directory
+ * @throws IOException if file setup fails
+ */
+ @Test
+ void twoDistinctReposDoNotCollide(@TempDir final Path tmp) throws IOException {
+ final Path configDir = tmp.resolve("configs");
+ final Path storageRoot = tmp.resolve("data");
+ Files.createDirectories(configDir);
+ Files.writeString(configDir.resolve("alpha.yaml"), "repo:\n type: file\n");
+ Files.writeString(configDir.resolve("beta.yaml"), "repo:\n type: file\n");
+ Files.createDirectories(storageRoot.resolve("alpha"));
+ Files.createDirectories(storageRoot.resolve("beta"));
+ final int code = runner(configDir, storageRoot, true).run();
+ MatcherAssert.assertThat(
+ "Two repos with distinct names should both succeed, exit 0",
+ code,
+ Matchers.is(0)
+ );
+ }
+
+ // ── Helper ───────────────────────────────────────────────────────────────
+
+ /**
+ * Build a dry-run-capable runner with a null datasource and defaults
+ * matching the production CLI (owner "system", batch size 1000).
+ *
+ * @param configDir Directory containing *.yaml repo configs
+ * @param storageRoot Root directory holding per-repo storage
+ * @param dryRun Whether to count records without writing to the DB
+ * @return Configured {@link BulkBackfillRunner}
+ */
+ private static BulkBackfillRunner runner(
+ final Path configDir,
+ final Path storageRoot,
+ final boolean dryRun
+ ) {
+ return new BulkBackfillRunner(
+ configDir, storageRoot, null, "system", 1000, dryRun, 10000, DEV_NULL
+ );
+ }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/ComposerScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/ComposerScannerTest.java
new file mode 100644
index 000000000..a20a0f5d6
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/ComposerScannerTest.java
@@ -0,0 +1,481 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link ComposerScanner}.
+ *
+ * @since 1.20.13
+ */
+final class ComposerScannerTest {
+
+ @Test
+ void scansP2Layout(@TempDir final Path temp) throws IOException {
+ final Path vendorDir = temp.resolve("p2").resolve("vendor");
+ Files.createDirectories(vendorDir);
+ Files.writeString(
+ vendorDir.resolve("package.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"vendor/package\": {",
+ " \"1.0.0\": {",
+ " \"name\": \"vendor/package\",",
+ " \"version\": \"1.0.0\",",
+ " \"dist\": {",
+ " \"url\": \"https://example.com/vendor/package-1.0.0.zip\",",
+ " \"type\": \"zip\"",
+ " }",
+ " },",
+ " \"2.0.0\": {",
+ " \"name\": \"vendor/package\",",
+ " \"version\": \"2.0.0\",",
+ " \"dist\": {",
+ " \"url\": \"https://example.com/vendor/package-2.0.0.zip\",",
+ " \"type\": \"zip\"",
+ " }",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "composer-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 2 records for 2 versions",
+ records,
+ Matchers.hasSize(2)
+ );
+ MatcherAssert.assertThat(
+ "All records should have name vendor/package",
+ records.stream().allMatch(
+ r -> "vendor/package".equals(r.name())
+ ),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain version 1.0.0",
+ records.stream().anyMatch(r -> "1.0.0".equals(r.version())),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain version 2.0.0",
+ records.stream().anyMatch(r -> "2.0.0".equals(r.version())),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Repo type should be composer",
+ records.get(0).repoType(),
+ Matchers.is("composer")
+ );
+ }
+
+ @Test
+ void scansPackagesJsonLayout(@TempDir final Path temp) throws IOException {
+ Files.writeString(
+ temp.resolve("packages.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"acme/foo\": {",
+ " \"1.0.0\": {",
+ " \"name\": \"acme/foo\",",
+ " \"version\": \"1.0.0\",",
+ " \"dist\": {",
+ " \"url\": \"https://example.com/acme/foo-1.0.0.zip\",",
+ " \"type\": \"zip\"",
+ " }",
+ " }",
+ " },",
+ " \"acme/bar\": {",
+ " \"2.0.0\": {",
+ " \"name\": \"acme/bar\",",
+ " \"version\": \"2.0.0\",",
+ " \"dist\": {",
+ " \"url\": \"https://example.com/acme/bar-2.0.0.zip\",",
+ " \"type\": \"zip\"",
+ " }",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "composer-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 2 records for 2 packages",
+ records,
+ Matchers.hasSize(2)
+ );
+ MatcherAssert.assertThat(
+ "Should contain acme/foo",
+ records.stream().anyMatch(r -> "acme/foo".equals(r.name())),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain acme/bar",
+ records.stream().anyMatch(r -> "acme/bar".equals(r.name())),
+ Matchers.is(true)
+ );
+ }
+
+ @Test
+ void prefersP2OverPackagesJson(@TempDir final Path temp)
+ throws IOException {
+ final Path vendorDir = temp.resolve("p2").resolve("vendor");
+ Files.createDirectories(vendorDir);
+ Files.writeString(
+ vendorDir.resolve("lib.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"vendor/lib\": {",
+ " \"1.0.0\": {",
+ " \"name\": \"vendor/lib\",",
+ " \"version\": \"1.0.0\"",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ Files.writeString(
+ temp.resolve("packages.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"other/pkg\": {",
+ " \"3.0.0\": {",
+ " \"name\": \"other/pkg\",",
+ " \"version\": \"3.0.0\"",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "composer-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 1 record from p2 only",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Should contain vendor/lib from p2 layout",
+ records.get(0).name(),
+ Matchers.is("vendor/lib")
+ );
+ MatcherAssert.assertThat(
+ "Should NOT contain other/pkg from packages.json",
+ records.stream().noneMatch(r -> "other/pkg".equals(r.name())),
+ Matchers.is(true)
+ );
+ }
+
+ @Test
+ void handlesMissingPackagesKey(@TempDir final Path temp)
+ throws IOException {
+ final Path vendorDir = temp.resolve("p2").resolve("vendor");
+ Files.createDirectories(vendorDir);
+ Files.writeString(
+ vendorDir.resolve("nopackages.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"minified\": \"provider/latest\"",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "composer-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 0 records when packages key is missing",
+ records,
+ Matchers.empty()
+ );
+ }
+
+ @Test
+ void skipsDevJsonFiles(@TempDir final Path temp) throws IOException {
+ final Path vendorDir = temp.resolve("p2").resolve("vendor");
+ Files.createDirectories(vendorDir);
+ Files.writeString(
+ vendorDir.resolve("pkg~dev.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"vendor/pkg\": {",
+ " \"dev-master\": {",
+ " \"name\": \"vendor/pkg\",",
+ " \"version\": \"dev-master\"",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "composer-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 0 records when only ~dev.json files exist",
+ records,
+ Matchers.empty()
+ );
+ }
+
+ @Test
+ void handlesEmptyRoot(@TempDir final Path temp) throws IOException {
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "composer-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 0 records for empty root",
+ records,
+ Matchers.empty()
+ );
+ }
+
+ @Test
+ void skipsEmptyPackagesJsonAndScansVendorDirs(@TempDir final Path temp)
+ throws IOException {
+ // packages.json exists but is 0 bytes (common in Artipie proxy repos)
+ Files.createFile(temp.resolve("packages.json"));
+ // vendor-dir layout files exist with real content
+ final Path vendorDir = temp.resolve("psr");
+ Files.createDirectories(vendorDir);
+ Files.writeString(
+ vendorDir.resolve("log.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"psr/log\": {",
+ " \"1.0.0\": {",
+ " \"name\": \"psr/log\",",
+ " \"version\": \"1.0.0\"",
+ " }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "php-proxy")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should find 1 record from vendor-dir layout despite empty packages.json",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Record name should be psr/log",
+ records.get(0).name(),
+ Matchers.is("psr/log")
+ );
+ MatcherAssert.assertThat(
+ "Record version should be 1.0.0",
+ records.get(0).version(),
+ Matchers.is("1.0.0")
+ );
+ }
+
+ @Test
+ void scansVendorDirLayout(@TempDir final Path temp) throws IOException {
+ // Two vendor directories, multiple packages
+ final Path psr = temp.resolve("psr");
+ Files.createDirectories(psr);
+ Files.writeString(
+ psr.resolve("log.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"psr/log\": {",
+ " \"1.0.0\": { \"name\": \"psr/log\", \"version\": \"1.0.0\" },",
+ " \"2.0.0\": { \"name\": \"psr/log\", \"version\": \"2.0.0\" }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ Files.writeString(
+ psr.resolve("http-message.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"psr/http-message\": {",
+ " \"1.1.0\": { \"name\": \"psr/http-message\", \"version\": \"1.1.0\" }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final Path symfony = temp.resolve("symfony");
+ Files.createDirectories(symfony);
+ Files.writeString(
+ symfony.resolve("http-client.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"symfony/http-client\": {",
+ " \"6.4.0\": { \"name\": \"symfony/http-client\", \"version\": \"6.4.0\" }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "php-proxy")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 4 records total (2 psr/log + 1 psr/http-message + 1 symfony/http-client)",
+ records,
+ Matchers.hasSize(4)
+ );
+ MatcherAssert.assertThat(
+ "Should contain psr/log",
+ records.stream().anyMatch(r -> "psr/log".equals(r.name())),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain psr/http-message",
+ records.stream().anyMatch(r -> "psr/http-message".equals(r.name())),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain symfony/http-client",
+ records.stream().anyMatch(r -> "symfony/http-client".equals(r.name())),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "All records should have composer repo type",
+ records.stream().allMatch(r -> "composer".equals(r.repoType())),
+ Matchers.is(true)
+ );
+ }
+
+ @Test
+ void skipsEmptyFilesInVendorDirLayout(@TempDir final Path temp)
+ throws IOException {
+ final Path psr = temp.resolve("psr");
+ Files.createDirectories(psr);
+ // One empty file (0 bytes) — should be skipped silently
+ Files.createFile(psr.resolve("log.json"));
+ // One non-empty file — should be scanned
+ Files.writeString(
+ psr.resolve("container.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"psr/container\": {",
+ " \"2.0.0\": { \"name\": \"psr/container\", \"version\": \"2.0.0\" }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "php-proxy")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 1 record — empty file skipped, non-empty file scanned",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Record should be from psr/container (non-empty file)",
+ records.get(0).name(),
+ Matchers.is("psr/container")
+ );
+ }
+
+ @Test
+ void skipsDevJsonFilesInVendorDirLayout(@TempDir final Path temp)
+ throws IOException {
+ final Path openTelemetry = temp.resolve("open-telemetry");
+ Files.createDirectories(openTelemetry);
+ // dev file — should be skipped
+ Files.writeString(
+ openTelemetry.resolve("sem-conv~dev.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"open-telemetry/sem-conv\": {",
+ " \"dev-main\": { \"name\": \"open-telemetry/sem-conv\", \"version\": \"dev-main\" }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ // stable file — should be scanned
+ Files.writeString(
+ openTelemetry.resolve("sem-conv.json"),
+ String.join(
+ "\n",
+ "{",
+ " \"packages\": {",
+ " \"open-telemetry/sem-conv\": {",
+ " \"1.0.0\": { \"name\": \"open-telemetry/sem-conv\", \"version\": \"1.0.0\" }",
+ " }",
+ " }",
+ "}"
+ ),
+ StandardCharsets.UTF_8
+ );
+ final ComposerScanner scanner = new ComposerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "php-proxy")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 1 record — ~dev.json file skipped",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Record version should be 1.0.0 (from stable file, not dev)",
+ records.get(0).version(),
+ Matchers.is("1.0.0")
+ );
+ }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/DebianScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/DebianScannerTest.java
new file mode 100644
index 000000000..6fa7aebad
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/DebianScannerTest.java
@@ -0,0 +1,313 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.zip.GZIPOutputStream;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link DebianScanner}.
+ *
+ * @since 1.20.13
+ */
+final class DebianScannerTest {
+
+ @Test
+ void parsesUncompressedPackagesFile(@TempDir final Path temp)
+ throws IOException {
+ final Path dir = temp.resolve("dists/focal/main/binary-amd64");
+ Files.createDirectories(dir);
+ Files.writeString(
+ dir.resolve("Packages"),
+ String.join(
+ "\n",
+ "Package: curl",
+ "Version: 7.68.0-1ubuntu2.6",
+ "Architecture: amd64",
+ "Size: 161672",
+ "Filename: pool/main/c/curl/curl_7.68.0-1ubuntu2.6_amd64.deb",
+ "",
+ "Package: wget",
+ "Version: 1.20.3-1ubuntu2",
+ "Architecture: amd64",
+ "Size: 345678",
+ "Filename: pool/main/w/wget/wget_1.20.3-1ubuntu2_amd64.deb",
+ ""
+ )
+ );
+ final DebianScanner scanner = new DebianScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "deb-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce exactly 2 records",
+ records,
+ Matchers.hasSize(2)
+ );
+ MatcherAssert.assertThat(
+ "First record name should be curl_amd64",
+ records.stream().anyMatch(
+ r -> "curl_amd64".equals(r.name())
+ && "7.68.0-1ubuntu2.6".equals(r.version())
+ && r.size() == 161672L
+ ),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Second record name should be wget_amd64",
+ records.stream().anyMatch(
+ r -> "wget_amd64".equals(r.name())
+ && "1.20.3-1ubuntu2".equals(r.version())
+ && r.size() == 345678L
+ ),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Repo type should be deb",
+ records.get(0).repoType(),
+ Matchers.is("deb")
+ );
+ MatcherAssert.assertThat(
+ "Owner should be system",
+ records.get(0).owner(),
+ Matchers.is("system")
+ );
+ }
+
+ @Test
+ void parsesGzipCompressedPackagesFile(@TempDir final Path temp)
+ throws IOException {
+ final Path dir = temp.resolve("dists/focal/main/binary-amd64");
+ Files.createDirectories(dir);
+ final String content = String.join(
+ "\n",
+ "Package: nginx",
+ "Version: 1.18.0-0ubuntu1",
+ "Architecture: amd64",
+ "Size: 543210",
+ "",
+ "Package: apache2",
+ "Version: 2.4.41-4ubuntu3",
+ "Architecture: amd64",
+ "Size: 987654",
+ ""
+ );
+ final Path gzPath = dir.resolve("Packages.gz");
+ try (OutputStream fos = Files.newOutputStream(gzPath);
+ GZIPOutputStream gzos = new GZIPOutputStream(fos)) {
+ gzos.write(content.getBytes(StandardCharsets.UTF_8));
+ }
+ final DebianScanner scanner = new DebianScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "deb-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce exactly 2 records from gzip file",
+ records,
+ Matchers.hasSize(2)
+ );
+ MatcherAssert.assertThat(
+ "Should contain nginx_amd64 record",
+ records.stream().anyMatch(
+ r -> "nginx_amd64".equals(r.name())
+ && "1.18.0-0ubuntu1".equals(r.version())
+ && r.size() == 543210L
+ ),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain apache2_amd64 record",
+ records.stream().anyMatch(
+ r -> "apache2_amd64".equals(r.name())
+ && "2.4.41-4ubuntu3".equals(r.version())
+ && r.size() == 987654L
+ ),
+ Matchers.is(true)
+ );
+ }
+
+ @Test
+ void defaultsSizeToZeroWhenMissing(@TempDir final Path temp)
+ throws IOException {
+ final Path dir = temp.resolve("dists/focal/main/binary-amd64");
+ Files.createDirectories(dir);
+ Files.writeString(
+ dir.resolve("Packages"),
+ String.join(
+ "\n",
+ "Package: nano",
+ "Version: 4.8-1ubuntu1",
+ "Architecture: amd64",
+ ""
+ )
+ );
+ final DebianScanner scanner = new DebianScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "deb-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce exactly 1 record",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Size should default to 0 when missing",
+ records.get(0).size(),
+ Matchers.is(0L)
+ );
+ }
+
+ @Test
+ void skipsStanzasMissingPackageOrVersion(@TempDir final Path temp)
+ throws IOException {
+ final Path dir = temp.resolve("dists/focal/main/binary-amd64");
+ Files.createDirectories(dir);
+ Files.writeString(
+ dir.resolve("Packages"),
+ String.join(
+ "\n",
+ "Package: valid-pkg",
+ "Version: 1.0",
+ "Size: 100",
+ "",
+ "Version: 2.0",
+ "Size: 200",
+ "",
+ "Package: no-version",
+ "Size: 300",
+ "",
+ "Package: another-valid",
+ "Version: 3.0",
+ "Size: 400",
+ ""
+ )
+ );
+ final DebianScanner scanner = new DebianScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "deb-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce exactly 2 records, skipping incomplete stanzas",
+ records,
+ Matchers.hasSize(2)
+ );
+ MatcherAssert.assertThat(
+ "Should contain valid-pkg",
+ records.stream().anyMatch(
+ r -> "valid-pkg".equals(r.name())
+ ),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain another-valid",
+ records.stream().anyMatch(
+ r -> "another-valid".equals(r.name())
+ ),
+ Matchers.is(true)
+ );
+ }
+
+ @Test
+ void handlesMultipleDistributionsAndComponents(@TempDir final Path temp)
+ throws IOException {
+ final Path focal = temp.resolve("dists/focal/main/binary-amd64");
+ Files.createDirectories(focal);
+ Files.writeString(
+ focal.resolve("Packages"),
+ String.join(
+ "\n",
+ "Package: focal-pkg",
+ "Version: 1.0",
+ "Size: 100",
+ ""
+ )
+ );
+ final Path bionic = temp.resolve("dists/bionic/contrib/binary-i386");
+ Files.createDirectories(bionic);
+ Files.writeString(
+ bionic.resolve("Packages"),
+ String.join(
+ "\n",
+ "Package: bionic-pkg",
+ "Version: 2.0",
+ "Size: 200",
+ ""
+ )
+ );
+ final DebianScanner scanner = new DebianScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "deb-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce records from both distributions",
+ records,
+ Matchers.hasSize(2)
+ );
+ MatcherAssert.assertThat(
+ "Should contain focal-pkg",
+ records.stream().anyMatch(
+ r -> "focal-pkg".equals(r.name())
+ ),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain bionic-pkg",
+ records.stream().anyMatch(
+ r -> "bionic-pkg".equals(r.name())
+ ),
+ Matchers.is(true)
+ );
+ }
+
+ @Test
+ void returnsEmptyForEmptyDirectory(@TempDir final Path temp)
+ throws IOException {
+ final DebianScanner scanner = new DebianScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "deb-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should return empty stream for empty directory",
+ records,
+ Matchers.empty()
+ );
+ }
+
+ @Test
+ void prefersPackagesGzOverPackages(@TempDir final Path temp)
+ throws IOException {
+ final Path dir = temp.resolve("dists/focal/main/binary-amd64");
+ Files.createDirectories(dir);
+ final String content = String.join(
+ "\n",
+ "Package: curl",
+ "Version: 7.68.0",
+ "Size: 100",
+ "",
+ "Package: wget",
+ "Version: 1.20.3",
+ "Size: 200",
+ ""
+ );
+ Files.writeString(dir.resolve("Packages"), content);
+ final Path gzPath = dir.resolve("Packages.gz");
+ try (OutputStream fos = Files.newOutputStream(gzPath);
+ GZIPOutputStream gzos = new GZIPOutputStream(fos)) {
+ gzos.write(content.getBytes(StandardCharsets.UTF_8));
+ }
+ final DebianScanner scanner = new DebianScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "deb-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should not double-count when both Packages and Packages.gz exist",
+ records,
+ Matchers.hasSize(2)
+ );
+ }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/DockerScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/DockerScannerTest.java
new file mode 100644
index 000000000..a6fc1203f
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/DockerScannerTest.java
@@ -0,0 +1,381 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link DockerScanner}.
+ *
+ * @since 1.20.13
+ */
+final class DockerScannerTest {
+
+ @Test
+ void scansImageWithTag(@TempDir final Path temp) throws IOException {
+ final String digest = "sha256:abc123def456";
+ DockerScannerTest.createTagLink(temp, "nginx", "latest", digest);
+ final String manifest = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"config\": { \"size\": 7023, \"digest\": \"sha256:config1\" },",
+ " \"layers\": [",
+ " { \"size\": 32654, \"digest\": \"sha256:layer1\" },",
+ " { \"size\": 73109, \"digest\": \"sha256:layer2\" }",
+ " ]",
+ "}"
+ );
+ DockerScannerTest.createBlob(temp, digest, manifest);
+ final DockerScanner scanner = new DockerScanner(true);
+        final List<ArtifactRecord> records = scanner.scan(temp, "docker-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce exactly 1 record",
+ records,
+ Matchers.hasSize(1)
+ );
+ final ArtifactRecord record = records.get(0);
+ MatcherAssert.assertThat(
+ "Image name should be nginx",
+ record.name(),
+ Matchers.is("nginx")
+ );
+ MatcherAssert.assertThat(
+ "Version should be the tag name",
+ record.version(),
+ Matchers.is("latest")
+ );
+ MatcherAssert.assertThat(
+ "Size should be config + layers sum",
+ record.size(),
+ Matchers.is(7023L + 32654L + 73109L)
+ );
+ MatcherAssert.assertThat(
+ "Repo type should be docker-proxy",
+ record.repoType(),
+ Matchers.is("docker-proxy")
+ );
+ MatcherAssert.assertThat(
+ "Repo name should be docker-repo",
+ record.repoName(),
+ Matchers.is("docker-repo")
+ );
+ }
+
+ @Test
+ void scansMultipleTagsForImage(@TempDir final Path temp)
+ throws IOException {
+ final String digest1 = "sha256:aaa111bbb222";
+ final String digest2 = "sha256:ccc333ddd444";
+ DockerScannerTest.createTagLink(temp, "nginx", "latest", digest1);
+ DockerScannerTest.createTagLink(temp, "nginx", "1.25", digest2);
+ final String manifest1 = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"config\": { \"size\": 1000, \"digest\": \"sha256:cfg1\" },",
+ " \"layers\": [",
+ " { \"size\": 2000, \"digest\": \"sha256:l1\" }",
+ " ]",
+ "}"
+ );
+ final String manifest2 = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"config\": { \"size\": 500, \"digest\": \"sha256:cfg2\" },",
+ " \"layers\": [",
+ " { \"size\": 1500, \"digest\": \"sha256:l2\" }",
+ " ]",
+ "}"
+ );
+ DockerScannerTest.createBlob(temp, digest1, manifest1);
+ DockerScannerTest.createBlob(temp, digest2, manifest2);
+ final DockerScanner scanner = new DockerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "docker-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 2 records",
+ records,
+ Matchers.hasSize(2)
+ );
+ MatcherAssert.assertThat(
+ "Should contain 'latest' as version",
+ records.stream().anyMatch(
+ r -> "latest".equals(r.version())
+ ),
+ Matchers.is(true)
+ );
+ MatcherAssert.assertThat(
+ "Should contain '1.25' as version",
+ records.stream().anyMatch(
+ r -> "1.25".equals(r.version())
+ ),
+ Matchers.is(true)
+ );
+ final ArtifactRecord first = records.stream()
+ .filter(r -> "latest".equals(r.version()))
+ .findFirst().orElseThrow();
+ MatcherAssert.assertThat(
+ "latest tag size should be 3000",
+ first.size(),
+ Matchers.is(3000L)
+ );
+ final ArtifactRecord second = records.stream()
+ .filter(r -> "1.25".equals(r.version()))
+ .findFirst().orElseThrow();
+ MatcherAssert.assertThat(
+ "1.25 tag size should be 2000",
+ second.size(),
+ Matchers.is(2000L)
+ );
+ }
+
+ @Test
+ void handlesMissingBlob(@TempDir final Path temp) throws IOException {
+ final String digest = "sha256:deadbeef0000";
+ DockerScannerTest.createTagLink(temp, "alpine", "3.18", digest);
+ final DockerScanner scanner = new DockerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "docker-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 1 record even with missing blob",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Size should be 0 when blob is missing",
+ records.get(0).size(),
+ Matchers.is(0L)
+ );
+ }
+
+ @Test
+ void handlesManifestList(@TempDir final Path temp) throws IOException {
+ final String childDigest = "sha256:child111222333";
+ final String childManifest = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\",",
+ " \"config\": { \"size\": 1504, \"digest\": \"sha256:cfgchild\" },",
+ " \"layers\": [",
+ " { \"size\": 28865120, \"digest\": \"sha256:layerchild\" }",
+ " ]",
+ "}"
+ );
+ DockerScannerTest.createBlob(temp, childDigest, childManifest);
+ final String attestDigest = "sha256:attest999888777";
+ final String attestManifest = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"mediaType\": \"application/vnd.oci.image.manifest.v1+json\",",
+ " \"config\": { \"size\": 167, \"digest\": \"sha256:cfgattest\" },",
+ " \"layers\": [",
+ " { \"size\": 1331, \"digest\": \"sha256:layerattest\",",
+ " \"mediaType\": \"application/vnd.in-toto+json\" }",
+ " ]",
+ "}"
+ );
+ DockerScannerTest.createBlob(temp, attestDigest, attestManifest);
+ final String listDigest = "sha256:ffee00112233";
+ final String manifestList = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"mediaType\": \"application/vnd.docker.distribution.manifest.list.v2+json\",",
+ " \"manifests\": [",
+ " { \"digest\": \"" + childDigest + "\", \"size\": 482,",
+ " \"platform\": { \"architecture\": \"amd64\", \"os\": \"linux\" } },",
+ " { \"digest\": \"" + attestDigest + "\", \"size\": 566,",
+ " \"platform\": { \"architecture\": \"unknown\", \"os\": \"unknown\" } }",
+ " ]",
+ "}"
+ );
+ DockerScannerTest.createTagLink(temp, "ubuntu", "22.04", listDigest);
+ DockerScannerTest.createBlob(temp, listDigest, manifestList);
+ final DockerScanner scanner = new DockerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "docker-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 1 record",
+ records,
+ Matchers.hasSize(1)
+ );
+ final ArtifactRecord record = records.get(0);
+ MatcherAssert.assertThat(
+ "Size should be sum of ALL child manifests' layers and configs",
+ record.size(),
+ Matchers.is(1504L + 28865120L + 167L + 1331L)
+ );
+ }
+
+ @Test
+ void handlesNestedImageName(@TempDir final Path temp) throws IOException {
+ final String digest = "sha256:1122334455aa";
+ DockerScannerTest.createTagLink(temp, "library/redis", "7.0", digest);
+ final String manifest = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"config\": { \"size\": 500, \"digest\": \"sha256:rcfg\" },",
+ " \"layers\": [",
+ " { \"size\": 10000, \"digest\": \"sha256:rl1\" }",
+ " ]",
+ "}"
+ );
+ DockerScannerTest.createBlob(temp, digest, manifest);
+ final DockerScanner scanner = new DockerScanner(true);
+        final List<ArtifactRecord> records = scanner.scan(temp, "docker-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 1 record",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Image name should include nested path",
+ records.get(0).name(),
+ Matchers.is("library/redis")
+ );
+ MatcherAssert.assertThat(
+ "Version should be the tag name",
+ records.get(0).version(),
+ Matchers.is("7.0")
+ );
+ }
+
+ @Test
+ void scansDockerRegistryV2Layout(@TempDir final Path temp)
+ throws IOException {
+ final String digest = "sha256:abcdef123456";
+ final Path v2 = temp.resolve("docker/registry/v2");
+ final Path linkDir = v2
+ .resolve("repositories/ubuntu/_manifests/tags/latest/current");
+ Files.createDirectories(linkDir);
+ Files.writeString(
+ linkDir.resolve("link"), digest, StandardCharsets.UTF_8
+ );
+ final String manifest = String.join(
+ "\n",
+ "{",
+ " \"schemaVersion\": 2,",
+ " \"config\": { \"size\": 2000, \"digest\": \"sha256:c1\" },",
+ " \"layers\": [",
+ " { \"size\": 50000, \"digest\": \"sha256:l1\" }",
+ " ]",
+ "}"
+ );
+ final String[] parts = digest.split(":", 2);
+ final Path blobDir = v2.resolve("blobs")
+ .resolve(parts[0])
+ .resolve(parts[1].substring(0, 2))
+ .resolve(parts[1]);
+ Files.createDirectories(blobDir);
+ Files.writeString(
+ blobDir.resolve("data"), manifest, StandardCharsets.UTF_8
+ );
+ final DockerScanner scanner = new DockerScanner(true);
+        final List<ArtifactRecord> records = scanner.scan(temp, "docker-cache")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should find image in docker/registry/v2 layout",
+ records,
+ Matchers.hasSize(1)
+ );
+ MatcherAssert.assertThat(
+ "Image name should be ubuntu",
+ records.get(0).name(),
+ Matchers.is("ubuntu")
+ );
+ MatcherAssert.assertThat(
+ "Size should be config + layer",
+ records.get(0).size(),
+ Matchers.is(52000L)
+ );
+ }
+
+ @Test
+ void handlesMissingRepositoriesDir(@TempDir final Path temp)
+ throws IOException {
+ final DockerScanner scanner = new DockerScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "docker-repo")
+ .collect(Collectors.toList());
+ MatcherAssert.assertThat(
+ "Should produce 0 records when repositories dir is missing",
+ records,
+ Matchers.empty()
+ );
+ }
+
+ /**
+ * Create a tag link file in the Docker registry layout.
+ *
+ * @param root Root directory (contains repositories/ and blobs/)
+ * @param imageName Image name (e.g., "nginx" or "library/redis")
+ * @param tag Tag name (e.g., "latest")
+ * @param digest Digest string (e.g., "sha256:abc123")
+ * @throws IOException If an I/O error occurs
+ */
+ private static void createTagLink(final Path root,
+ final String imageName, final String tag, final String digest)
+ throws IOException {
+ final Path linkDir = root
+ .resolve("repositories")
+ .resolve(imageName)
+ .resolve("_manifests")
+ .resolve("tags")
+ .resolve(tag)
+ .resolve("current");
+ Files.createDirectories(linkDir);
+ Files.writeString(
+ linkDir.resolve("link"), digest, StandardCharsets.UTF_8
+ );
+ }
+
+ /**
+ * Create a blob data file for a given digest.
+ *
+ * @param root Root directory (contains repositories/ and blobs/)
+ * @param digest Digest string (e.g., "sha256:abc123def456")
+ * @param content Blob content (manifest JSON)
+ * @throws IOException If an I/O error occurs
+ */
+ private static void createBlob(final Path root, final String digest,
+ final String content) throws IOException {
+ final Path dataPath = DockerScannerTest.blobDataPath(root, digest);
+ Files.createDirectories(dataPath.getParent());
+ Files.writeString(dataPath, content, StandardCharsets.UTF_8);
+ }
+
+ /**
+ * Compute the blob data path for a given digest.
+ *
+ * @param root Root directory
+ * @param digest Digest string
+ * @return Path to the data file
+ */
+ private static Path blobDataPath(final Path root, final String digest) {
+ final String[] parts = digest.split(":", 2);
+ final String algorithm = parts[0];
+ final String hex = parts[1];
+ return root.resolve("blobs")
+ .resolve(algorithm)
+ .resolve(hex.substring(0, 2))
+ .resolve(hex)
+ .resolve("data");
+ }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/GemScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/GemScannerTest.java
new file mode 100644
index 000000000..57962e484
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/GemScannerTest.java
@@ -0,0 +1,244 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link GemScanner}.
+ *
+ * @since 1.20.13
+ */
+final class GemScannerTest {
+
+    @Test
+    void parsesSimpleGemFilename(@TempDir final Path temp) throws IOException {
+        final Path gems = temp.resolve("gems");
+        Files.createDirectories(gems);
+        Files.write(gems.resolve("rake-13.0.6.gem"), new byte[100]);
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        final ArtifactRecord first = records.get(0);
+        MatcherAssert.assertThat(
+            "Name should be rake",
+            first.name(),
+            Matchers.is("rake")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 13.0.6",
+            first.version(),
+            Matchers.is("13.0.6")
+        );
+        MatcherAssert.assertThat(
+            "Size should be 100",
+            first.size(),
+            Matchers.is(100L)
+        );
+        MatcherAssert.assertThat(
+            "Repo type should be gem",
+            first.repoType(),
+            Matchers.is("gem")
+        );
+        MatcherAssert.assertThat(
+            "Owner should be system",
+            first.owner(),
+            Matchers.is("system")
+        );
+    }
+
+    @Test
+    void parsesGemWithHyphenatedName(@TempDir final Path temp)
+        throws IOException {
+        final Path gems = temp.resolve("gems");
+        Files.createDirectories(gems);
+        Files.write(gems.resolve("net-http-0.3.2.gem"), new byte[80]);
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be net-http",
+            records.get(0).name(),
+            Matchers.is("net-http")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 0.3.2",
+            records.get(0).version(),
+            Matchers.is("0.3.2")
+        );
+    }
+
+    @Test
+    void parsesGemWithPlatform(@TempDir final Path temp)
+        throws IOException {
+        final Path gems = temp.resolve("gems");
+        Files.createDirectories(gems);
+        Files.write(
+            gems.resolve("nokogiri-1.13.8-x86_64-linux.gem"),
+            new byte[200]
+        );
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be nokogiri",
+            records.get(0).name(),
+            Matchers.is("nokogiri")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 1.13.8",
+            records.get(0).version(),
+            Matchers.is("1.13.8")
+        );
+    }
+
+    @Test
+    void parsesGemWithMultipleHyphensInName(@TempDir final Path temp)
+        throws IOException {
+        final Path gems = temp.resolve("gems");
+        Files.createDirectories(gems);
+        Files.write(gems.resolve("ruby-ole-1.2.12.7.gem"), new byte[150]);
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be ruby-ole",
+            records.get(0).name(),
+            Matchers.is("ruby-ole")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 1.2.12.7",
+            records.get(0).version(),
+            Matchers.is("1.2.12.7")
+        );
+    }
+
+    @Test
+    void handlesMultipleGems(@TempDir final Path temp) throws IOException {
+        final Path gems = temp.resolve("gems");
+        Files.createDirectories(gems);
+        Files.write(gems.resolve("rails-7.0.4.gem"), new byte[300]);
+        Files.write(gems.resolve("rake-13.0.6.gem"), new byte[100]);
+        Files.write(
+            gems.resolve("activerecord-7.0.4.gem"), new byte[250]
+        );
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 3 records",
+            records,
+            Matchers.hasSize(3)
+        );
+        MatcherAssert.assertThat(
+            "Should contain rails",
+            records.stream().anyMatch(
+                r -> "rails".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain rake",
+            records.stream().anyMatch(
+                r -> "rake".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain activerecord",
+            records.stream().anyMatch(
+                r -> "activerecord".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+    }
+
+    @Test
+    void skipsNonGemFiles(@TempDir final Path temp) throws IOException {
+        final Path gems = temp.resolve("gems");
+        Files.createDirectories(gems);
+        Files.writeString(gems.resolve("readme.txt"), "hello");
+        Files.writeString(gems.resolve("notes.md"), "notes");
+        Files.write(gems.resolve("data.tar.gz"), new byte[50]);
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 0 records for non-gem files",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void returnsEmptyForEmptyDirectory(@TempDir final Path temp)
+        throws IOException {
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 0 records for empty directory",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void handlesGemsInRootDirectly(@TempDir final Path temp)
+        throws IOException {
+        Files.write(temp.resolve("rake-13.0.6.gem"), new byte[100]);
+        Files.write(temp.resolve("rails-7.0.4.gem"), new byte[200]);
+        final GemScanner scanner = new GemScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "gem-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 2 records from root-level gems",
+            records,
+            Matchers.hasSize(2)
+        );
+        MatcherAssert.assertThat(
+            "Should contain rake",
+            records.stream().anyMatch(
+                r -> "rake".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain rails",
+            records.stream().anyMatch(
+                r -> "rails".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/GoScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/GoScannerTest.java
new file mode 100644
index 000000000..bb33611c4
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/GoScannerTest.java
@@ -0,0 +1,327 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.Instant;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link GoScanner}.
+ *
+ * @since 1.20.13
+ */
+final class GoScannerTest {
+
+    @Test
+    void scansModuleWithVersions(@TempDir final Path temp) throws IOException {
+        final Path atv = temp.resolve("example.com/foo/bar/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("list"),
+            "v1.0.0\nv1.1.0\n",
+            StandardCharsets.UTF_8
+        );
+        Files.writeString(
+            atv.resolve("v1.0.0.info"),
+            "{\"Version\":\"v1.0.0\",\"Time\":\"2024-01-15T10:30:00Z\"}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(atv.resolve("v1.0.0.zip"), new byte[200]);
+        Files.writeString(
+            atv.resolve("v1.1.0.info"),
+            "{\"Version\":\"v1.1.0\",\"Time\":\"2024-02-20T14:00:00Z\"}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(atv.resolve("v1.1.0.zip"), new byte[350]);
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 2 records for 2 versions",
+            records,
+            Matchers.hasSize(2)
+        );
+        MatcherAssert.assertThat(
+            "All records should have module path example.com/foo/bar",
+            records.stream().allMatch(
+                r -> "example.com/foo/bar".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "All records should have repoType go",
+            records.stream().allMatch(r -> "go".equals(r.repoType())),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 1.0.0",
+            records.stream().anyMatch(r -> "1.0.0".equals(r.version())),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 1.1.0",
+            records.stream().anyMatch(r -> "1.1.0".equals(r.version())),
+            Matchers.is(true)
+        );
+        final ArtifactRecord first = records.stream()
+            .filter(r -> "1.0.0".equals(r.version()))
+            .findFirst()
+            .orElseThrow();
+        MatcherAssert.assertThat(
+            "v1.0.0 zip size should be 200",
+            first.size(),
+            Matchers.is(200L)
+        );
+        final ArtifactRecord second = records.stream()
+            .filter(r -> "1.1.0".equals(r.version()))
+            .findFirst()
+            .orElseThrow();
+        MatcherAssert.assertThat(
+            "v1.1.0 zip size should be 350",
+            second.size(),
+            Matchers.is(350L)
+        );
+    }
+
+    @Test
+    void handlesMissingZipFile(@TempDir final Path temp) throws IOException {
+        final Path atv = temp.resolve("example.com/lib/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("list"),
+            "v2.0.0\n",
+            StandardCharsets.UTF_8
+        );
+        Files.writeString(
+            atv.resolve("v2.0.0.info"),
+            "{\"Version\":\"v2.0.0\",\"Time\":\"2024-03-10T08:00:00Z\"}",
+            StandardCharsets.UTF_8
+        );
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 0 records when zip is not cached",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void handlesMissingInfoFile(@TempDir final Path temp) throws IOException {
+        final Path atv = temp.resolve("example.com/noinfo/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("list"),
+            "v3.0.0\n",
+            StandardCharsets.UTF_8
+        );
+        Files.write(atv.resolve("v3.0.0.zip"), new byte[100]);
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should still produce 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        final long listMtime = Files.readAttributes(
+            atv.resolve("list"), BasicFileAttributes.class
+        ).lastModifiedTime().toMillis();
+        MatcherAssert.assertThat(
+            "CreatedDate should fall back to list file mtime",
+            records.get(0).createdDate(),
+            Matchers.is(listMtime)
+        );
+    }
+
+    @Test
+    void parsesTimestampFromInfoFile(@TempDir final Path temp)
+        throws IOException {
+        final String timestamp = "2024-01-15T10:30:00Z";
+        final long expected = Instant.parse(timestamp).toEpochMilli();
+        final Path atv = temp.resolve("example.com/timed/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("list"),
+            "v1.0.0\n",
+            StandardCharsets.UTF_8
+        );
+        Files.writeString(
+            atv.resolve("v1.0.0.info"),
+            "{\"Version\":\"v1.0.0\",\"Time\":\"" + timestamp + "\"}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(atv.resolve("v1.0.0.zip"), new byte[50]);
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "CreatedDate should match the parsed Time field",
+            records.get(0).createdDate(),
+            Matchers.is(expected)
+        );
+    }
+
+    @Test
+    void skipsUncachedVersionsInListFile(@TempDir final Path temp)
+        throws IOException {
+        // List has v1.0.1–v1.0.4 but only v1.0.4 was actually downloaded
+        final Path atv = temp.resolve("gopkg.in/example/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("list"),
+            "v1.0.1\nv1.0.2\nv1.0.3\nv1.0.4\n",
+            StandardCharsets.UTF_8
+        );
+        for (final String ver : new String[]{"v1.0.1", "v1.0.2", "v1.0.3", "v1.0.4"}) {
+            Files.writeString(
+                atv.resolve(ver + ".info"),
+                "{\"Version\":\"" + ver + "\",\"Time\":\"2024-01-01T00:00:00Z\"}",
+                StandardCharsets.UTF_8
+            );
+        }
+        // Only v1.0.4 has a zip (actually cached)
+        Files.write(atv.resolve("v1.0.4.zip"), new byte[12345]);
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-proxy")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should only index the one version that has a zip",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Only cached version 1.0.4 should be indexed",
+            records.get(0).version(),
+            Matchers.is("1.0.4")
+        );
+        MatcherAssert.assertThat(
+            "Size should reflect the zip file",
+            records.get(0).size(),
+            Matchers.is(12345L)
+        );
+    }
+
+    @Test
+    void handlesEmptyListFile(@TempDir final Path temp) throws IOException {
+        final Path atv = temp.resolve("example.com/empty/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("list"),
+            "",
+            StandardCharsets.UTF_8
+        );
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Empty list file should produce 0 records",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void scansByInfoFilesWhenNoListFile(@TempDir final Path temp)
+        throws IOException {
+        // Proxy layout: only .info and .zip files, no list file
+        final Path atv = temp.resolve("example.com/proxy-mod/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("v1.0.0.info"),
+            "{\"Version\":\"v1.0.0\",\"Time\":\"2024-06-01T12:00:00Z\"}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(atv.resolve("v1.0.0.zip"), new byte[300]);
+        Files.writeString(
+            atv.resolve("v1.1.0.info"),
+            "{\"Version\":\"v1.1.0\",\"Time\":\"2024-07-01T12:00:00Z\"}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(atv.resolve("v1.1.0.zip"), new byte[400]);
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-proxy")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should find 2 versions via .info files",
+            records,
+            Matchers.hasSize(2)
+        );
+        MatcherAssert.assertThat(
+            "All records should have module path example.com/proxy-mod",
+            records.stream().allMatch(
+                r -> "example.com/proxy-mod".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 1.0.0",
+            records.stream().anyMatch(r -> "1.0.0".equals(r.version())),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 1.1.0",
+            records.stream().anyMatch(r -> "1.1.0".equals(r.version())),
+            Matchers.is(true)
+        );
+    }
+
+    @Test
+    void handlesNestedModulePaths(@TempDir final Path temp)
+        throws IOException {
+        final Path atv = temp.resolve("github.com/org/project/v2/@v");
+        Files.createDirectories(atv);
+        Files.writeString(
+            atv.resolve("list"),
+            "v2.0.0\n",
+            StandardCharsets.UTF_8
+        );
+        Files.writeString(
+            atv.resolve("v2.0.0.info"),
+            "{\"Version\":\"v2.0.0\",\"Time\":\"2024-05-01T00:00:00Z\"}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(atv.resolve("v2.0.0.zip"), new byte[500]);
+        final GoScanner scanner = new GoScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "go-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 1 record for nested module",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Module path should be github.com/org/project/v2",
+            records.get(0).name(),
+            Matchers.is("github.com/org/project/v2")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 2.0.0 (v prefix stripped)",
+            records.get(0).version(),
+            Matchers.is("2.0.0")
+        );
+        MatcherAssert.assertThat(
+            "Size should be 500",
+            records.get(0).size(),
+            Matchers.is(500L)
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/HelmScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/HelmScannerTest.java
new file mode 100644
index 000000000..413838a2f
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/HelmScannerTest.java
@@ -0,0 +1,219 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.OffsetDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link HelmScanner}.
+ *
+ * @since 1.20.13
+ */
+final class HelmScannerTest {
+
+    @Test
+    void scansMultipleChartsWithVersions(@TempDir final Path temp)
+        throws IOException {
+        Files.writeString(
+            temp.resolve("index.yaml"),
+            String.join(
+                "\n",
+                "apiVersion: v1",
+                "entries:",
+                "  tomcat:",
+                "  - name: tomcat",
+                "    version: 0.4.1",
+                "    urls:",
+                "    - tomcat-0.4.1.tgz",
+                "    created: '2021-01-11T16:21:01.376598500+03:00'",
+                "  redis:",
+                "  - name: redis",
+                "    version: 7.0.0",
+                "    urls:",
+                "    - redis-7.0.0.tgz",
+                "    created: '2023-05-01T10:00:00+00:00'",
+                "  - name: redis",
+                "    version: 6.2.0",
+                "    urls:",
+                "    - redis-6.2.0.tgz",
+                "    created: '2022-03-15T08:30:00+00:00'"
+            ),
+            StandardCharsets.UTF_8
+        );
+        Files.write(temp.resolve("tomcat-0.4.1.tgz"), new byte[1024]);
+        Files.write(temp.resolve("redis-7.0.0.tgz"), new byte[2048]);
+        Files.write(temp.resolve("redis-6.2.0.tgz"), new byte[512]);
+        final HelmScanner scanner = new HelmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "helm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 3 records total",
+            records,
+            Matchers.hasSize(3)
+        );
+        MatcherAssert.assertThat(
+            "Should contain tomcat 0.4.1",
+            records.stream().anyMatch(
+                r -> "tomcat".equals(r.name()) && "0.4.1".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain redis 7.0.0",
+            records.stream().anyMatch(
+                r -> "redis".equals(r.name()) && "7.0.0".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain redis 6.2.0",
+            records.stream().anyMatch(
+                r -> "redis".equals(r.name()) && "6.2.0".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        final ArtifactRecord tomcat = records.stream()
+            .filter(r -> "tomcat".equals(r.name()))
+            .findFirst().orElseThrow();
+        MatcherAssert.assertThat(
+            "Tomcat size should be 1024",
+            tomcat.size(),
+            Matchers.is(1024L)
+        );
+        final ArtifactRecord redis7 = records.stream()
+            .filter(r -> "7.0.0".equals(r.version()))
+            .findFirst().orElseThrow();
+        MatcherAssert.assertThat(
+            "Redis 7.0.0 size should be 2048",
+            redis7.size(),
+            Matchers.is(2048L)
+        );
+        final ArtifactRecord redis6 = records.stream()
+            .filter(r -> "6.2.0".equals(r.version()))
+            .findFirst().orElseThrow();
+        MatcherAssert.assertThat(
+            "Redis 6.2.0 size should be 512",
+            redis6.size(),
+            Matchers.is(512L)
+        );
+        MatcherAssert.assertThat(
+            "Repo type should be helm",
+            tomcat.repoType(),
+            Matchers.is("helm")
+        );
+    }
+
+    @Test
+    void handlesMissingTgzFile(@TempDir final Path temp)
+        throws IOException {
+        Files.writeString(
+            temp.resolve("index.yaml"),
+            String.join(
+                "\n",
+                "apiVersion: v1",
+                "entries:",
+                "  nginx:",
+                "  - name: nginx",
+                "    version: 1.0.0",
+                "    urls:",
+                "    - nginx-1.0.0.tgz",
+                "    created: '2023-01-01T00:00:00+00:00'"
+            ),
+            StandardCharsets.UTF_8
+        );
+        final HelmScanner scanner = new HelmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "helm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should still produce 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Size should be 0 when tgz is missing",
+            records.get(0).size(),
+            Matchers.is(0L)
+        );
+    }
+
+    @Test
+    void handlesMissingIndexYaml(@TempDir final Path temp)
+        throws IOException {
+        final HelmScanner scanner = new HelmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "helm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 0 records when index.yaml is missing",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void handlesMissingEntriesKey(@TempDir final Path temp)
+        throws IOException {
+        Files.writeString(
+            temp.resolve("index.yaml"),
+            "apiVersion: v1\n",
+            StandardCharsets.UTF_8
+        );
+        final HelmScanner scanner = new HelmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "helm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 0 records when entries is missing",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void parsesCreatedTimestamp(@TempDir final Path temp)
+        throws IOException {
+        final String timestamp = "2021-01-11T16:21:01.376598500+03:00";
+        final long expected = OffsetDateTime.parse(
+            timestamp, DateTimeFormatter.ISO_OFFSET_DATE_TIME
+        ).toInstant().toEpochMilli();
+        Files.writeString(
+            temp.resolve("index.yaml"),
+            String.join(
+                "\n",
+                "apiVersion: v1",
+                "entries:",
+                "  mychart:",
+                "  - name: mychart",
+                "    version: 1.0.0",
+                "    urls:",
+                "    - mychart-1.0.0.tgz",
+                "    created: '" + timestamp + "'"
+            ),
+            StandardCharsets.UTF_8
+        );
+        final HelmScanner scanner = new HelmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "helm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "CreatedDate should match the parsed timestamp",
+            records.get(0).createdDate(),
+            Matchers.is(expected)
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/MavenScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/MavenScannerTest.java
new file mode 100644
index 000000000..647a61a26
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/MavenScannerTest.java
@@ -0,0 +1,357 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link MavenScanner}.
+ *
+ * @since 1.20.13
+ */
+final class MavenScannerTest {
+
+    @Test
+    void scansMultipleVersions(@TempDir final Path temp) throws IOException {
+        final Path v1 = temp.resolve("com/test/logger/1.0");
+        final Path v2 = temp.resolve("com/test/logger/2.0");
+        Files.createDirectories(v1);
+        Files.createDirectories(v2);
+        Files.write(v1.resolve("logger-1.0.jar"), new byte[100]);
+        Files.write(v1.resolve("logger-1.0.pom"), new byte[20]);
+        Files.write(v2.resolve("logger-2.0.jar"), new byte[200]);
+        Files.write(v2.resolve("logger-2.0.pom"), new byte[25]);
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "my-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 2 records",
+            records,
+            Matchers.hasSize(2)
+        );
+        final ArtifactRecord first = records.stream()
+            .filter(r -> "1.0".equals(r.version()))
+            .findFirst()
+            .orElseThrow();
+        MatcherAssert.assertThat(
+            "Name should be groupId.artifactId",
+            first.name(),
+            Matchers.is("com.test.logger")
+        );
+        MatcherAssert.assertThat(
+            "Size should be JAR size (100), not POM",
+            first.size(),
+            Matchers.is(100L)
+        );
+        MatcherAssert.assertThat(
+            "Repo type should be maven",
+            first.repoType(),
+            Matchers.is("maven")
+        );
+        final ArtifactRecord second = records.stream()
+            .filter(r -> "2.0".equals(r.version()))
+            .findFirst()
+            .orElseThrow();
+        MatcherAssert.assertThat(
+            "Size of version 2.0 jar should be 200",
+            second.size(),
+            Matchers.is(200L)
+        );
+    }
+
+    @Test
+    void handlesMultipleArtifacts(@TempDir final Path temp)
+        throws IOException {
+        final Path commonsDir = temp.resolve(
+            "org/apache/commons/commons-lang3/3.12.0"
+        );
+        Files.createDirectories(commonsDir);
+        Files.write(
+            commonsDir.resolve("commons-lang3-3.12.0.jar"), new byte[50]
+        );
+        Files.write(
+            commonsDir.resolve("commons-lang3-3.12.0.pom"), new byte[10]
+        );
+        final Path guavaDir = temp.resolve("com/google/guava/guava/31.0");
+        Files.createDirectories(guavaDir);
+        Files.write(guavaDir.resolve("guava-31.0.jar"), new byte[75]);
+        Files.write(guavaDir.resolve("guava-31.0.pom"), new byte[15]);
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "central")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should find records from both artifacts",
+            records,
+            Matchers.hasSize(2)
+        );
+        MatcherAssert.assertThat(
+            "Should contain commons-lang3",
+            records.stream()
+                .anyMatch(r -> "org.apache.commons.commons-lang3".equals(r.name())),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain guava",
+            records.stream()
+                .anyMatch(r -> "com.google.guava.guava".equals(r.name())),
+            Matchers.is(true)
+        );
+    }
+
+    @Test
+    void handlesWarFile(@TempDir final Path temp) throws IOException {
+        final Path ver = temp.resolve("com/test/webapp/1.0");
+        Files.createDirectories(ver);
+        Files.write(ver.resolve("webapp-1.0.war"), new byte[300]);
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should find the war artifact",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "War file size should be 300",
+            records.get(0).size(),
+            Matchers.is(300L)
+        );
+    }
+
+    @Test
+    void gradleUsesCorrectRepoType(@TempDir final Path temp)
+        throws IOException {
+        final Path ver = temp.resolve("com/test/gradlelib/1.0");
+        Files.createDirectories(ver);
+        Files.write(ver.resolve("gradlelib-1.0.jar"), new byte[50]);
+        final MavenScanner scanner = new MavenScanner("gradle");
+        final List<ArtifactRecord> records = scanner.scan(temp, "repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce a record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Repo type should be gradle",
+            records.get(0).repoType(),
+            Matchers.is("gradle")
+        );
+    }
+
+    @Test
+    void skipsSidecarFiles(@TempDir final Path temp) throws IOException {
+        final Path dir = temp.resolve("uk/co/datumedge/hamcrest-json/0.2");
+        Files.createDirectories(dir);
+        Files.write(dir.resolve("hamcrest-json-0.2.jar"), new byte[200]);
+        Files.write(dir.resolve("hamcrest-json-0.2.pom"), new byte[30]);
+        Files.writeString(dir.resolve("hamcrest-json-0.2.jar.sha1"), "hash");
+        Files.writeString(dir.resolve("hamcrest-json-0.2.jar.sha256"), "hash");
+        Files.writeString(dir.resolve("hamcrest-json-0.2.jar.md5"), "hash");
+        Files.writeString(
+            dir.resolve("hamcrest-json-0.2.jar.artipie-meta.json"), "{}"
+        );
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "proxy-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 deduplicated record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be uk.co.datumedge.hamcrest-json",
+            records.get(0).name(),
+            Matchers.is("uk.co.datumedge.hamcrest-json")
+        );
+        MatcherAssert.assertThat(
+            "Size should be JAR size (200), not POM (30)",
+            records.get(0).size(),
+            Matchers.is(200L)
+        );
+    }
+
+    @Test
+    void handlesPomOnlyArtifacts(@TempDir final Path temp)
+        throws IOException {
+        final Path dir = temp.resolve(
+            "com/fasterxml/jackson/jackson-bom/3.0.1"
+        );
+        Files.createDirectories(dir);
+        Files.write(dir.resolve("jackson-bom-3.0.1.pom"), new byte[80]);
+        Files.writeString(dir.resolve("jackson-bom-3.0.1.pom.sha1"), "hash");
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "proxy-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should find the POM-only artifact",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be com.fasterxml.jackson.jackson-bom",
+            records.get(0).name(),
+            Matchers.is("com.fasterxml.jackson.jackson-bom")
+        );
+        MatcherAssert.assertThat(
+            "Size should be the POM size",
+            records.get(0).size(),
+            Matchers.is(80L)
+        );
+    }
+
+    @Test
+    void skipsMetadataXmlFiles(@TempDir final Path temp)
+        throws IOException {
+        // Plugin-level or artifact-level metadata files should not
+        // be indexed as artifacts themselves
+        final Path pluginDir = temp.resolve("com/example/maven/plugins");
+        Files.createDirectories(pluginDir);
+        Files.writeString(
+            pluginDir.resolve("maven-metadata.xml"),
+            ""
+        );
+        final Path artifactDir = temp.resolve(
+            "com/example/maven/plugins/my-plugin/1.0"
+        );
+        Files.createDirectories(artifactDir);
+        Files.write(artifactDir.resolve("my-plugin-1.0.jar"), new byte[150]);
+        Files.write(artifactDir.resolve("my-plugin-1.0.pom"), new byte[20]);
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should only find the actual artifact, not the metadata XML",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Should be the plugin JAR",
+            records.get(0).name(),
+            Matchers.is("com.example.maven.plugins.my-plugin")
+        );
+    }
+
+    @Test
+    void handlesZipArtifactsWithSidecars(@TempDir final Path temp)
+        throws IOException {
+        final Path v1 = temp.resolve(
+            "com/auto1/aws/lambda/rackspace_swift_uploader_lambda/1.2.10"
+        );
+        final Path v2 = temp.resolve(
+            "com/auto1/aws/lambda/rackspace_swift_uploader_lambda/1.2.10-beta"
+        );
+        Files.createDirectories(v1);
+        Files.createDirectories(v2);
+        Files.write(
+            v1.resolve(
+                "rackspace_swift_uploader_lambda_1.2.10.zip"
+            ), new byte[400]
+        );
+        Files.writeString(
+            v1.resolve(
+                "rackspace_swift_uploader_lambda_1.2.10.zip.md5"
+            ), "hash"
+        );
+        Files.writeString(
+            v1.resolve(
+                "rackspace_swift_uploader_lambda_1.2.10.zip.sha1"
+            ), "hash"
+        );
+        Files.writeString(
+            v1.resolve(
+                "rackspace_swift_uploader_lambda_1.2.10.zip.sha256"
+            ), "hash"
+        );
+        Files.write(
+            v2.resolve(
+                "rackspace_swift_uploader_lambda_1.2.10-beta.zip"
+            ), new byte[350]
+        );
+        Files.writeString(
+            v2.resolve(
+                "rackspace_swift_uploader_lambda_1.2.10-beta.zip.md5"
+            ), "hash"
+        );
+        final MavenScanner scanner = new MavenScanner("gradle");
+        final List<ArtifactRecord> records = scanner.scan(temp, "ops")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should find 2 zip versions",
+            records,
+            Matchers.hasSize(2)
+        );
+        MatcherAssert.assertThat(
+            "Name should be fully qualified",
+            records.get(0).name(),
+            Matchers.is(
+                "com.auto1.aws.lambda:rackspace_swift_uploader_lambda"
+            )
+        );
+        MatcherAssert.assertThat(
+            "Repo type should be gradle",
+            records.get(0).repoType(),
+            Matchers.is("gradle")
+        );
+        final ArtifactRecord release = records.stream()
+            .filter(r -> "1.2.10".equals(r.version()))
+            .findFirst().orElseThrow();
+        MatcherAssert.assertThat(
+            "Release zip size should be 400",
+            release.size(),
+            Matchers.is(400L)
+        );
+        final ArtifactRecord beta = records.stream()
+            .filter(r -> "1.2.10-beta".equals(r.version()))
+            .findFirst().orElseThrow();
+        MatcherAssert.assertThat(
+            "Beta zip size should be 350",
+            beta.size(),
+            Matchers.is(350L)
+        );
+    }
+
+    @Test
+    void returnsEmptyForEmptyDirectory(@TempDir final Path temp)
+        throws IOException {
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "empty")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Empty directory should produce no records",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void deduplicatesJarAndPom(@TempDir final Path temp) throws IOException {
+        final Path dir = temp.resolve("com/test/lib/1.0");
+        Files.createDirectories(dir);
+        Files.write(dir.resolve("lib-1.0.jar"), new byte[500]);
+        Files.write(dir.resolve("lib-1.0.pom"), new byte[50]);
+        final MavenScanner scanner = new MavenScanner("maven");
+        final List<ArtifactRecord> records = scanner.scan(temp, "repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "JAR + POM should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Size should be JAR (500), not POM (50)",
+            records.get(0).size(),
+            Matchers.is(500L)
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/NpmScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/NpmScannerTest.java
new file mode 100644
index 000000000..da41b7fc6
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/NpmScannerTest.java
@@ -0,0 +1,475 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link NpmScanner}.
+ *
+ * @since 1.20.13
+ */
+final class NpmScannerTest {
+
+    @Test
+    void scansUnscopedPackageWithVersionsDir(@TempDir final Path temp)
+        throws IOException {
+        final Path pkg = temp.resolve("simple-modal-window");
+        final Path versions = pkg.resolve(".versions");
+        final Path tgzDir = pkg.resolve("-/@platform");
+        Files.createDirectories(versions);
+        Files.createDirectories(tgzDir);
+        Files.writeString(
+            versions.resolve("0.0.2.json"), "{}", StandardCharsets.UTF_8
+        );
+        Files.writeString(
+            versions.resolve("0.0.3.json"), "{}", StandardCharsets.UTF_8
+        );
+        Files.write(
+            tgzDir.resolve("simple-modal-window-0.0.2.tgz"),
+            new byte[100]
+        );
+        Files.write(
+            tgzDir.resolve("simple-modal-window-0.0.3.tgz"),
+            new byte[200]
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 2 records",
+            records,
+            Matchers.hasSize(2)
+        );
+        MatcherAssert.assertThat(
+            "All records should have name simple-modal-window",
+            records.stream().allMatch(
+                r -> "simple-modal-window".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 0.0.2",
+            records.stream().anyMatch(
+                r -> "0.0.2".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 0.0.3",
+            records.stream().anyMatch(
+                r -> "0.0.3".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        final ArtifactRecord v2 = records.stream()
+            .filter(r -> "0.0.2".equals(r.version()))
+            .findFirst().orElseThrow();
+        MatcherAssert.assertThat(
+            "Size of 0.0.2 should be 100",
+            v2.size(),
+            Matchers.is(100L)
+        );
+        final ArtifactRecord v3 = records.stream()
+            .filter(r -> "0.0.3".equals(r.version()))
+            .findFirst().orElseThrow();
+        MatcherAssert.assertThat(
+            "Size of 0.0.3 should be 200",
+            v3.size(),
+            Matchers.is(200L)
+        );
+        MatcherAssert.assertThat(
+            "Repo type should be npm",
+            v2.repoType(),
+            Matchers.is("npm")
+        );
+    }
+
+    @Test
+    void scansScopedPackageWithVersionsDir(@TempDir final Path temp)
+        throws IOException {
+        final Path pkg = temp.resolve("@ui-components/button");
+        final Path versions = pkg.resolve(".versions");
+        final Path tgzDir = pkg.resolve("-/@ui-components");
+        Files.createDirectories(versions);
+        Files.createDirectories(tgzDir);
+        Files.writeString(
+            versions.resolve("0.1.8.json"), "{}", StandardCharsets.UTF_8
+        );
+        Files.write(
+            tgzDir.resolve("button-0.1.8.tgz"), new byte[50]
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 1 record for scoped package",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be @ui-components/button",
+            records.get(0).name(),
+            Matchers.is("@ui-components/button")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 0.1.8",
+            records.get(0).version(),
+            Matchers.is("0.1.8")
+        );
+        MatcherAssert.assertThat(
+            "Size should be 50",
+            records.get(0).size(),
+            Matchers.is(50L)
+        );
+    }
+
+    @Test
+    void handlesPreReleaseVersions(@TempDir final Path temp)
+        throws IOException {
+        final Path pkg = temp.resolve("ssu-popup");
+        final Path versions = pkg.resolve(".versions");
+        final Path tgzDir = pkg.resolve("-/@platform");
+        Files.createDirectories(versions);
+        Files.createDirectories(tgzDir);
+        Files.writeString(
+            versions.resolve("0.0.1-dev.0.json"), "{}",
+            StandardCharsets.UTF_8
+        );
+        Files.writeString(
+            versions.resolve("0.0.1.json"), "{}", StandardCharsets.UTF_8
+        );
+        Files.writeString(
+            versions.resolve("1.0.1-dev.2.json"), "{}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(
+            tgzDir.resolve("ssu-popup-0.0.1-dev.0.tgz"), new byte[30]
+        );
+        Files.write(
+            tgzDir.resolve("ssu-popup-0.0.1.tgz"), new byte[40]
+        );
+        Files.write(
+            tgzDir.resolve("ssu-popup-1.0.1-dev.2.tgz"), new byte[60]
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 3 records (including pre-release)",
+            records,
+            Matchers.hasSize(3)
+        );
+        MatcherAssert.assertThat(
+            "Should contain 0.0.1-dev.0",
+            records.stream().anyMatch(
+                r -> "0.0.1-dev.0".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain 1.0.1-dev.2",
+            records.stream().anyMatch(
+                r -> "1.0.1-dev.2".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+    }
+
+    @Test
+    void handlesMultiplePackages(@TempDir final Path temp)
+        throws IOException {
+        final Path pkg1 = temp.resolve("tracking");
+        final Path pkg2 = temp.resolve("str-formatter");
+        Files.createDirectories(pkg1.resolve(".versions"));
+        Files.createDirectories(pkg1.resolve("-/@platform"));
+        Files.createDirectories(pkg2.resolve(".versions"));
+        Files.createDirectories(pkg2.resolve("-/@platform"));
+        Files.writeString(
+            pkg1.resolve(".versions/0.0.1.json"), "{}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(
+            pkg1.resolve("-/@platform/tracking-0.0.1.tgz"), new byte[80]
+        );
+        Files.writeString(
+            pkg2.resolve(".versions/0.0.2.json"), "{}",
+            StandardCharsets.UTF_8
+        );
+        Files.write(
+            pkg2.resolve("-/@platform/str-formatter-0.0.2.tgz"),
+            new byte[90]
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should find records from both packages",
+            records,
+            Matchers.hasSize(2)
+        );
+        MatcherAssert.assertThat(
+            "Should contain tracking",
+            records.stream().anyMatch(
+                r -> "tracking".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain str-formatter",
+            records.stream().anyMatch(
+                r -> "str-formatter".equals(r.name())
+            ),
+            Matchers.is(true)
+        );
+    }
+
+    @Test
+    void handlesMissingTgzInVersionsMode(@TempDir final Path temp)
+        throws IOException {
+        final Path pkg = temp.resolve("no-tgz");
+        Files.createDirectories(pkg.resolve(".versions"));
+        Files.writeString(
+            pkg.resolve(".versions/1.0.0.json"), "{}",
+            StandardCharsets.UTF_8
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should still produce 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Size should be 0 when tgz is missing",
+            records.get(0).size(),
+            Matchers.is(0L)
+        );
+    }
+
+    @Test
+    void fallsBackToMetaJson(@TempDir final Path temp) throws IOException {
+        final Path pkgDir = temp.resolve("lodash");
+        final Path tgzDir = temp.resolve("lodash/-");
+        Files.createDirectories(tgzDir);
+        Files.write(tgzDir.resolve("lodash-4.17.21.tgz"), new byte[12345]);
+        Files.writeString(
+            pkgDir.resolve("meta.json"),
+            String.join(
+                "\n",
+                "{",
+                "  \"name\": \"lodash\",",
+                "  \"versions\": {",
+                "    \"4.17.21\": {",
+                "      \"name\": \"lodash\",",
+                "      \"version\": \"4.17.21\",",
+                "      \"dist\": {",
+                "        \"tarball\": \"/lodash/-/lodash-4.17.21.tgz\"",
+                "      }",
+                "    }",
+                "  }",
+                "}"
+            ),
+            StandardCharsets.UTF_8
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 1 record via meta.json fallback",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be lodash",
+            records.get(0).name(),
+            Matchers.is("lodash")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 4.17.21",
+            records.get(0).version(),
+            Matchers.is("4.17.21")
+        );
+        MatcherAssert.assertThat(
+            "Size should reflect the tarball",
+            records.get(0).size(),
+            Matchers.is(12345L)
+        );
+    }
+
+    @Test
+    void skipsUncachedVersionsInMetaJson(@TempDir final Path temp)
+        throws IOException {
+        // meta.json lists 3 versions but only 1.0.11 tarball is on disk
+        final Path pkgDir = temp.resolve("pako");
+        final Path tgzDir = temp.resolve("pako/-");
+        Files.createDirectories(tgzDir);
+        Files.write(tgzDir.resolve("pako-1.0.11.tgz"), new byte[98765]);
+        Files.writeString(
+            pkgDir.resolve("meta.json"),
+            String.join(
+                "\n",
+                "{",
+                "  \"name\": \"pako\",",
+                "  \"versions\": {",
+                "    \"1.0.9\": {\"dist\":{\"tarball\":\"/pako/-/pako-1.0.9.tgz\"}},",
+                "    \"1.0.10\": {\"dist\":{\"tarball\":\"/pako/-/pako-1.0.10.tgz\"}},",
+                "    \"1.0.11\": {\"dist\":{\"tarball\":\"/pako/-/pako-1.0.11.tgz\"}}",
+                "  }",
+                "}"
+            ),
+            StandardCharsets.UTF_8
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-proxy")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should only index the one cached version",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Only version 1.0.11 should be indexed",
+            records.get(0).version(),
+            Matchers.is("1.0.11")
+        );
+        MatcherAssert.assertThat(
+            "Size should reflect the cached tarball",
+            records.get(0).size(),
+            Matchers.is(98765L)
+        );
+    }
+
+    @Test
+    void skipsMalformedMetaJson(@TempDir final Path temp)
+        throws IOException {
+        final Path pkgDir = temp.resolve("broken");
+        Files.createDirectories(pkgDir);
+        Files.writeString(
+            pkgDir.resolve("meta.json"),
+            "<<>>",
+            StandardCharsets.UTF_8
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Malformed JSON should produce 0 records",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void metaJsonUsesTimeField(@TempDir final Path temp)
+        throws IOException {
+        final String timestamp = "2023-06-15T12:30:00.000Z";
+        final long expected = Instant.parse(timestamp).toEpochMilli();
+        final Path pkgDir = temp.resolve("timed");
+        final Path tgzDir = temp.resolve("timed/-");
+        Files.createDirectories(tgzDir);
+        Files.write(tgzDir.resolve("timed-1.0.0.tgz"), new byte[100]);
+        Files.writeString(
+            pkgDir.resolve("meta.json"),
+            String.join(
+                "\n",
+                "{",
+                "  \"name\": \"timed\",",
+                "  \"versions\": {",
+                "    \"1.0.0\": {",
+                "      \"name\": \"timed\",",
+                "      \"version\": \"1.0.0\",",
+                "      \"dist\": {",
+                "        \"tarball\": \"/timed/-/timed-1.0.0.tgz\"",
+                "      }",
+                "    }",
+                "  },",
+                "  \"time\": {",
+                "    \"1.0.0\": \"" + timestamp + "\"",
+                "  }",
+                "}"
+            ),
+            StandardCharsets.UTF_8
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "CreatedDate should match the parsed time field",
+            records.get(0).createdDate(),
+            Matchers.is(expected)
+        );
+    }
+
+    @Test
+    void scansScopedPackageWithMetaJson(@TempDir final Path temp)
+        throws IOException {
+        final Path pkgDir = temp.resolve("@hello/simple");
+        final Path tgzDir = temp.resolve("@hello/simple/-");
+        Files.createDirectories(tgzDir);
+        Files.write(tgzDir.resolve("simple-1.0.1.tgz"), new byte[200]);
+        Files.writeString(
+            pkgDir.resolve("meta.json"),
+            String.join(
+                "\n",
+                "{",
+                "  \"name\": \"@hello/simple\",",
+                "  \"versions\": {",
+                "    \"1.0.1\": {",
+                "      \"name\": \"@hello/simple\",",
+                "      \"version\": \"1.0.1\",",
+                "      \"dist\": {",
+                "        \"tarball\": \"/@hello/simple/-/simple-1.0.1.tgz\"",
+                "      }",
+                "    }",
+                "  }",
+                "}"
+            ),
+            StandardCharsets.UTF_8
+        );
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 1 record for scoped package via meta.json",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be @hello/simple",
+            records.get(0).name(),
+            Matchers.is("@hello/simple")
+        );
+    }
+
+    @Test
+    void returnsEmptyForEmptyDirectory(@TempDir final Path temp)
+        throws IOException {
+        final NpmScanner scanner = new NpmScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "npm-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Empty directory should produce no records",
+            records,
+            Matchers.empty()
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/ProgressReporterTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/ProgressReporterTest.java
new file mode 100644
index 000000000..da4d2c196
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/ProgressReporterTest.java
@@ -0,0 +1,95 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for the {@link ProgressReporter} counters.
+ *
+ * @since 1.20.13
+ */
+final class ProgressReporterTest {
+
+    @Test
+    void getScannedReturnsZeroInitially() {
+        final ProgressReporter tracker = new ProgressReporter(100);
+        MatcherAssert.assertThat(
+            "Initial scanned count should be zero",
+            tracker.getScanned(),
+            Matchers.is(0L)
+        );
+    }
+
+    @Test
+    void errorsStartAtZero() {
+        final ProgressReporter tracker = new ProgressReporter(100);
+        MatcherAssert.assertThat(
+            "Initial error count should be zero",
+            tracker.getErrors(),
+            Matchers.is(0L)
+        );
+    }
+
+    @Test
+    void incrementIncrementsScannedCount() {
+        final ProgressReporter tracker = new ProgressReporter(1000);
+        tracker.increment();
+        tracker.increment();
+        tracker.increment();
+        MatcherAssert.assertThat(
+            "Scanned count should reflect three increments",
+            tracker.getScanned(),
+            Matchers.is(3L)
+        );
+    }
+
+    @Test
+    void recordErrorIncrementsErrorCount() {
+        final ProgressReporter tracker = new ProgressReporter(100);
+        tracker.recordError();
+        tracker.recordError();
+        MatcherAssert.assertThat(
+            "Error count should reflect two errors",
+            tracker.getErrors(),
+            Matchers.is(2L)
+        );
+    }
+
+    @Test
+    void incrementAndErrorsAreIndependent() {
+        final ProgressReporter tracker = new ProgressReporter(100);
+        tracker.increment();
+        tracker.increment();
+        tracker.recordError();
+        MatcherAssert.assertThat(
+            "Scanned should be 2",
+            tracker.getScanned(),
+            Matchers.is(2L)
+        );
+        MatcherAssert.assertThat(
+            "Errors should be 1",
+            tracker.getErrors(),
+            Matchers.is(1L)
+        );
+    }
+
+    @Test
+    void printFinalSummaryDoesNotThrow() {
+        final ProgressReporter tracker = new ProgressReporter(10);
+        for (int step = 0; step < 25; ++step) {
+            tracker.increment();
+        }
+        tracker.recordError();
+        tracker.printFinalSummary();
+        MatcherAssert.assertThat(
+            "Scanned should be 25 after summary",
+            tracker.getScanned(),
+            Matchers.is(25L)
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/PypiScannerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/PypiScannerTest.java
new file mode 100644
index 000000000..a454cb494
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/PypiScannerTest.java
@@ -0,0 +1,317 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link PypiScanner}.
+ *
+ * @since 1.20.13
+ */
+final class PypiScannerTest {
+
+    @Test
+    void parsesWheelFilename(@TempDir final Path temp) throws IOException {
+        final Path pkgDir = temp.resolve("my-package");
+        Files.createDirectories(pkgDir);
+        Files.write(
+            pkgDir.resolve("my_package-1.0.0-py3-none-any.whl"),
+            new byte[50]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        final ArtifactRecord record = records.get(0);
+        MatcherAssert.assertThat(
+            "Name should be normalized to my-package",
+            record.name(),
+            Matchers.is("my-package")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 1.0.0",
+            record.version(),
+            Matchers.is("1.0.0")
+        );
+        MatcherAssert.assertThat(
+            "Size should be 50",
+            record.size(),
+            Matchers.is(50L)
+        );
+        MatcherAssert.assertThat(
+            "Repo type should be pypi",
+            record.repoType(),
+            Matchers.is("pypi")
+        );
+    }
+
+    @Test
+    void parsesSdistTarGz(@TempDir final Path temp) throws IOException {
+        final Path pkgDir = temp.resolve("requests");
+        Files.createDirectories(pkgDir);
+        Files.write(
+            pkgDir.resolve("requests-2.28.0.tar.gz"),
+            new byte[100]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        final ArtifactRecord record = records.get(0);
+        MatcherAssert.assertThat(
+            "Name should be requests",
+            record.name(),
+            Matchers.is("requests")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 2.28.0",
+            record.version(),
+            Matchers.is("2.28.0")
+        );
+    }
+
+    @Test
+    void parsesSdistZip(@TempDir final Path temp) throws IOException {
+        final Path pkgDir = temp.resolve("foo");
+        Files.createDirectories(pkgDir);
+        Files.write(
+            pkgDir.resolve("foo-3.0.zip"),
+            new byte[75]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        final ArtifactRecord record = records.get(0);
+        MatcherAssert.assertThat(
+            "Name should be foo",
+            record.name(),
+            Matchers.is("foo")
+        );
+        MatcherAssert.assertThat(
+            "Version should be 3.0",
+            record.version(),
+            Matchers.is("3.0")
+        );
+        MatcherAssert.assertThat(
+            "Size should be 75",
+            record.size(),
+            Matchers.is(75L)
+        );
+    }
+
+    @Test
+    void normalizesPackageName(@TempDir final Path temp)
+        throws IOException {
+        final Path pkgDir = temp.resolve("My_Package");
+        Files.createDirectories(pkgDir);
+        Files.write(
+            pkgDir.resolve("My_Package-2.0.0-py3-none-any.whl"),
+            new byte[30]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce exactly 1 record",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be normalized to my-package",
+            records.get(0).name(),
+            Matchers.is("my-package")
+        );
+    }
+
+    @Test
+    void skipsNonConformingFilenames(@TempDir final Path temp)
+        throws IOException {
+        final Path dataDir = temp.resolve("data");
+        Files.createDirectories(dataDir);
+        Files.writeString(dataDir.resolve("readme.txt"), "hello");
+        Files.writeString(dataDir.resolve("notes.md"), "notes");
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 0 records for non-conforming files",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void handlesMultipleVersions(@TempDir final Path temp)
+        throws IOException {
+        final Path pkgDir = temp.resolve("flask");
+        Files.createDirectories(pkgDir);
+        Files.write(
+            pkgDir.resolve("flask-2.0.0-py3-none-any.whl"),
+            new byte[40]
+        );
+        Files.write(
+            pkgDir.resolve("flask-2.1.0.tar.gz"),
+            new byte[60]
+        );
+        Files.write(
+            pkgDir.resolve("flask-2.2.0.zip"),
+            new byte[80]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should produce 3 records for multiple versions",
+            records,
+            Matchers.hasSize(3)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 2.0.0",
+            records.stream().anyMatch(
+                r -> "2.0.0".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 2.1.0",
+            records.stream().anyMatch(
+                r -> "2.1.0".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 2.2.0",
+            records.stream().anyMatch(
+                r -> "2.2.0".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+    }
+
+    @Test
+    void skipsHiddenFiles(@TempDir final Path temp) throws IOException {
+        final Path pkgDir = temp.resolve("hidden");
+        Files.createDirectories(pkgDir);
+        Files.write(
+            pkgDir.resolve(".hidden-1.0.0.tar.gz"),
+            new byte[20]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should skip hidden files",
+            records,
+            Matchers.empty()
+        );
+    }
+
+    @Test
+    void scansVersionedSubdirectoryLayout(@TempDir final Path temp)
+        throws IOException {
+        // Real Artipie PyPI layout: package-name/version/file
+        final Path v100 = temp.resolve("dnssec-validator/1.0.0");
+        final Path v101 = temp.resolve("dnssec-validator/1.0.1");
+        Files.createDirectories(v100);
+        Files.createDirectories(v101);
+        Files.write(
+            v100.resolve("dnssec_validator-1.0.0-py3-none-any.whl"),
+            new byte[30]
+        );
+        Files.write(
+            v100.resolve("dnssec_validator-1.0.0.tar.gz"),
+            new byte[40]
+        );
+        Files.write(
+            v101.resolve("dnssec_validator-1.0.1-py3-none-any.whl"),
+            new byte[50]
+        );
+        Files.write(
+            v101.resolve("dnssec_validator-1.0.1.tar.gz"),
+            new byte[60]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should find all 4 files in versioned subdirs",
+            records,
+            Matchers.hasSize(4)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 1.0.0",
+            records.stream().anyMatch(
+                r -> "1.0.0".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+        MatcherAssert.assertThat(
+            "Should contain version 1.0.1",
+            records.stream().anyMatch(
+                r -> "1.0.1".equals(r.version())
+            ),
+            Matchers.is(true)
+        );
+    }
+
+    @Test
+    void skipsHiddenDirectories(@TempDir final Path temp)
+        throws IOException {
+        // Real Artipie layout has .meta and .pypi hidden dirs
+        final Path metaDir = temp.resolve(".meta/pypi/shards/pkg/1.0.0");
+        final Path pypiDir = temp.resolve(".pypi/pkg");
+        final Path realDir = temp.resolve("pkg/1.0.0");
+        Files.createDirectories(metaDir);
+        Files.createDirectories(pypiDir);
+        Files.createDirectories(realDir);
+        Files.writeString(
+            metaDir.resolve("pkg-1.0.0-py3-none-any.whl.json"),
+            "{}"
+        );
+        Files.writeString(
+            pypiDir.resolve("pkg.html"),
+            ""
+        );
+        Files.write(
+            realDir.resolve("pkg-1.0.0-py3-none-any.whl"),
+            new byte[25]
+        );
+        final PypiScanner scanner = new PypiScanner();
+        final List<ArtifactRecord> records = scanner.scan(temp, "pypi-repo")
+            .collect(Collectors.toList());
+        MatcherAssert.assertThat(
+            "Should only find the real whl, not files in hidden dirs",
+            records,
+            Matchers.hasSize(1)
+        );
+        MatcherAssert.assertThat(
+            "Name should be pkg",
+            records.get(0).name(),
+            Matchers.is("pkg")
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/RepoConfigYamlTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/RepoConfigYamlTest.java
new file mode 100644
index 000000000..4134cf7f2
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/RepoConfigYamlTest.java
@@ -0,0 +1,137 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Tests for {@link RepoConfigYaml}.
+ *
+ * @since 1.20.13
+ */
+final class RepoConfigYamlTest {
+
+    /**
+     * Happy path: a well-formed config file is parsed correctly.
+     * Repo name is derived from the filename stem; rawType from repo.type.
+     *
+     * @param tmp JUnit temp directory
+     * @throws IOException if file creation fails
+     */
+    @Test
+    void parsesValidConfig(@TempDir final Path tmp) throws IOException {
+        final Path file = tmp.resolve("go.yaml");
+        Files.writeString(file, "repo:\n  type: go\n");
+        final RepoEntry entry = RepoConfigYaml.parse(file);
+        MatcherAssert.assertThat(
+            "repoName should be the filename stem",
+            entry.repoName(),
+            Matchers.is("go")
+        );
+        MatcherAssert.assertThat(
+            "rawType should match repo.type in YAML",
+            entry.rawType(),
+            Matchers.is("go")
+        );
+    }
+
+    /**
+     * Proxy type is preserved as-is (normalisation is done by RepoTypeNormalizer).
+     *
+     * @param tmp JUnit temp directory
+     * @throws IOException if file creation fails
+     */
+    @Test
+    void parsesProxyType(@TempDir final Path tmp) throws IOException {
+        final Path file = tmp.resolve("docker_proxy.yaml");
+        Files.writeString(file, "repo:\n  type: docker-proxy\n");
+        final RepoEntry entry = RepoConfigYaml.parse(file);
+        MatcherAssert.assertThat(
+            "rawType should be preserved without normalisation",
+            entry.rawType(),
+            Matchers.is("docker-proxy")
+        );
+        MatcherAssert.assertThat(
+            "repoName should match filename stem",
+            entry.repoName(),
+            Matchers.is("docker_proxy")
+        );
+    }
+
+    /**
+     * Missing {@code repo.type} key must throw {@link IOException}.
+     *
+     * @param tmp JUnit temp directory
+     * @throws IOException if file creation fails
+     */
+    @Test
+    void throwsWhenRepoTypeMissing(@TempDir final Path tmp) throws IOException {
+        final Path file = tmp.resolve("bad.yaml");
+        Files.writeString(file, "repo:\n  storage:\n    type: fs\n");
+        Assertions.assertThrows(
+            IOException.class,
+            () -> RepoConfigYaml.parse(file),
+            "Missing repo.type should throw IOException"
+        );
+    }
+
+    /**
+     * Malformed YAML (not parseable) must throw {@link IOException}.
+     *
+     * @param tmp JUnit temp directory
+     * @throws IOException if file creation fails
+     */
+    @Test
+    void throwsOnMalformedYaml(@TempDir final Path tmp) throws IOException {
+        final Path file = tmp.resolve("broken.yaml");
+        Files.writeString(file, "repo: [\nunclosed bracket\n");
+        Assertions.assertThrows(
+            IOException.class,
+            () -> RepoConfigYaml.parse(file),
+            "Malformed YAML should throw IOException"
+        );
+    }
+
+    /**
+     * Empty YAML file must throw {@link IOException}.
+     *
+     * @param tmp JUnit temp directory
+     * @throws IOException if file creation fails
+     */
+    @Test
+    void throwsOnEmptyFile(@TempDir final Path tmp) throws IOException {
+        final Path file = tmp.resolve("empty.yaml");
+        Files.writeString(file, "");
+        Assertions.assertThrows(
+            IOException.class,
+            () -> RepoConfigYaml.parse(file),
+            "Empty YAML should throw IOException"
+        );
+    }
+
+    /**
+     * YAML with additional fields alongside repo.type parses without error.
+     *
+     * @param tmp JUnit temp directory
+     * @throws IOException if file creation fails
+     */
+    @Test
+    void toleratesExtraFields(@TempDir final Path tmp) throws IOException {
+        final Path file = tmp.resolve("npm.yaml");
+        Files.writeString(
+            file,
+            "repo:\n  type: npm\n  url: http://example.com\n  storage:\n    type: fs\n    path: /data\n"
+        );
+        final RepoEntry entry = RepoConfigYaml.parse(file);
+        MatcherAssert.assertThat("extra fields must not break parsing", entry.rawType(), Matchers.is("npm"));
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/RepoTypeNormalizerTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/RepoTypeNormalizerTest.java
new file mode 100644
index 000000000..bcde8c80f
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/RepoTypeNormalizerTest.java
@@ -0,0 +1,37 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+/**
+ * Tests for {@link RepoTypeNormalizer}.
+ *
+ * @since 1.20.13
+ */
+final class RepoTypeNormalizerTest {
+
+    @ParameterizedTest
+    @CsvSource({
+        "docker-proxy, docker",
+        "npm-proxy, npm",
+        "maven-proxy, maven",
+        "go-proxy, go",
+        "maven, maven",
+        "docker, docker",
+        "file, file",
+        "go, go"
+    })
+    void normalizesType(final String raw, final String expected) {
+        MatcherAssert.assertThat(
+            String.format("normalize('%s') should return '%s'", raw, expected),
+            RepoTypeNormalizer.normalize(raw),
+            Matchers.is(expected)
+        );
+    }
+}
diff --git a/artipie-backfill/src/test/java/com/artipie/backfill/ScannerFactoryTest.java b/artipie-backfill/src/test/java/com/artipie/backfill/ScannerFactoryTest.java
new file mode 100644
index 000000000..c72475f91
--- /dev/null
+++ b/artipie-backfill/src/test/java/com/artipie/backfill/ScannerFactoryTest.java
@@ -0,0 +1,99 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.backfill;
+
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+/**
+ * Unit tests covering {@link ScannerFactory} type dispatch.
+ *
+ * @since 1.20.13
+ */
+final class ScannerFactoryTest {
+
+    @ParameterizedTest
+    @ValueSource(strings = {"maven", "gradle"})
+    void mavenAndGradleReturnMavenScanner(final String type) {
+        MatcherAssert.assertThat(
+            String.format("Type '%s' should produce a MavenScanner", type),
+            ScannerFactory.create(type),
+            Matchers.isA(MavenScanner.class)
+        );
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"composer", "php"})
+    void composerAndPhpReturnComposerScanner(final String type) {
+        MatcherAssert.assertThat(
+            String.format("Type '%s' should produce a ComposerScanner", type),
+            ScannerFactory.create(type),
+            Matchers.isA(ComposerScanner.class)
+        );
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"deb", "debian"})
+    void debAndDebianReturnDebianScanner(final String type) {
+        MatcherAssert.assertThat(
+            String.format("Type '%s' should produce a DebianScanner", type),
+            ScannerFactory.create(type),
+            Matchers.isA(DebianScanner.class)
+        );
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"gem", "gems"})
+    void gemAndGemsReturnGemScanner(final String type) {
+        MatcherAssert.assertThat(
+            String.format("Type '%s' should produce a GemScanner", type),
+            ScannerFactory.create(type),
+            Matchers.isA(GemScanner.class)
+        );
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {
+        "maven", "gradle", "docker", "npm", "pypi",
+        "go", "helm", "composer", "php", "file",
+        "deb", "debian", "gem", "gems",
+        "maven-proxy", "gradle-proxy", "docker-proxy",
+        "npm-proxy", "pypi-proxy", "go-proxy",
+        "helm-proxy", "php-proxy", "file-proxy",
+        "deb-proxy", "debian-proxy", "gem-proxy"
+    })
+    void createsNonNullScannerForKnownTypes(final String type) {
+        MatcherAssert.assertThat(
+            String.format("Scanner for type '%s' must not be null", type),
+            ScannerFactory.create(type),
+            Matchers.not(Matchers.nullValue())
+        );
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {
+        "MAVEN", "Docker", "NPM", "PyPi", "HELM"
+    })
+    void handlesUpperCaseTypes(final String type) {
+        MatcherAssert.assertThat(
+            String.format("Scanner for type '%s' (case-insensitive) must not be null", type),
+            ScannerFactory.create(type),
+            Matchers.not(Matchers.nullValue())
+        );
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"unknown", "svn", ""})
+    void throwsForUnknownType(final String type) {
+        Assertions.assertThrows(
+            IllegalArgumentException.class,
+            () -> ScannerFactory.create(type),
+            String.format("Expected IllegalArgumentException for unknown type '%s'", type)
+        );
+    }
+}
diff --git a/artipie-core/pom.xml b/artipie-core/pom.xml
index b110aca61..44d0e2a5d 100644
--- a/artipie-core/pom.xml
+++ b/artipie-core/pom.xml
@@ -6,11 +6,11 @@
com.artipieartipie
- 1.20.12
+ 1.22.0artipie-core
- 1.20.12
+ 1.22.0jar${project.basedir}/../LICENSE.header
@@ -19,7 +19,7 @@
com.artipieasto-core
- 1.20.12
+ 1.22.0compile
@@ -94,6 +94,13 @@
6.4.0.RELEASEcompile
+
+
+ org.apache.commons
+ commons-pool2
+ 2.12.0
+ compile
+ com.fasterxml.jackson.core
diff --git a/artipie-core/src/main/java/com/artipie/asto/dedup/ContentAddressableStorage.java b/artipie-core/src/main/java/com/artipie/asto/dedup/ContentAddressableStorage.java
new file mode 100644
index 000000000..3769a1a9d
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/asto/dedup/ContentAddressableStorage.java
@@ -0,0 +1,132 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.asto.dedup;
+
+import com.artipie.asto.Key;
+import com.artipie.asto.Storage;
+
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Content-addressable storage layer for deduplication.
+ * Stores blobs by SHA-256 hash with reference counting.
+ *
+ * @since 1.20.13
+ */
+public final class ContentAddressableStorage {
+
+ /**
+ * Blob reference counts: sha256 -> ref count.
+ */
+ private final ConcurrentMap refCounts;
+
+ /**
+ * Artifact-to-blob mapping: "repoName::path" -> sha256.
+ */
+ private final ConcurrentMap artifactBlobs;
+
+ /**
+ * Underlying storage for actual blob content.
+ */
+ private final Storage storage;
+
+ /**
+ * Ctor.
+ * @param storage Underlying storage for blobs
+ */
+ public ContentAddressableStorage(final Storage storage) {
+ this.storage = Objects.requireNonNull(storage, "storage");
+ this.refCounts = new ConcurrentHashMap<>();
+ this.artifactBlobs = new ConcurrentHashMap<>();
+ }
+
+ /**
+ * Save content with deduplication.
+ * If the same SHA-256 already exists, increment ref count instead of storing again.
+ *
+ * @param repoName Repository name
+ * @param path Artifact path
+ * @param sha256 SHA-256 hash of the content
+ * @param content Content bytes
+ * @return Future completing when saved
+ */
+ public CompletableFuture save(
+ final String repoName, final String path,
+ final String sha256, final byte[] content
+ ) {
+ final String artKey = artKey(repoName, path);
+ // Remove old mapping if exists
+ final String oldSha = this.artifactBlobs.put(artKey, sha256);
+ if (oldSha != null && !oldSha.equals(sha256)) {
+ this.decrementRef(oldSha);
+ }
+ // Increment ref count
+ this.refCounts.computeIfAbsent(sha256, k -> new AtomicLong(0)).incrementAndGet();
+ // Store blob if new
+ final Key blobKey = blobKey(sha256);
+ return this.storage.exists(blobKey).thenCompose(exists -> {
+ if (exists) {
+ return CompletableFuture.completedFuture(null);
+ }
+ return this.storage.save(blobKey, new com.artipie.asto.Content.From(content))
+ .toCompletableFuture();
+ });
+ }
+
+ /**
+ * Delete an artifact reference.
+ * Decrements ref count and removes blob if zero.
+ *
+ * @param repoName Repository name
+ * @param path Artifact path
+ * @return Future completing when deleted
+ */
+ public CompletableFuture delete(final String repoName, final String path) {
+ final String sha = this.artifactBlobs.remove(artKey(repoName, path));
+ if (sha == null) {
+ return CompletableFuture.completedFuture(null);
+ }
+ return this.decrementRef(sha);
+ }
+
+ /**
+ * Get the ref count for a blob.
+ * @param sha256 SHA-256 hash
+ * @return Reference count, 0 if not found
+ */
+ public long refCount(final String sha256) {
+ final AtomicLong count = this.refCounts.get(sha256);
+ return count != null ? count.get() : 0;
+ }
+
+ /**
+ * Decrement ref count, delete blob if zero.
+ */
+ private CompletableFuture decrementRef(final String sha256) {
+ final AtomicLong count = this.refCounts.get(sha256);
+ if (count != null && count.decrementAndGet() <= 0) {
+ this.refCounts.remove(sha256);
+ return this.storage.delete(blobKey(sha256)).toCompletableFuture();
+ }
+ return CompletableFuture.completedFuture(null);
+ }
+
+ private static String artKey(final String repoName, final String path) {
+ return repoName + "::" + path;
+ }
+
+ private static Key blobKey(final String sha256) {
+ return new Key.From(
+ ".cas",
+ sha256.substring(0, 2),
+ sha256.substring(2, 4),
+ sha256
+ );
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/cache/CacheInvalidationPubSub.java b/artipie-core/src/main/java/com/artipie/cache/CacheInvalidationPubSub.java
new file mode 100644
index 000000000..2a62834a0
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/cache/CacheInvalidationPubSub.java
@@ -0,0 +1,184 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.cache;
+
+import com.artipie.asto.misc.Cleanable;
+import com.artipie.http.log.EcsLogger;
+import io.lettuce.core.pubsub.RedisPubSubAdapter;
+import io.lettuce.core.pubsub.StatefulRedisPubSubConnection;
+import io.lettuce.core.pubsub.api.async.RedisPubSubAsyncCommands;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Redis/Valkey pub/sub channel for cross-instance cache invalidation.
+ *
+ * When multiple Artipie instances share a Valkey/Redis server, local
+ * Caffeine caches can become stale when another instance modifies data.
+ * This class uses Redis pub/sub to broadcast invalidation messages so
+ * all instances stay in sync.
+ *
+ * Each instance generates a unique {@code instanceId} on startup.
+ * Messages published by this instance are ignored on receipt to avoid
+ * invalidating caches that were already updated locally.
+ *
+ * Message format: {@code instanceId|cacheType|key}
+ *
+ * For invalidateAll: {@code instanceId|cacheType|*}
+ *
+ * @since 1.20.13
+ */
+public final class CacheInvalidationPubSub implements AutoCloseable {
+
+    /**
+     * Redis channel name for cache invalidation messages.
+     */
+    static final String CHANNEL = "artipie:cache:invalidate";
+
+    /**
+     * Wildcard key used for invalidateAll messages.
+     */
+    private static final String ALL = "*";
+
+    /**
+     * Message field separator.
+     */
+    private static final String SEP = "|";
+
+    /**
+     * Unique instance identifier to filter out self-messages.
+     */
+    private final String instanceId;
+
+    /**
+     * Connection for subscribing (receiving messages).
+     */
+    private final StatefulRedisPubSubConnection<String, String> subConn;
+
+    /**
+     * Connection for publishing (sending messages).
+     * Pub/sub spec requires separate connections for sub and pub.
+     */
+    private final StatefulRedisPubSubConnection<String, String> pubConn;
+
+    /**
+     * Async publish commands.
+     */
+    private final RedisPubSubAsyncCommands<String, String> pubCommands;
+
+    /**
+     * Registered cache handlers keyed by cache type name.
+     */
+    private final Map<String, Cleanable> caches;
+
+    /**
+     * Ctor.
+     * @param valkey Valkey connection to create pub/sub connections from
+     */
+    public CacheInvalidationPubSub(final ValkeyConnection valkey) {
+        this.instanceId = UUID.randomUUID().toString();
+        this.subConn = valkey.connectPubSub();
+        this.pubConn = valkey.connectPubSub();
+        this.pubCommands = this.pubConn.async();
+        this.caches = new ConcurrentHashMap<>();
+        // Attach the listener before subscribing so no message is missed.
+        this.subConn.addListener(new Listener());
+        this.subConn.async().subscribe(CacheInvalidationPubSub.CHANNEL);
+        EcsLogger.info("com.artipie.cache")
+            .message("Cache invalidation pub/sub started (instance: "
+                + this.instanceId.substring(0, 8) + ")")
+            .eventCategory("cache")
+            .eventAction("pubsub_start")
+            .eventOutcome("success")
+            .log();
+    }
+
+    /**
+     * Register a cache for remote invalidation.
+     * @param name Cache type name (e.g. "auth", "filters", "policy")
+     * @param cache Cache instance to invalidate on remote messages
+     */
+    public void register(final String name, final Cleanable cache) {
+        this.caches.put(name, cache);
+    }
+
+    /**
+     * Publish an invalidation message for a specific key.
+     * Other instances will call {@code cache.invalidate(key)} on receipt.
+     * @param cacheType Cache type name
+     * @param key Cache key to invalidate
+     */
+    public void publish(final String cacheType, final String key) {
+        final String msg = String.join(
+            CacheInvalidationPubSub.SEP, this.instanceId, cacheType, key
+        );
+        this.pubCommands.publish(CacheInvalidationPubSub.CHANNEL, msg);
+    }
+
+    /**
+     * Publish an invalidateAll message.
+     * Other instances will call {@code cache.invalidateAll()} on receipt.
+     * @param cacheType Cache type name
+     */
+    public void publishAll(final String cacheType) {
+        final String msg = String.join(
+            CacheInvalidationPubSub.SEP, this.instanceId, cacheType,
+            CacheInvalidationPubSub.ALL
+        );
+        this.pubCommands.publish(CacheInvalidationPubSub.CHANNEL, msg);
+    }
+
+    @Override
+    public void close() {
+        this.subConn.close();
+        this.pubConn.close();
+        EcsLogger.info("com.artipie.cache")
+            .message("Cache invalidation pub/sub closed")
+            .eventCategory("cache")
+            .eventAction("pubsub_stop")
+            .eventOutcome("success")
+            .log();
+    }
+
+    /**
+     * Listener that receives pub/sub messages and dispatches to caches.
+     */
+    private final class Listener extends RedisPubSubAdapter<String, String> {
+        @Override
+        public void message(final String channel, final String message) {
+            if (!CacheInvalidationPubSub.CHANNEL.equals(channel)) {
+                return;
+            }
+            // Limit 3 keeps keys that themselves contain '|' intact.
+            final String[] parts = message.split(
+                "\\" + CacheInvalidationPubSub.SEP, 3
+            );
+            if (parts.length < 3) {
+                return;
+            }
+            final String sender = parts[0];
+            if (CacheInvalidationPubSub.this.instanceId.equals(sender)) {
+                // Self-published message: the local cache was already updated.
+                return;
+            }
+            final String cacheType = parts[1];
+            final String key = parts[2];
+            final Cleanable cache =
+                CacheInvalidationPubSub.this.caches.get(cacheType);
+            if (cache == null) {
+                return;
+            }
+            if (CacheInvalidationPubSub.ALL.equals(key)) {
+                cache.invalidateAll();
+            } else {
+                cache.invalidate(key);
+            }
+            EcsLogger.debug("com.artipie.cache")
+                .message("Remote cache invalidation: " + cacheType + ":" + key)
+                .eventCategory("cache")
+                .eventAction("remote_invalidate")
+                .eventOutcome("success")
+                .log();
+        }
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/cache/PublishingCleanable.java b/artipie-core/src/main/java/com/artipie/cache/PublishingCleanable.java
new file mode 100644
index 000000000..0e1346ea8
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/cache/PublishingCleanable.java
@@ -0,0 +1,67 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.cache;
+
+import com.artipie.asto.misc.Cleanable;
+
+/**
+ * Decorator around a local {@link Cleanable} cache that, besides evicting
+ * locally, broadcasts every invalidation to the other Artipie instances
+ * over Redis pub/sub.
+ *
+ * Calling {@link #invalidate(String)} or {@link #invalidateAll()} first
+ * delegates to the wrapped cache, then publishes the matching message
+ * through {@link CacheInvalidationPubSub}. Remote instances register the
+ * bare (undecorated) cache with the subscriber, so messages arriving from
+ * the network never pass through this decorator again and cannot trigger
+ * a publish loop.
+ *
+ * @since 1.20.13
+ */
+public final class PublishingCleanable implements Cleanable {
+
+    /**
+     * Wrapped local cache that receives the actual eviction calls.
+     */
+    private final Cleanable inner;
+
+    /**
+     * Broadcast channel for invalidation messages.
+     */
+    private final CacheInvalidationPubSub pubsub;
+
+    /**
+     * Identifier of this cache ("auth", "filters", "policy", ...).
+     */
+    private final String cacheType;
+
+    /**
+     * Ctor.
+     * @param inner Local cache to wrap
+     * @param pubsub Redis pub/sub channel
+     * @param cacheType Cache type identifier
+     */
+    public PublishingCleanable(final Cleanable inner,
+        final CacheInvalidationPubSub pubsub, final String cacheType) {
+        this.inner = inner;
+        this.pubsub = pubsub;
+        this.cacheType = cacheType;
+    }
+
+    @Override
+    public void invalidate(final String key) {
+        // Evict locally first, then tell the rest of the cluster.
+        this.inner.invalidate(key);
+        this.pubsub.publish(this.cacheType, key);
+    }
+
+    @Override
+    public void invalidateAll() {
+        // Full local flush, then a wildcard broadcast.
+        this.inner.invalidateAll();
+        this.pubsub.publishAll(this.cacheType);
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/cache/StoragesCache.java b/artipie-core/src/main/java/com/artipie/cache/StoragesCache.java
index 83a107055..1b6621776 100644
--- a/artipie-core/src/main/java/com/artipie/cache/StoragesCache.java
+++ b/artipie-core/src/main/java/com/artipie/cache/StoragesCache.java
@@ -18,6 +18,7 @@
import com.google.common.base.Strings;
import org.apache.commons.lang3.NotImplementedException;
import com.artipie.http.log.EcsLogger;
+import com.artipie.http.misc.DispatchedStorage;
import java.time.Duration;
@@ -117,8 +118,10 @@ public Storage storage(final YamlMapping yaml) {
// Direct storage without JfrStorage wrapper
// JFR profiling removed - adds 2-10% overhead and bypassed by optimized slices
// Request-level metrics still active via Vert.x HTTP
- return StoragesLoader.STORAGES
- .newObject(type, new Config.YamlStorageConfig(key));
+ return new DispatchedStorage(
+ StoragesLoader.STORAGES
+ .newObject(type, new Config.YamlStorageConfig(key))
+ );
}
);
diff --git a/artipie-core/src/main/java/com/artipie/cache/ValkeyConnection.java b/artipie-core/src/main/java/com/artipie/cache/ValkeyConnection.java
index 8c94c41e7..e2708fbb5 100644
--- a/artipie-core/src/main/java/com/artipie/cache/ValkeyConnection.java
+++ b/artipie-core/src/main/java/com/artipie/cache/ValkeyConnection.java
@@ -4,6 +4,7 @@
*/
package com.artipie.cache;
+import com.artipie.http.log.EcsLogger;
import io.lettuce.core.RedisClient;
import io.lettuce.core.RedisURI;
import io.lettuce.core.api.StatefulRedisConnection;
@@ -11,34 +12,70 @@
import io.lettuce.core.codec.ByteArrayCodec;
import io.lettuce.core.codec.RedisCodec;
import io.lettuce.core.codec.StringCodec;
+import io.lettuce.core.pubsub.StatefulRedisPubSubConnection;
+import io.lettuce.core.support.ConnectionPoolSupport;
import java.time.Duration;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.commons.pool2.impl.GenericObjectPool;
+import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
/**
- * Valkey/Redis connection for L2 cache across Artipie.
- * Shared connection used by all two-tier caches.
- * Thread-safe, async operations.
+ * Valkey/Redis connection pool for L2 cache across Artipie.
+ * Uses Lettuce's built-in connection pooling backed by Apache Commons Pool2.
+ * Thread-safe, async operations with round-robin connection selection.
*
* @since 1.0
*/
public final class ValkeyConnection implements AutoCloseable {
+ /**
+ * Default maximum total connections in the pool.
+ */
+ private static final int DEFAULT_MAX_TOTAL = 8;
+
+ /**
+ * Default maximum idle connections.
+ */
+ private static final int DEFAULT_MAX_IDLE = 4;
+
+ /**
+ * Default minimum idle connections.
+ */
+ private static final int DEFAULT_MIN_IDLE = 2;
+
/**
* Redis client.
*/
private final RedisClient client;
/**
- * Stateful connection.
+ * Connection pool.
+ */
+ private final GenericObjectPool> pool;
+
+ /**
+ * Pre-borrowed connections for round-robin async access.
+ * These connections stay borrowed for the lifetime of ValkeyConnection.
*/
- private final StatefulRedisConnection connection;
+ private final StatefulRedisConnection[] connections;
/**
- * Async commands interface.
+ * Async command interfaces corresponding to each connection.
*/
- private final RedisAsyncCommands async;
+ private final RedisAsyncCommands[] asyncCommands;
+
+ /**
+ * Round-robin index for connection selection.
+ */
+ private final AtomicInteger index;
+
+ /**
+ * Number of active (pre-borrowed) connections.
+ */
+ private final int poolSize;
/**
* Constructor from configuration.
@@ -54,7 +91,7 @@ public ValkeyConnection(final CacheConfig config) {
}
/**
- * Constructor with explicit parameters.
+ * Constructor with explicit parameters and default pool size.
*
* @param host Valkey/Redis host
* @param port Valkey/Redis port
@@ -64,6 +101,24 @@ public ValkeyConnection(
final String host,
final int port,
final Duration timeout
+ ) {
+ this(host, port, timeout, ValkeyConnection.DEFAULT_MAX_TOTAL);
+ }
+
+ /**
+ * Constructor with explicit parameters and custom pool size.
+ *
+ * @param host Valkey/Redis host
+ * @param port Valkey/Redis port
+ * @param timeout Request timeout
+ * @param size Number of connections in the pool
+ */
+ @SuppressWarnings("unchecked")
+ public ValkeyConnection(
+ final String host,
+ final int port,
+ final Duration timeout,
+ final int size
) {
this.client = RedisClient.create(
RedisURI.builder()
@@ -72,24 +127,37 @@ public ValkeyConnection(
.withTimeout(timeout)
.build()
);
- // Use String keys and byte[] values
final RedisCodec codec = RedisCodec.of(
StringCodec.UTF8,
ByteArrayCodec.INSTANCE
);
- this.connection = this.client.connect(codec);
- this.async = this.connection.async();
- // Enable pipelining for better throughput
- this.async.setAutoFlushCommands(true);
+ final GenericObjectPoolConfig> config =
+ new GenericObjectPoolConfig<>();
+ config.setMaxTotal(Math.max(size, ValkeyConnection.DEFAULT_MIN_IDLE));
+ config.setMaxIdle(Math.min(ValkeyConnection.DEFAULT_MAX_IDLE, size));
+ config.setMinIdle(ValkeyConnection.DEFAULT_MIN_IDLE);
+ config.setTestOnBorrow(true);
+ this.pool = ConnectionPoolSupport.createGenericObjectPool(
+ () -> this.client.connect(codec),
+ config
+ );
+ this.poolSize = Math.max(size, ValkeyConnection.DEFAULT_MIN_IDLE);
+ this.connections = new StatefulRedisConnection[this.poolSize];
+ this.asyncCommands = new RedisAsyncCommands[this.poolSize];
+ this.index = new AtomicInteger(0);
+ this.initConnections();
}
/**
* Get async commands interface.
+ * Returns commands from a pool connection using round-robin selection,
+ * distributing load across multiple connections.
*
* @return Redis async commands
*/
public RedisAsyncCommands async() {
- return this.async;
+ final int idx = Math.abs(this.index.getAndIncrement() % this.poolSize);
+ return this.asyncCommands[idx];
}
/**
@@ -102,12 +170,12 @@ public RedisAsyncCommands async() {
@Deprecated
public boolean ping() {
try {
- return "PONG".equals(this.async.ping().get());
- } catch (Exception e) {
+ return "PONG".equals(this.async().ping().get());
+ } catch (final Exception ex) {
return false;
}
}
-
+
/**
* Async ping to check connectivity (non-blocking).
* Preferred over blocking ping() method.
@@ -115,16 +183,71 @@ public boolean ping() {
* @return Future with true if connected, false on timeout or error
*/
public CompletableFuture pingAsync() {
- return this.async.ping()
+ return this.async().ping()
.toCompletableFuture()
.orTimeout(1000, TimeUnit.MILLISECONDS)
.thenApply(pong -> "PONG".equals(pong))
.exceptionally(err -> false);
}
+    /**
+     * Returns the number of connections in the pool.
+     * This is the count of pre-borrowed connections used for round-robin
+     * dispatch; the constructor floors it at {@code DEFAULT_MIN_IDLE}, so
+     * it may exceed the size requested by the caller.
+     *
+     * @return Pool size
+     */
+    public int poolSize() {
+        return this.poolSize;
+    }
+
+    /**
+     * Create a new pub/sub connection for subscribe/publish operations.
+     * Uses String codec for both keys and values (pub/sub channels are text).
+     *
+     * The caller is responsible for closing the returned connection.
+     * The connection is created directly on the shared client and is not
+     * part of this object's pre-borrowed pool.
+     *
+     * @return New pub/sub connection
+     * @since 1.20.13
+     */
+    public StatefulRedisPubSubConnection connectPubSub() {
+        return this.client.connectPubSub();
+    }
+
@Override
public void close() {
- this.connection.close();
+ for (int idx = 0; idx < this.poolSize; idx += 1) {
+ if (this.connections[idx] != null) {
+ try {
+ this.pool.returnObject(this.connections[idx]);
+ } catch (final Exception ex) {
+ EcsLogger.debug("com.artipie.cache")
+ .message("Failed to return connection to pool during close")
+ .error(ex)
+ .log();
+ }
+ }
+ }
+ this.pool.close();
this.client.shutdown();
}
+
+    /**
+     * Pre-borrow connections from the pool and set up async command interfaces.
+     * On failure, any already-borrowed connections are returned and the pool
+     * and client are released before rethrowing, so a failed constructor does
+     * not leak Redis connections (the original code left them borrowed).
+     *
+     * @throws IllegalStateException If a connection cannot be borrowed
+     */
+    private void initConnections() {
+        for (int idx = 0; idx < this.poolSize; idx += 1) {
+            try {
+                this.connections[idx] = this.pool.borrowObject();
+                this.asyncCommands[idx] = this.connections[idx].async();
+                this.asyncCommands[idx].setAutoFlushCommands(true);
+            } catch (final Exception ex) {
+                this.releaseAfterInitFailure();
+                throw new IllegalStateException(
+                    String.format(
+                        "Failed to initialize connection %d of %d in Valkey pool",
+                        idx + 1, this.poolSize
+                    ),
+                    ex
+                );
+            }
+        }
+    }
+
+    /**
+     * Best-effort cleanup after a failed initConnections: return all
+     * borrowed connections, close the pool and shut down the client.
+     */
+    private void releaseAfterInitFailure() {
+        for (int idx = 0; idx < this.poolSize; idx += 1) {
+            if (this.connections[idx] != null) {
+                try {
+                    this.pool.returnObject(this.connections[idx]);
+                } catch (final Exception ignored) {
+                    // Best-effort: the pool is being torn down anyway.
+                }
+            }
+        }
+        this.pool.close();
+        this.client.shutdown();
+    }
}
diff --git a/artipie-core/src/main/java/com/artipie/cluster/ClusterEventBus.java b/artipie-core/src/main/java/com/artipie/cluster/ClusterEventBus.java
new file mode 100644
index 000000000..783be1cf7
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/cluster/ClusterEventBus.java
@@ -0,0 +1,237 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.cluster;
+
+import com.artipie.cache.ValkeyConnection;
+import com.artipie.http.log.EcsLogger;
+import io.lettuce.core.pubsub.RedisPubSubAdapter;
+import io.lettuce.core.pubsub.StatefulRedisPubSubConnection;
+import io.lettuce.core.pubsub.api.async.RedisPubSubAsyncCommands;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Consumer;
+
+/**
+ * Cross-instance event bus using Valkey pub/sub.
+ * Broadcasts events to all connected Artipie instances for HA clustering.
+ *
+ * Events are published as strings on Valkey channels with the naming
+ * convention {@code artipie:events:{topic}}. Each instance subscribes
+ * to channels of interest and dispatches received messages to all
+ * registered handlers for that topic.
+ *
+ * Each instance generates a unique identifier on startup. Messages
+ * published by the local instance are ignored on receipt to avoid
+ * double-processing events that were already handled locally.
+ *
+ * Message format on the wire: {@code instanceId|payload}
+ *
+ * Thread safety: this class is thread-safe. Handler lists use
+ * {@link CopyOnWriteArrayList} and topic subscriptions use
+ * {@link ConcurrentHashMap}.
+ *
+ * @since 1.20.13
+ */
+public final class ClusterEventBus implements AutoCloseable {
+
+ /**
+ * Channel prefix for all event bus topics.
+ */
+ static final String CHANNEL_PREFIX = "artipie:events:";
+
+ /**
+ * Message field separator between instance ID and payload.
+ */
+ private static final String SEP = "|";
+
+ /**
+ * Unique instance identifier to filter out self-published messages.
+ */
+ private final String instanceId;
+
+ /**
+ * Connection for subscribing (receiving messages).
+ */
+ private final StatefulRedisPubSubConnection subConn;
+
+ /**
+ * Connection for publishing (sending messages).
+ * Pub/sub spec requires separate connections for subscribe and publish.
+ */
+ private final StatefulRedisPubSubConnection pubConn;
+
+ /**
+ * Async publish commands.
+ */
+ private final RedisPubSubAsyncCommands pubCommands;
+
+ /**
+ * Registered handlers keyed by topic name.
+ * Each topic can have multiple handlers.
+ */
+ private final Map>> handlers;
+
+ /**
+ * Constructor. Sets up pub/sub connections and the message listener.
+ *
+ * @param valkey Valkey connection to create pub/sub connections from
+ */
+ public ClusterEventBus(final ValkeyConnection valkey) {
+ this.instanceId = UUID.randomUUID().toString();
+ this.subConn = valkey.connectPubSub();
+ this.pubConn = valkey.connectPubSub();
+ this.pubCommands = this.pubConn.async();
+ this.handlers = new ConcurrentHashMap<>();
+ this.subConn.addListener(new Dispatcher());
+ EcsLogger.info("com.artipie.cluster")
+ .message(
+ "Cluster event bus started (instance: "
+ + this.instanceId.substring(0, 8) + ")"
+ )
+ .eventCategory("cluster")
+ .eventAction("eventbus_start")
+ .eventOutcome("success")
+ .log();
+ }
+
+ /**
+ * Publish an event to a topic.
+ * The event will be broadcast to all Artipie instances subscribed
+ * to this topic. The publishing instance will ignore its own message.
+ *
+ * @param topic Topic name (e.g. "config.change", "repo.update")
+ * @param payload Event payload (typically JSON)
+ */
+ public void publish(final String topic, final String payload) {
+ final String channel = ClusterEventBus.CHANNEL_PREFIX + topic;
+ final String message = String.join(
+ ClusterEventBus.SEP, this.instanceId, payload
+ );
+ this.pubCommands.publish(channel, message);
+ EcsLogger.debug("com.artipie.cluster")
+ .message("Event published: " + topic)
+ .eventCategory("cluster")
+ .eventAction("event_publish")
+ .field("cluster.topic", topic)
+ .eventOutcome("success")
+ .log();
+ }
+
+ /**
+ * Subscribe a handler to a topic.
+ * The handler will be called with the event payload whenever a
+ * remote instance publishes to this topic. If this is the first
+ * handler for the topic, the Valkey channel subscription is created.
+ *
+ * @param topic Topic name (e.g. "config.change", "repo.update")
+ * @param handler Consumer that receives the event payload
+ */
+ public void subscribe(final String topic, final Consumer handler) {
+ final String channel = ClusterEventBus.CHANNEL_PREFIX + topic;
+ final boolean firstHandler = !this.handlers.containsKey(topic);
+ this.handlers
+ .computeIfAbsent(topic, key -> new CopyOnWriteArrayList<>())
+ .add(handler);
+ if (firstHandler) {
+ this.subConn.async().subscribe(channel);
+ EcsLogger.debug("com.artipie.cluster")
+ .message("Subscribed to topic: " + topic)
+ .eventCategory("cluster")
+ .eventAction("topic_subscribe")
+ .field("cluster.topic", topic)
+ .eventOutcome("success")
+ .log();
+ }
+ }
+
+ /**
+ * Returns the unique instance identifier for this event bus.
+ *
+ * @return Instance ID string
+ */
+ public String instanceId() {
+ return this.instanceId;
+ }
+
+ /**
+ * Returns the number of topics with active subscriptions.
+ *
+ * @return Number of subscribed topics
+ */
+ public int topicCount() {
+ return this.handlers.size();
+ }
+
+ @Override
+ public void close() {
+ this.subConn.close();
+ this.pubConn.close();
+ EcsLogger.info("com.artipie.cluster")
+ .message("Cluster event bus closed")
+ .eventCategory("cluster")
+ .eventAction("eventbus_stop")
+ .eventOutcome("success")
+ .log();
+ }
+
+ /**
+ * Listener that receives Valkey pub/sub messages and dispatches
+ * them to registered topic handlers.
+ */
+ private final class Dispatcher extends RedisPubSubAdapter {
+ @Override
+ public void message(final String channel, final String message) {
+ if (!channel.startsWith(ClusterEventBus.CHANNEL_PREFIX)) {
+ return;
+ }
+ final int sep = message.indexOf(ClusterEventBus.SEP);
+ if (sep < 0) {
+ return;
+ }
+ final String sender = message.substring(0, sep);
+ if (ClusterEventBus.this.instanceId.equals(sender)) {
+ return;
+ }
+ final String payload = message.substring(sep + 1);
+ final String topic = channel.substring(
+ ClusterEventBus.CHANNEL_PREFIX.length()
+ );
+ final List> topicHandlers =
+ ClusterEventBus.this.handlers.get(topic);
+ if (topicHandlers == null || topicHandlers.isEmpty()) {
+ return;
+ }
+ for (final Consumer handler : topicHandlers) {
+ try {
+ handler.accept(payload);
+ } catch (final Exception ex) {
+ EcsLogger.error("com.artipie.cluster")
+ .message(
+ "Event handler failed for topic: " + topic
+ )
+ .error(ex)
+ .eventCategory("cluster")
+ .eventAction("event_dispatch")
+ .field("cluster.topic", topic)
+ .eventOutcome("failure")
+ .log();
+ }
+ }
+ EcsLogger.debug("com.artipie.cluster")
+ .message(
+ "Event dispatched: " + topic + " to "
+ + topicHandlers.size() + " handler(s)"
+ )
+ .eventCategory("cluster")
+ .eventAction("event_dispatch")
+ .field("cluster.topic", topic)
+ .eventOutcome("success")
+ .log();
+ }
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/cooldown/CooldownCache.java b/artipie-core/src/main/java/com/artipie/cooldown/CooldownCache.java
index 5676568f5..62f957fc1 100644
--- a/artipie-core/src/main/java/com/artipie/cooldown/CooldownCache.java
+++ b/artipie-core/src/main/java/com/artipie/cooldown/CooldownCache.java
@@ -9,6 +9,8 @@
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.Expiry;
import io.lettuce.core.RedisFuture;
+import io.lettuce.core.ScanArgs;
+import io.lettuce.core.ScanCursor;
import io.lettuce.core.api.async.RedisAsyncCommands;
import java.nio.ByteBuffer;
import java.time.Duration;
@@ -366,14 +368,7 @@ public void unblockAll(final String repoName) {
// L2: Pattern update (SCAN is expensive but unblockAll is rare)
if (this.twoTier) {
final String pattern = prefix + "*";
- this.l2.keys(pattern).thenAccept(keys -> {
- if (keys != null && !keys.isEmpty()) {
- for (final String key : keys) {
- // Set each key to false with configured TTL
- this.l2.setex(key, this.l2AllowedTtlSeconds, "false".getBytes());
- }
- }
- });
+ this.scanAndUpdate(pattern);
}
}
@@ -443,4 +438,30 @@ private void putL2Boolean(final String key, final boolean blocked, final long tt
this.l2.setex(key, ttlSeconds, value);
}
+    /**
+     * Scan and update keys matching pattern using cursor-based SCAN.
+     * Sets each matched key to "false" (allowed) with configured TTL.
+     * Avoids blocking KEYS command that freezes Redis on large datasets.
+     * @param pattern Redis key pattern (glob-style)
+     * @return Future completing when every matched key has been updated
+     */
+    private CompletableFuture scanAndUpdate(final String pattern) {
+        return this.scanAndUpdateStep(ScanCursor.INITIAL, pattern);
+    }
+
+    /**
+     * One page of the cursor-based SCAN: update the keys returned for this
+     * cursor position, then chain the next page until the cursor is finished.
+     * The recursive call happens inside the completion callback of the
+     * previous SCAN, so each page starts from a fresh async completion.
+     * NOTE(review): {@code "false".getBytes()} uses the platform default
+     * charset — consider {@code StandardCharsets.UTF_8} for determinism;
+     * confirm against how putL2Boolean encodes values.
+     *
+     * @param cursor Current SCAN cursor position
+     * @param pattern Redis key pattern (glob-style)
+     * @return Future completing when this page and all following pages are done
+     */
+    private CompletableFuture scanAndUpdateStep(
+        final ScanCursor cursor, final String pattern
+    ) {
+        // Page size 100 bounds per-call latency on the Redis side.
+        return this.l2.scan(cursor, ScanArgs.Builder.matches(pattern).limit(100))
+            .toCompletableFuture()
+            .thenCompose(result -> {
+                for (final String key : result.getKeys()) {
+                    this.l2.setex(key, this.l2AllowedTtlSeconds, "false".getBytes());
+                }
+                if (result.isFinished()) {
+                    return CompletableFuture.completedFuture(null);
+                }
+                return this.scanAndUpdateStep(result, pattern);
+            });
+    }
+
}
diff --git a/artipie-core/src/main/java/com/artipie/cooldown/CooldownSettings.java b/artipie-core/src/main/java/com/artipie/cooldown/CooldownSettings.java
index 7e242ebdc..2f9586324 100644
--- a/artipie-core/src/main/java/com/artipie/cooldown/CooldownSettings.java
+++ b/artipie-core/src/main/java/com/artipie/cooldown/CooldownSettings.java
@@ -8,7 +8,6 @@
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
-import java.util.Optional;
/**
* Global and per-repo-type cooldown configuration.
@@ -23,21 +22,21 @@ public final class CooldownSettings {
/**
* Whether cooldown logic is enabled globally.
*/
- private final boolean enabled;
+ private volatile boolean enabled;
/**
* Minimum allowed age for an artifact release. If an artifact's release time
* is within this window (i.e. too fresh), it will be blocked until it reaches
* the minimum allowed age.
*/
- private final Duration minimumAllowedAge;
+ private volatile Duration minimumAllowedAge;
/**
* Per-repo-type overrides.
* Key: repository type (maven, npm, docker, etc.)
* Value: RepoTypeConfig with enabled flag and minimum age
*/
- private final Map repoTypeOverrides;
+ private volatile Map repoTypeOverrides;
/**
* Ctor with global settings only.
@@ -108,6 +107,29 @@ public Duration minimumAllowedAgeFor(final String repoType) {
return override != null ? override.minimumAllowedAge() : this.minimumAllowedAge;
}
+    /**
+     * Get a copy of per-repo-type overrides.
+     * The returned map is a defensive copy: callers may mutate it freely
+     * without affecting these settings.
+     *
+     * @return Map of repo type to config
+     */
+    public Map repoTypeOverrides() {
+        return new HashMap<>(this.repoTypeOverrides);
+    }
+
+    /**
+     * Update cooldown settings in-place for hot reload.
+     * NOTE(review): the three volatile writes are not performed atomically,
+     * so a concurrent reader may briefly observe a mix of old and new
+     * values; confirm this is acceptable for hot-reload semantics.
+     *
+     * @param newEnabled Whether cooldown is enabled
+     * @param newMinAge New global minimum allowed age
+     * @param overrides New per-repo-type overrides (defensively copied)
+     */
+    public void update(final boolean newEnabled, final Duration newMinAge,
+        final Map overrides) {
+        this.enabled = newEnabled;
+        this.minimumAllowedAge = Objects.requireNonNull(newMinAge);
+        this.repoTypeOverrides = new HashMap<>(Objects.requireNonNull(overrides));
+    }
+
/**
* Creates default configuration (enabled, 72 hours minimum allowed age).
*
diff --git a/artipie-core/src/main/java/com/artipie/cooldown/metadata/CooldownMetadataServiceImpl.java b/artipie-core/src/main/java/com/artipie/cooldown/metadata/CooldownMetadataServiceImpl.java
index c30e200e7..64f7fd938 100644
--- a/artipie-core/src/main/java/com/artipie/cooldown/metadata/CooldownMetadataServiceImpl.java
+++ b/artipie-core/src/main/java/com/artipie/cooldown/metadata/CooldownMetadataServiceImpl.java
@@ -294,13 +294,13 @@ private CompletableFuture computeFilteredM
}
EcsLogger.debug("com.artipie.cooldown.metadata")
- .message("Evaluating cooldown for versions")
+ .message(String.format(
+ "Evaluating cooldown for versions: %d total, %d to evaluate",
+ allVersions.size(), versionsToEvaluate.size()))
.eventCategory("cooldown")
.eventAction("metadata_filter")
.field("repository.type", repoType)
.field("package.name", packageName)
- .field("versions.total", allVersions.size())
- .field("versions.evaluating", versionsToEvaluate.size())
.log();
return new FilterContext<>(
@@ -349,12 +349,12 @@ private CompletableFuture evaluateAndFilte
}
EcsLogger.debug("com.artipie.cooldown.metadata")
- .message("Cooldown evaluation complete")
+ .message(String.format(
+ "Cooldown evaluation complete: %d versions blocked", blockedVersions.size()))
.eventCategory("cooldown")
.eventAction("metadata_filter")
.field("repository.type", ctx.repoType)
.field("package.name", ctx.packageName)
- .field("versions.blocked", blockedVersions.size())
.log();
// Note: Blocked versions gauge is updated by JdbcCooldownService on block/unblock
@@ -383,12 +383,12 @@ private CompletableFuture evaluateAndFilte
if (newLatest.isPresent()) {
filtered = ctx.filter.updateLatest(filtered, newLatest.get());
EcsLogger.debug("com.artipie.cooldown.metadata")
- .message("Updated latest version (by release date)")
+ .message(String.format(
+ "Updated latest version (by release date): %s -> %s",
+ currentLatest.get(), newLatest.get()))
.eventCategory("cooldown")
.eventAction("metadata_filter")
.field("package.name", ctx.packageName)
- .field("latest.old", currentLatest.get())
- .field("latest.new", newLatest.get())
.log();
}
}
@@ -399,15 +399,15 @@ private CompletableFuture evaluateAndFilte
// Log performance
final long durationMs = (System.nanoTime() - ctx.startTime) / 1_000_000;
EcsLogger.info("com.artipie.cooldown.metadata")
- .message("Metadata filtering complete")
+ .message(String.format(
+ "Metadata filtering complete: %d total versions, %d blocked",
+ ctx.allVersions.size(), blockedVersions.size()))
.eventCategory("cooldown")
.eventAction("metadata_filter")
.eventOutcome("success")
.field("repository.type", ctx.repoType)
.field("package.name", ctx.packageName)
- .field("versions.total", ctx.allVersions.size())
- .field("versions.blocked", blockedVersions.size())
- .field("duration_ms", durationMs)
+ .field("event.duration", durationMs * 1_000_000L)
.log();
// Record metrics via CooldownMetrics
@@ -501,10 +501,10 @@ private void preloadReleaseDates(
if (!releaseDates.isEmpty()) {
((MetadataAwareInspector) inspector).preloadReleaseDates(releaseDates);
EcsLogger.debug("com.artipie.cooldown.metadata")
- .message("Preloaded release dates from metadata")
+ .message(String.format(
+ "Preloaded %d release dates from metadata", releaseDates.size()))
.eventCategory("cooldown")
.eventAction("metadata_filter")
- .field("dates.count", releaseDates.size())
.log();
}
}
diff --git a/artipie-core/src/main/java/com/artipie/http/ResponseBuilder.java b/artipie-core/src/main/java/com/artipie/http/ResponseBuilder.java
index 1aad820ad..dcd024617 100644
--- a/artipie-core/src/main/java/com/artipie/http/ResponseBuilder.java
+++ b/artipie-core/src/main/java/com/artipie/http/ResponseBuilder.java
@@ -100,6 +100,10 @@ public static ResponseBuilder rangeNotSatisfiable() {
return new ResponseBuilder(RsStatus.REQUESTED_RANGE_NOT_SATISFIABLE);
}
+ public static ResponseBuilder badGateway() {
+ return new ResponseBuilder(RsStatus.BAD_GATEWAY);
+ }
+
public static ResponseBuilder gatewayTimeout() {
return new ResponseBuilder(RsStatus.GATEWAY_TIMEOUT);
}
@@ -245,6 +249,7 @@ public Response build() {
case TOO_MANY_REQUESTS -> RSP_TOO_MANY_REQUESTS;
case INTERNAL_ERROR -> RSP_INTERNAL_ERROR;
case NOT_IMPLEMENTED -> RSP_NOT_IMPLEMENTED;
+ case BAD_GATEWAY -> RSP_BAD_GATEWAY;
case SERVICE_UNAVAILABLE -> RSP_SERVICE_UNAVAILABLE;
case PARTIAL_CONTENT -> RSP_PARTIAL_CONTENT;
case GATEWAY_TIMEOUT -> RSP_GATEWAY_TIMEOUT;
@@ -284,6 +289,7 @@ public CompletableFuture completedFuture() {
private final static Response RSP_TOO_MANY_REQUESTS = new Response(RsStatus.TOO_MANY_REQUESTS, Headers.EMPTY, Content.EMPTY);
private final static Response RSP_INTERNAL_ERROR = new Response(RsStatus.INTERNAL_ERROR, Headers.EMPTY, Content.EMPTY);
private final static Response RSP_NOT_IMPLEMENTED = new Response(RsStatus.NOT_IMPLEMENTED, Headers.EMPTY, Content.EMPTY);
+ private final static Response RSP_BAD_GATEWAY = new Response(RsStatus.BAD_GATEWAY, Headers.EMPTY, Content.EMPTY);
private final static Response RSP_SERVICE_UNAVAILABLE = new Response(RsStatus.SERVICE_UNAVAILABLE, Headers.EMPTY, Content.EMPTY);
private final static Response RSP_PARTIAL_CONTENT = new Response(RsStatus.PARTIAL_CONTENT, Headers.EMPTY, Content.EMPTY);
private final static Response RSP_GATEWAY_TIMEOUT = new Response(RsStatus.GATEWAY_TIMEOUT, Headers.EMPTY, Content.EMPTY);
diff --git a/artipie-core/src/main/java/com/artipie/http/RsStatus.java b/artipie-core/src/main/java/com/artipie/http/RsStatus.java
index edfff24e5..e27105dc8 100644
--- a/artipie-core/src/main/java/com/artipie/http/RsStatus.java
+++ b/artipie-core/src/main/java/com/artipie/http/RsStatus.java
@@ -120,6 +120,10 @@ public enum RsStatus {
* Not Implemented.
*/
NOT_IMPLEMENTED(HttpStatus.SC_NOT_IMPLEMENTED),
+ /**
+ * Bad Gateway (502).
+ */
+ BAD_GATEWAY(HttpStatus.SC_BAD_GATEWAY),
/**
* Service Unavailable.
*/
diff --git a/artipie-core/src/main/java/com/artipie/http/auth/AuthzSlice.java b/artipie-core/src/main/java/com/artipie/http/auth/AuthzSlice.java
index 775761d31..e0aaf3424 100644
--- a/artipie-core/src/main/java/com/artipie/http/auth/AuthzSlice.java
+++ b/artipie-core/src/main/java/com/artipie/http/auth/AuthzSlice.java
@@ -11,6 +11,7 @@
import com.artipie.http.Slice;
import com.artipie.http.headers.Header;
import com.artipie.http.headers.WwwAuthenticate;
+import com.artipie.http.log.EcsLogger;
import com.artipie.http.rq.RequestLine;
import org.slf4j.MDC;
@@ -87,8 +88,11 @@ public CompletableFuture response(
.header(new WwwAuthenticate(challenge))
.completedFuture();
}
- } catch (final UnsupportedOperationException ignored) {
- // fall through when scheme does not provide challenge
+ } catch (final UnsupportedOperationException ex) {
+ EcsLogger.debug("com.artipie.http.auth")
+ .message("Auth scheme does not provide challenge")
+ .error(ex)
+ .log();
}
if (this.control.allowed(result.user())) {
return this.origin.response(
diff --git a/artipie-core/src/main/java/com/artipie/http/auth/CombinedAuthzSlice.java b/artipie-core/src/main/java/com/artipie/http/auth/CombinedAuthzSlice.java
index 2a1d55b40..060dbe095 100644
--- a/artipie-core/src/main/java/com/artipie/http/auth/CombinedAuthzSlice.java
+++ b/artipie-core/src/main/java/com/artipie/http/auth/CombinedAuthzSlice.java
@@ -11,6 +11,7 @@
import com.artipie.http.headers.Authorization;
import com.artipie.http.headers.Header;
import com.artipie.http.headers.WwwAuthenticate;
+import com.artipie.http.log.EcsLogger;
import com.artipie.http.rq.RequestLine;
import com.artipie.http.rq.RqHeaders;
import com.artipie.http.trace.TraceContextExecutor;
@@ -137,8 +138,11 @@ public CompletableFuture response(
.header(new WwwAuthenticate(challenge))
.completedFuture();
}
- } catch (final UnsupportedOperationException ignored) {
- // fall through when scheme does not provide challenge
+ } catch (final UnsupportedOperationException ex) {
+ EcsLogger.debug("com.artipie.http.auth")
+ .message("Auth scheme does not provide challenge")
+ .error(ex)
+ .log();
}
if (this.control.allowed(result.user())) {
return this.origin.response(
diff --git a/artipie-core/src/main/java/com/artipie/http/cache/BaseCachedProxySlice.java b/artipie-core/src/main/java/com/artipie/http/cache/BaseCachedProxySlice.java
new file mode 100644
index 000000000..c6e5e28b9
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/cache/BaseCachedProxySlice.java
@@ -0,0 +1,999 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.cache;
+
+import com.artipie.asto.Content;
+import com.artipie.asto.Key;
+import com.artipie.asto.Storage;
+import com.artipie.asto.cache.Cache;
+import com.artipie.asto.cache.CacheControl;
+import com.artipie.asto.cache.Remote;
+import com.artipie.cooldown.CooldownInspector;
+import com.artipie.cooldown.CooldownRequest;
+import com.artipie.cooldown.CooldownResponses;
+import com.artipie.cooldown.CooldownResult;
+import com.artipie.cooldown.CooldownService;
+import com.artipie.http.Headers;
+import com.artipie.http.Response;
+import com.artipie.http.ResponseBuilder;
+import com.artipie.http.RsStatus;
+import com.artipie.http.Slice;
+import com.artipie.http.headers.Header;
+import com.artipie.http.headers.Login;
+import com.artipie.http.log.EcsLogger;
+import com.artipie.http.rq.RequestLine;
+import com.artipie.http.slice.KeyFromPath;
+import com.artipie.scheduling.ProxyArtifactEvent;
+
+import io.reactivex.Flowable;
+import java.io.IOException;
+import java.net.ConnectException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.security.MessageDigest;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Queue;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+/**
+ * Abstract base class for all proxy adapter cache slices.
+ *
+ * <p>Implements the shared proxy flow via template method pattern:</p>
+ *
+ * <ol>
+ *   <li>Check negative cache - fast-fail on known 404s</li>
+ *   <li>Check local cache (offline-safe) - serve if fresh hit</li>
+ *   <li>Evaluate cooldown - block if in cooldown period</li>
+ *   <li>Deduplicate concurrent requests for same path</li>
+ *   <li>Fetch from upstream</li>
+ *   <li>On 200: cache content, compute digests, generate sidecars, enqueue event</li>
+ *   <li>On 404: update negative cache</li>
+ *   <li>Record metrics</li>
+ * </ol>
+ *
+ * <p>Adapters override only the hooks they need:
+ * {@link #isCacheable(String)}, {@link #buildCooldownRequest(String, Headers)},
+ * {@link #digestAlgorithms()}, {@link #buildArtifactEvent(Key, Headers, long, String)},
+ * {@link #postProcess(Response, RequestLine)}, {@link #generateSidecars(String, Map)}.
+ *
+ * @since 1.20.13
+ */
+@SuppressWarnings({"PMD.GodClass", "PMD.ExcessiveImports"})
+public abstract class BaseCachedProxySlice implements Slice {
+
+ /**
+ * Upstream remote slice.
+ */
+ private final Slice client;
+
+ /**
+ * Asto cache for artifact storage.
+ */
+ private final Cache cache;
+
+ /**
+ * Repository name.
+ */
+ private final String repoName;
+
+ /**
+ * Repository type (e.g., "maven", "npm", "pypi").
+ */
+ private final String repoType;
+
+ /**
+ * Upstream base URL for metrics.
+ */
+ private final String upstreamUrl;
+
+ /**
+ * Optional local storage for metadata and sidecars.
+ */
+ private final Optional<CachedArtifactMetadataStore> metadataStore;
+
+ /**
+ * Whether cache is backed by persistent storage.
+ */
+ private final boolean storageBacked;
+
+ /**
+ * Event queue for proxy artifact events.
+ */
+ private final Optional<Queue<ProxyArtifactEvent>> events;
+
+ /**
+ * Unified proxy configuration.
+ */
+ private final ProxyCacheConfig config;
+
+ /**
+ * Negative cache for 404 responses.
+ */
+ private final NegativeCache negativeCache;
+
+ /**
+ * Cooldown service (null if cooldown disabled).
+ */
+ private final CooldownService cooldownService;
+
+ /**
+ * Cooldown inspector (null if cooldown disabled).
+ */
+ private final CooldownInspector cooldownInspector;
+
+ /**
+ * Request deduplicator.
+ */
+ private final RequestDeduplicator deduplicator;
+
+ /**
+ * Raw storage for direct saves (bypasses FromStorageCache lazy tee-content).
+ */
+ private final Optional<Storage> storage;
+
+ /**
+ * Constructor.
+ *
+ * @param client Upstream remote slice
+ * @param cache Asto cache for artifact storage
+ * @param repoName Repository name
+ * @param repoType Repository type
+ * @param upstreamUrl Upstream base URL
+ * @param storage Optional local storage
+ * @param events Event queue for proxy artifacts
+ * @param config Unified proxy configuration
+ * @param cooldownService Cooldown service (nullable, required if cooldown enabled)
+ * @param cooldownInspector Cooldown inspector (nullable, required if cooldown enabled)
+ */
+ @SuppressWarnings("PMD.ExcessiveParameterList")
+ protected BaseCachedProxySlice(
+ final Slice client,
+ final Cache cache,
+ final String repoName,
+ final String repoType,
+ final String upstreamUrl,
+ final Optional<Storage> storage,
+ final Optional<Queue<ProxyArtifactEvent>> events,
+ final ProxyCacheConfig config,
+ final CooldownService cooldownService,
+ final CooldownInspector cooldownInspector
+ ) {
+ this.client = Objects.requireNonNull(client, "client");
+ this.cache = Objects.requireNonNull(cache, "cache");
+ this.repoName = Objects.requireNonNull(repoName, "repoName");
+ this.repoType = Objects.requireNonNull(repoType, "repoType");
+ this.upstreamUrl = Objects.requireNonNull(upstreamUrl, "upstreamUrl");
+ this.events = Objects.requireNonNull(events, "events");
+ this.config = Objects.requireNonNull(config, "config");
+ this.storage = storage;
+ this.metadataStore = storage.map(CachedArtifactMetadataStore::new);
+ this.storageBacked = this.metadataStore.isPresent()
+ && !Objects.equals(this.cache, Cache.NOP);
+ this.negativeCache = config.negativeCacheEnabled()
+ ? new NegativeCache(repoType, repoName) : null;
+ this.cooldownService = cooldownService;
+ this.cooldownInspector = cooldownInspector;
+ this.deduplicator = new RequestDeduplicator(config.dedupStrategy());
+ }
+
+ /**
+ * Convenience constructor without cooldown (for adapters that don't use it).
+ */
+ @SuppressWarnings("PMD.ExcessiveParameterList")
+ protected BaseCachedProxySlice(
+ final Slice client,
+ final Cache cache,
+ final String repoName,
+ final String repoType,
+ final String upstreamUrl,
+ final Optional<Storage> storage,
+ final Optional<Queue<ProxyArtifactEvent>> events,
+ final ProxyCacheConfig config
+ ) {
+ this(client, cache, repoName, repoType, upstreamUrl,
+ storage, events, config, null, null);
+ }
+
+ @Override
+ public final CompletableFuture<Response> response(
+ final RequestLine line, final Headers headers, final Content body
+ ) {
+ final String path = line.uri().getPath();
+ if ("/".equals(path) || path.isEmpty()) {
+ return this.handleRootPath(line);
+ }
+ final Key key = new KeyFromPath(path);
+ // Step 1: Negative cache fast-fail
+ if (this.negativeCache != null && this.negativeCache.isNotFound(key)) {
+ this.logDebug("Negative cache hit", path);
+ return CompletableFuture.completedFuture(ResponseBuilder.notFound().build());
+ }
+ // Step 2: Pre-process hook (adapter-specific short-circuit)
+ final Optional<CompletableFuture<Response>> pre =
+ this.preProcess(line, headers, key, path);
+ if (pre.isPresent()) {
+ return pre.get();
+ }
+ // Step 3: Check if path is cacheable at all
+ if (!this.isCacheable(path)) {
+ return this.fetchDirect(line, key, new Login(headers).getValue());
+ }
+ // Step 4: Cache-first (offline-safe) — check cache before any network calls
+ if (this.storageBacked) {
+ return this.cacheFirstFlow(line, headers, key, path);
+ }
+ // No persistent storage — go directly to upstream
+ return this.fetchDirect(line, key, new Login(headers).getValue());
+ }
+
+ // ===== Abstract hooks — adapters override these =====
+
+ /**
+ * Determine if a request path is cacheable.
+ * @param path Request path (e.g., "/com/example/foo/1.0/foo-1.0.jar")
+ * @return True if this path should be cached
+ */
+ protected abstract boolean isCacheable(String path);
+
+ // ===== Overridable hooks with defaults =====
+
+ /**
+ * Build a cooldown request from the path.
+ * Return empty to skip cooldown for this path.
+ * @param path Request path
+ * @param headers Request headers
+ * @return Cooldown request or empty
+ */
+ protected Optional<CooldownRequest> buildCooldownRequest(
+ final String path, final Headers headers
+ ) {
+ return Optional.empty();
+ }
+
+ /**
+ * Return the set of digest algorithms to compute during cache streaming.
+ * Return empty set to skip digest computation.
+ * Override in adapters to enable digest computation (e.g., SHA-256, MD5).
+ * @return Set of algorithm names (e.g., "SHA-256", "MD5")
+ */
+ protected java.util.Set<String> digestAlgorithms() {
+ return Collections.emptySet();
+ }
+
+ /**
+ * Build a proxy artifact event for the event queue.
+ * Return empty to skip event emission.
+ * @param key Artifact cache key
+ * @param responseHeaders Upstream response headers
+ * @param size Artifact size in bytes
+ * @param owner Authenticated user login
+ * @return Proxy artifact event or empty
+ */
+ protected Optional<ProxyArtifactEvent> buildArtifactEvent(
+ final Key key, final Headers responseHeaders, final long size,
+ final String owner
+ ) {
+ return Optional.empty();
+ }
+
+ /**
+ * Post-process response before returning to caller.
+ * Default: identity (no transformation).
+ * @param response The response to post-process
+ * @param line Original request line
+ * @return Post-processed response
+ */
+ protected Response postProcess(final Response response, final RequestLine line) {
+ return response;
+ }
+
+ /**
+ * Generate sidecar files from computed digests.
+ * Default: empty list (no sidecars).
+ * @param path Original artifact path
+ * @param digests Computed digests map (algorithm -> hex value)
+ * @return List of sidecar files to store alongside the artifact
+ */
+ protected List generateSidecars(
+ final String path, final Map<String, String> digests
+ ) {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Check if path is a sidecar checksum file that should be served from cache.
+ * Default: false. Override in adapters that generate checksum sidecars.
+ * @param path Request path
+ * @return True if this is a checksum sidecar file
+ */
+ protected boolean isChecksumSidecar(final String path) {
+ return false;
+ }
+
+ /**
+ * Pre-process a request before the standard flow.
+ * If non-empty, the returned response short-circuits the standard flow.
+ * Use for adapter-specific handling (e.g., Maven metadata cache).
+ * Default: empty (use standard flow for all paths).
+ * @param line Request line
+ * @param headers Request headers
+ * @param key Cache key
+ * @param path Request path
+ * @return Optional future response to short-circuit, or empty for standard flow
+ */
+ protected Optional<CompletableFuture<Response>> preProcess(
+ final RequestLine line, final Headers headers, final Key key, final String path
+ ) {
+ return Optional.empty();
+ }
+
+ // ===== Protected accessors for subclass use =====
+
+ /**
+ * @return Repository name
+ */
+ protected final String repoName() {
+ return this.repoName;
+ }
+
+ /**
+ * @return Repository type
+ */
+ protected final String repoType() {
+ return this.repoType;
+ }
+
+ /**
+ * @return Upstream URL
+ */
+ protected final String upstreamUrl() {
+ return this.upstreamUrl;
+ }
+
+ /**
+ * @return The upstream client slice
+ */
+ protected final Slice client() {
+ return this.client;
+ }
+
+ /**
+ * @return The asto cache
+ */
+ protected final Cache cache() {
+ return this.cache;
+ }
+
+ /**
+ * @return Proxy cache config
+ */
+ protected final ProxyCacheConfig config() {
+ return this.config;
+ }
+
+ /**
+ * @return Metadata store if storage-backed
+ */
+ protected final Optional<CachedArtifactMetadataStore> metadataStore() {
+ return this.metadataStore;
+ }
+
+ // ===== Internal flow implementation =====
+
+ /**
+ * Cache-first flow: check cache, then evaluate cooldown, then fetch.
+ */
+ private CompletableFuture<Response> cacheFirstFlow(
+ final RequestLine line,
+ final Headers headers,
+ final Key key,
+ final String path
+ ) {
+ // Checksum sidecars: serve from storage if present, else try upstream
+ if (this.isChecksumSidecar(path)) {
+ return this.serveChecksumFromStorage(line, key, new Login(headers).getValue());
+ }
+ final CachedArtifactMetadataStore store = this.metadataStore.orElseThrow();
+ return this.cache.load(key, Remote.EMPTY, CacheControl.Standard.ALWAYS)
+ .thenCompose(cached -> {
+ if (cached.isPresent()) {
+ this.logDebug("Cache hit", path);
+ // Fast path: serve from cache with async metadata
+ return store.load(key).thenApply(meta -> {
+ final ResponseBuilder builder = ResponseBuilder.ok()
+ .body(cached.get());
+ meta.ifPresent(m -> builder.headers(stripContentEncoding(m.headers())));
+ return this.postProcess(builder.build(), line);
+ });
+ }
+ // Cache miss: evaluate cooldown then fetch
+ return this.evaluateCooldownAndFetch(line, headers, key, path, store);
+ }).toCompletableFuture();
+ }
+
+ /**
+ * Evaluate cooldown, then fetch from upstream if allowed.
+ */
+ private CompletableFuture<Response> evaluateCooldownAndFetch(
+ final RequestLine line,
+ final Headers headers,
+ final Key key,
+ final String path,
+ final CachedArtifactMetadataStore store
+ ) {
+ if (this.config.cooldownEnabled()
+ && this.cooldownService != null
+ && this.cooldownInspector != null) {
+ final Optional<CooldownRequest> request =
+ this.buildCooldownRequest(path, headers);
+ if (request.isPresent()) {
+ return this.cooldownService.evaluate(request.get(), this.cooldownInspector)
+ .thenCompose(result -> {
+ if (result.blocked()) {
+ return CompletableFuture.completedFuture(
+ CooldownResponses.forbidden(result.block().orElseThrow())
+ );
+ }
+ return this.fetchAndCache(line, key, headers, store);
+ });
+ }
+ }
+ return this.fetchAndCache(line, key, headers, store);
+ }
+
+ /**
+ * Fetch from upstream and cache the result, with request deduplication.
+ * Uses NIO temp file streaming to avoid buffering full artifacts on heap.
+ */
+ private CompletableFuture<Response> fetchAndCache(
+ final RequestLine line,
+ final Key key,
+ final Headers headers,
+ final CachedArtifactMetadataStore store
+ ) {
+ final String owner = new Login(headers).getValue();
+ final long startTime = System.currentTimeMillis();
+ return this.client.response(line, Headers.EMPTY, Content.EMPTY)
+ .thenCompose(resp -> {
+ final long duration = System.currentTimeMillis() - startTime;
+ if (resp.status().code() == 404) {
+ return this.handle404(resp, key, duration)
+ .thenCompose(signal ->
+ this.signalToResponse(signal, line, key, headers, store));
+ }
+ if (!resp.status().success()) {
+ return this.handleNonSuccess(resp, key, duration)
+ .thenCompose(signal ->
+ this.signalToResponse(signal, line, key, headers, store));
+ }
+ this.recordProxyMetric("success", duration);
+ return this.deduplicator.deduplicate(key, () -> {
+ return this.cacheResponse(resp, key, owner, store)
+ .thenApply(r -> RequestDeduplicator.FetchSignal.SUCCESS);
+ }).thenCompose(signal ->
+ this.signalToResponse(signal, line, key, headers, store));
+ })
+ .exceptionally(error -> {
+ final long duration = System.currentTimeMillis() - startTime;
+ this.trackUpstreamFailure(error);
+ this.recordProxyMetric("exception", duration);
+ EcsLogger.warn("com.artipie." + this.repoType)
+ .message("Upstream request failed with exception")
+ .eventCategory("repository")
+ .eventAction("proxy_upstream")
+ .eventOutcome("failure")
+ .field("repository.name", this.repoName)
+ .field("event.duration", duration)
+ .error(error)
+ .log();
+ return ResponseBuilder.unavailable()
+ .textBody("Upstream temporarily unavailable")
+ .build();
+ });
+ }
+
+ /**
+ * Convert a dedup signal into an HTTP response.
+ */
+ private CompletableFuture<Response> signalToResponse(
+ final RequestDeduplicator.FetchSignal signal,
+ final RequestLine line,
+ final Key key,
+ final Headers headers,
+ final CachedArtifactMetadataStore store
+ ) {
+ switch (signal) {
+ case SUCCESS:
+ // Read from cache (populated by the winning fetch)
+ return this.cache.load(key, Remote.EMPTY, CacheControl.Standard.ALWAYS)
+ .thenCompose(cached -> {
+ if (cached.isPresent()) {
+ return store.load(key).thenApply(meta -> {
+ final ResponseBuilder builder = ResponseBuilder.ok()
+ .body(cached.get());
+ meta.ifPresent(m -> builder.headers(stripContentEncoding(m.headers())));
+ return this.postProcess(builder.build(), line);
+ });
+ }
+ return CompletableFuture.completedFuture(
+ ResponseBuilder.notFound().build()
+ );
+ }).toCompletableFuture();
+ case NOT_FOUND:
+ return CompletableFuture.completedFuture(
+ ResponseBuilder.notFound().build()
+ );
+ case ERROR:
+ default:
+ return CompletableFuture.completedFuture(
+ ResponseBuilder.unavailable()
+ .textBody("Upstream temporarily unavailable")
+ .build()
+ );
+ }
+ }
+
+ /**
+ * Cache a successful upstream response using NIO temp file streaming.
+ * Streams body to a temp file while computing digests incrementally,
+ * then saves from temp file to cache. Never buffers the full artifact on heap.
+ */
+ @SuppressWarnings("PMD.AvoidCatchingGenericException")
+ private CompletableFuture<RequestDeduplicator.FetchSignal> cacheResponse(
+ final Response resp,
+ final Key key,
+ final String owner,
+ final CachedArtifactMetadataStore store
+ ) {
+ final Path tempFile;
+ final FileChannel channel;
+ try {
+ tempFile = Files.createTempFile("artipie-cache-", ".tmp");
+ tempFile.toFile().deleteOnExit();
+ channel = FileChannel.open(
+ tempFile,
+ StandardOpenOption.WRITE,
+ StandardOpenOption.TRUNCATE_EXISTING
+ );
+ } catch (final IOException ex) {
+ EcsLogger.warn("com.artipie." + this.repoType)
+ .message("Failed to create temp file for cache streaming")
+ .eventCategory("repository")
+ .eventAction("proxy_cache")
+ .eventOutcome("failure")
+ .field("repository.name", this.repoName)
+ .field("file.path", key.string())
+ .error(ex)
+ .log();
+ return CompletableFuture.completedFuture(
+ RequestDeduplicator.FetchSignal.ERROR
+ );
+ }
+ final Map<String, MessageDigest> digests =
+ DigestComputer.createDigests(this.digestAlgorithms());
+ final AtomicLong totalSize = new AtomicLong(0);
+ final CompletableFuture<Void> streamDone = new CompletableFuture<>();
+ Flowable.fromPublisher(resp.body())
+ .doOnNext(buf -> {
+ final int nbytes = buf.remaining();
+ DigestComputer.updateDigests(digests, buf);
+ final ByteBuffer copy = buf.asReadOnlyBuffer();
+ while (copy.hasRemaining()) {
+ channel.write(copy);
+ }
+ totalSize.addAndGet(nbytes);
+ })
+ .doOnComplete(() -> {
+ channel.force(true);
+ channel.close();
+ })
+ .doOnError(err -> {
+ closeChannelQuietly(channel);
+ deleteTempQuietly(tempFile);
+ })
+ .subscribe(
+ item -> { },
+ streamDone::completeExceptionally,
+ () -> streamDone.complete(null)
+ );
+ return streamDone.thenCompose(v -> {
+ final Map<String, String> digestResults =
+ DigestComputer.finalizeDigests(digests);
+ final long size = totalSize.get();
+ return this.saveFromTempFile(key, tempFile, size)
+ .thenCompose(loaded -> {
+ final Map<String, String> digestsCopy =
+ new java.util.HashMap<>(digestResults);
+ final CachedArtifactMetadataStore.ComputedDigests computed =
+ new CachedArtifactMetadataStore.ComputedDigests(
+ size, digestsCopy
+ );
+ return store.save(key, stripContentEncoding(resp.headers()), computed);
+ }).thenCompose(savedHeaders -> {
+ final List sidecars =
+ this.generateSidecars(key.string(), digestResults);
+ if (sidecars.isEmpty()) {
+ return CompletableFuture.completedFuture(
+ (Void) null
+ );
+ }
+ final CompletableFuture<?>[] writes;
+ if (this.storage.isPresent()) {
+ // Save sidecars directly to storage (avoids lazy tee-content)
+ writes = sidecars.stream()
+ .map(sc -> this.storage.get().save(
+ new Key.From(sc.path()),
+ new Content.From(sc.content())
+ ))
+ .toArray(CompletableFuture[]::new);
+ } else {
+ writes = sidecars.stream()
+ .map(sc -> this.cache.load(
+ new Key.From(sc.path()),
+ () -> CompletableFuture.completedFuture(
+ Optional.of(new Content.From(sc.content()))
+ ),
+ CacheControl.Standard.ALWAYS
+ ))
+ .toArray(CompletableFuture[]::new);
+ }
+ return CompletableFuture.allOf(writes);
+ }).thenApply(ignored -> {
+ this.enqueueEvent(key, resp.headers(), size, owner);
+ deleteTempQuietly(tempFile);
+ return RequestDeduplicator.FetchSignal.SUCCESS;
+ });
+ }).exceptionally(err -> {
+ deleteTempQuietly(tempFile);
+ EcsLogger.warn("com.artipie." + this.repoType)
+ .message("Failed to cache upstream response")
+ .eventCategory("repository")
+ .eventAction("proxy_cache")
+ .eventOutcome("failure")
+ .field("repository.name", this.repoName)
+ .field("file.path", key.string())
+ .error(err)
+ .log();
+ return RequestDeduplicator.FetchSignal.ERROR;
+ });
+ }
+
+ /**
+ * Save content to cache from a temp file using NIO streaming.
+ * Saves directly to storage to avoid FromStorageCache's lazy tee-content
+ * which requires the returned Content to be consumed for the save to happen.
+ * @param key Cache key
+ * @param tempFile Temp file with content
+ * @param size File size in bytes
+ * @return Save future
+ */
+ @SuppressWarnings("PMD.AvoidCatchingGenericException")
+ private CompletableFuture<?> saveFromTempFile(
+ final Key key, final Path tempFile, final long size
+ ) {
+ if (this.storage.isPresent()) {
+ final Flowable<ByteBuffer> flow = Flowable.using(
+ () -> FileChannel.open(tempFile, StandardOpenOption.READ),
+ chan -> Flowable.generate(emitter -> {
+ final ByteBuffer buf = ByteBuffer.allocate(65536);
+ final int read = chan.read(buf);
+ if (read < 0) {
+ emitter.onComplete();
+ } else {
+ buf.flip();
+ emitter.onNext(buf);
+ }
+ }),
+ FileChannel::close
+ );
+ final Content content = new Content.From(Optional.of(size), flow);
+ return this.storage.get().save(key, content);
+ }
+ // Fallback: use cache.load (non-storage-backed mode)
+ final Flowable<ByteBuffer> flow = Flowable.using(
+ () -> FileChannel.open(tempFile, StandardOpenOption.READ),
+ chan -> Flowable.generate(emitter -> {
+ final ByteBuffer buf = ByteBuffer.allocate(65536);
+ final int read = chan.read(buf);
+ if (read < 0) {
+ emitter.onComplete();
+ } else {
+ buf.flip();
+ emitter.onNext(buf);
+ }
+ }),
+ FileChannel::close
+ );
+ final Content content = new Content.From(Optional.of(size), flow);
+ return this.cache.load(
+ key,
+ () -> CompletableFuture.completedFuture(Optional.of(content)),
+ CacheControl.Standard.ALWAYS
+ ).toCompletableFuture();
+ }
+
+ /**
+ * Close a FileChannel quietly.
+ * @param channel Channel to close
+ */
+ private static void closeChannelQuietly(final FileChannel channel) {
+ try {
+ if (channel.isOpen()) {
+ channel.close();
+ }
+ } catch (final IOException ex) {
+ EcsLogger.debug("com.artipie.cache")
+ .message("Failed to close file channel")
+ .error(ex)
+ .log();
+ }
+ }
+
+ /**
+ * Delete a temp file quietly.
+ * @param path Temp file to delete
+ */
+ private static void deleteTempQuietly(final Path path) {
+ try {
+ Files.deleteIfExists(path);
+ } catch (final IOException ex) {
+ EcsLogger.debug("com.artipie.cache")
+ .message("Failed to delete temp file")
+ .error(ex)
+ .log();
+ }
+ }
+
+ /**
+ * Fetch directly from upstream without caching (non-cacheable paths).
+ *
+ * Records a proxy metric for every outcome ("success", "not_found",
+ * "error" for 5xx, "client_error" for other non-success codes,
+ * "exception" for transport failures), updates the negative cache on
+ * 404 (unless the path is a checksum sidecar), and on success enqueues
+ * an artifact event with unknown size (-1) before streaming the
+ * upstream body through unchanged (minus decoded Content-Encoding
+ * headers).
+ *
+ * NOTE(review): on the 404 branch the response body appears to be
+ * consumed twice — once via the thenAccept that feeds the negative
+ * cache, and again below to drain before answering notFound. Confirm
+ * the body implementation is replayable or internally buffered;
+ * otherwise the second read may fail or stall.
+ *
+ * @param line Request line to forward upstream
+ * @param key Artifact key (used for negative cache and artifact events)
+ * @param owner Repository owner recorded in the artifact event
+ * @return Future with the proxied response; 503 "Upstream error" on exception
+ */
+ private CompletableFuture fetchDirect(
+ final RequestLine line, final Key key, final String owner
+ ) {
+ final long startTime = System.currentTimeMillis();
+ return this.client.response(line, Headers.EMPTY, Content.EMPTY)
+ .thenCompose(resp -> {
+ final long duration = System.currentTimeMillis() - startTime;
+ if (!resp.status().success()) {
+ if (resp.status().code() == 404) {
+ // 404s are negative-cached, but never for checksum sidecar paths.
+ if (this.negativeCache != null
+ && !this.isChecksumSidecar(key.string())) {
+ resp.body().asBytesFuture().thenAccept(
+ bytes -> this.negativeCache.cacheNotFound(key)
+ );
+ }
+ this.recordProxyMetric("not_found", duration);
+ } else if (resp.status().code() >= 500) {
+ this.trackUpstreamFailure(
+ new RuntimeException("HTTP " + resp.status().code())
+ );
+ this.recordProxyMetric("error", duration);
+ } else {
+ this.recordProxyMetric("client_error", duration);
+ }
+ // Drain the upstream body before answering 404 to the client.
+ return resp.body().asBytesFuture()
+ .thenApply(bytes -> ResponseBuilder.notFound().build());
+ }
+ this.recordProxyMetric("success", duration);
+ // Size -1: content length is unknown when streaming straight through.
+ this.enqueueEvent(key, resp.headers(), -1, owner);
+ return CompletableFuture.completedFuture(
+ this.postProcess(
+ ResponseBuilder.ok()
+ .headers(stripContentEncoding(resp.headers()))
+ .body(resp.body())
+ .build(),
+ line
+ )
+ );
+ })
+ .exceptionally(error -> {
+ final long duration = System.currentTimeMillis() - startTime;
+ this.trackUpstreamFailure(error);
+ this.recordProxyMetric("exception", duration);
+ EcsLogger.warn("com.artipie." + this.repoType)
+ .message("Direct upstream request failed with exception")
+ .eventCategory("repository")
+ .eventAction("proxy_upstream")
+ .eventOutcome("failure")
+ .field("repository.name", this.repoName)
+ .field("event.duration", duration)
+ .error(error)
+ .log();
+ return ResponseBuilder.unavailable()
+ .textBody("Upstream error")
+ .build();
+ });
+ }
+
+ private CompletableFuture handle404(
+ final Response resp, final Key key, final long duration
+ ) {
+ this.recordProxyMetric("not_found", duration);
+ return resp.body().asBytesFuture().thenApply(bytes -> {
+ if (this.negativeCache != null && !this.isChecksumSidecar(key.string())) {
+ this.negativeCache.cacheNotFound(key);
+ }
+ return RequestDeduplicator.FetchSignal.NOT_FOUND;
+ });
+ }
+
+ private CompletableFuture handleNonSuccess(
+ final Response resp, final Key key, final long duration
+ ) {
+ if (resp.status().code() >= 500) {
+ this.trackUpstreamFailure(
+ new RuntimeException("HTTP " + resp.status().code())
+ );
+ this.recordProxyMetric("error", duration);
+ } else {
+ this.recordProxyMetric("client_error", duration);
+ }
+ return resp.body().asBytesFuture()
+ .thenApply(bytes -> RequestDeduplicator.FetchSignal.ERROR);
+ }
+
+ private CompletableFuture serveChecksumFromStorage(
+ final RequestLine line, final Key key, final String owner
+ ) {
+ return this.cache.load(key, Remote.EMPTY, CacheControl.Standard.ALWAYS)
+ .thenCompose(cached -> {
+ if (cached.isPresent()) {
+ return CompletableFuture.completedFuture(
+ ResponseBuilder.ok()
+ .header("Content-Type", "text/plain")
+ .body(cached.get())
+ .build()
+ );
+ }
+ return this.fetchDirect(line, key, owner);
+ }).toCompletableFuture();
+ }
+
+ private CompletableFuture handleRootPath(final RequestLine line) {
+ return this.client.response(line, Headers.EMPTY, Content.EMPTY)
+ .thenCompose(resp -> {
+ if (resp.status().success()) {
+ return CompletableFuture.completedFuture(
+ ResponseBuilder.ok()
+ .headers(stripContentEncoding(resp.headers()))
+ .body(resp.body())
+ .build()
+ );
+ }
+ return resp.body().asBytesFuture()
+ .thenApply(ignored -> ResponseBuilder.notFound().build());
+ });
+ }
+
+ private void enqueueEvent(
+ final Key key, final Headers headers, final long size, final String owner
+ ) {
+ if (this.events.isEmpty()) {
+ return;
+ }
+ final Optional event =
+ this.buildArtifactEvent(key, headers, size, owner);
+ event.ifPresent(e -> this.events.get().offer(e));
+ }
+
+ private void trackUpstreamFailure(final Throwable error) {
+ final String errorType;
+ if (error instanceof TimeoutException) {
+ errorType = "timeout";
+ } else if (error instanceof ConnectException) {
+ errorType = "connection_refused";
+ } else {
+ errorType = "unknown";
+ }
+ this.recordMetric(() ->
+ com.artipie.metrics.ArtipieMetrics.instance()
+ .upstreamFailure(this.repoName, this.upstreamUrl, errorType)
+ );
+ }
+
+ private void recordProxyMetric(final String result, final long duration) {
+ this.recordMetric(() -> {
+ if (com.artipie.metrics.MicrometerMetrics.isInitialized()) {
+ com.artipie.metrics.MicrometerMetrics.getInstance()
+ .recordProxyRequest(this.repoName, this.upstreamUrl, result, duration);
+ }
+ });
+ }
+
+ @SuppressWarnings("PMD.AvoidCatchingGenericException")
+ private void recordMetric(final Runnable metric) {
+ try {
+ if (com.artipie.metrics.ArtipieMetrics.isEnabled()) {
+ metric.run();
+ }
+ } catch (final Exception ex) {
+ EcsLogger.debug("com.artipie.cache")
+ .message("Failed to record metric")
+ .error(ex)
+ .log();
+ }
+ }
+
+ private void logDebug(final String message, final String path) {
+ EcsLogger.debug("com.artipie." + this.repoType)
+ .message(message)
+ .eventCategory("repository")
+ .eventAction("proxy_request")
+ .field("repository.name", this.repoName)
+ .field("url.path", path)
+ .log();
+ }
+
+ /**
+ * Strip {@code Content-Encoding} and {@code Content-Length} headers that indicate
+ * the HTTP client already decoded the response body.
+ *
+ *
Jetty's {@code GZIPContentDecoder} (registered by default) auto-decodes gzip,
+ * deflate and br response bodies but leaves the original {@code Content-Encoding}
+ * header intact. Passing those headers through to callers creates a header/body
+ * mismatch: the body is plain bytes while the header still claims it is compressed.
+ * Any client that trusts the header will fail to inflate the body
+ * ({@code Z_DATA_ERROR: zlib: incorrect header check}).
+ *
+ *
We strip {@code Content-Length} as well because it refers to the compressed
+ * size, which no longer matches the decoded body length.
+ *
+ * @param headers Upstream response headers
+ * @return Headers without Content-Encoding (gzip/deflate/br) and Content-Length
+ */
+ protected static Headers stripContentEncoding(final Headers headers) {
+ final boolean hasDecoded = StreamSupport.stream(headers.spliterator(), false)
+ .filter(h -> "content-encoding".equalsIgnoreCase(h.getKey()))
+ .map(Header::getValue)
+ .map(v -> v.toLowerCase(Locale.ROOT).trim())
+ .anyMatch(v -> v.contains("gzip") || v.contains("deflate") || v.contains("br"));
+ if (!hasDecoded) {
+ return headers;
+ }
+ final List filtered = StreamSupport.stream(headers.spliterator(), false)
+ .filter(h -> !"content-encoding".equalsIgnoreCase(h.getKey())
+ && !"content-length".equalsIgnoreCase(h.getKey()))
+ .collect(Collectors.toList());
+ return new Headers(filtered);
+ }
+
+ /**
+ * Extract Last-Modified timestamp from response headers.
+ * @param headers Response headers
+ * @return Optional epoch millis
+ */
+ protected static Optional extractLastModified(final Headers headers) {
+ try {
+ return StreamSupport.stream(headers.spliterator(), false)
+ .filter(h -> "Last-Modified".equalsIgnoreCase(h.getKey()))
+ .findFirst()
+ .map(Header::getValue)
+ .map(val -> Instant.from(
+ DateTimeFormatter.RFC_1123_DATE_TIME.parse(val)
+ ).toEpochMilli());
+ } catch (final DateTimeParseException ex) {
+ EcsLogger.debug("com.artipie.cache")
+ .message("Failed to parse Last-Modified header")
+ .error(ex)
+ .log();
+ return Optional.empty();
+ }
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/cache/ConditionalRequest.java b/artipie-core/src/main/java/com/artipie/http/cache/ConditionalRequest.java
new file mode 100644
index 000000000..d04a422c8
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/cache/ConditionalRequest.java
@@ -0,0 +1,77 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.cache;
+
+import com.artipie.http.Headers;
+import com.artipie.http.headers.Header;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Builds conditional request headers (ETag/If-None-Match, Last-Modified/If-Modified-Since)
+ * for upstream requests when cached content is available.
+ *
+ * @since 1.20.13
+ */
+public final class ConditionalRequest {
+
+ /**
+ * Private ctor — static utility.
+ */
+ private ConditionalRequest() {
+ }
+
+ /**
+ * Build conditional headers from cached metadata.
+ *
+ * @param cachedEtag ETag from previously cached response (if available)
+ * @param cachedLastModified Last-Modified header value from cached response (if available)
+ * @return Headers with conditional request fields, or empty headers if no metadata
+ */
+ public static Headers conditionalHeaders(
+ final Optional cachedEtag,
+ final Optional cachedLastModified
+ ) {
+ final List headers = new ArrayList<>(2);
+ cachedEtag.ifPresent(
+ etag -> headers.add(new Header("If-None-Match", etag))
+ );
+ cachedLastModified.ifPresent(
+ lm -> headers.add(new Header("If-Modified-Since", lm))
+ );
+ if (headers.isEmpty()) {
+ return Headers.EMPTY;
+ }
+ return new Headers(headers);
+ }
+
+ /**
+ * Extract ETag value from response headers.
+ *
+ * @param headers Response headers
+ * @return ETag value if present
+ */
+ public static Optional extractEtag(final Headers headers) {
+ return headers.stream()
+ .filter(h -> "ETag".equalsIgnoreCase(h.getKey()))
+ .findFirst()
+ .map(com.artipie.http.headers.Header::getValue);
+ }
+
+ /**
+ * Extract Last-Modified value from response headers.
+ *
+ * @param headers Response headers
+ * @return Last-Modified value if present
+ */
+ public static Optional extractLastModified(final Headers headers) {
+ return headers.stream()
+ .filter(h -> "Last-Modified".equalsIgnoreCase(h.getKey()))
+ .findFirst()
+ .map(com.artipie.http.headers.Header::getValue);
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/cache/DedupStrategy.java b/artipie-core/src/main/java/com/artipie/http/cache/DedupStrategy.java
new file mode 100644
index 000000000..d8bc535e7
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/cache/DedupStrategy.java
@@ -0,0 +1,33 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.cache;
+
+/**
+ * Request deduplication strategy for proxy caches.
+ *
+ * The effective default is {@code SIGNAL} (see
+ * {@code ProxyCacheConfig#dedupStrategy()}).
+ *
+ * @since 1.20.13
+ */
+public enum DedupStrategy {
+
+ /**
+ * No deduplication. Each concurrent request independently fetches from upstream.
+ */
+ NONE,
+
+ /**
+ * Storage-level deduplication. Uses storage key locking to prevent
+ * concurrent writes to the same cache key. Second request waits for
+ * the first to complete and reads from cache.
+ * NOTE(review): {@code RequestDeduplicator} passes this strategy
+ * straight through to the fetcher — the coalescing is expected to
+ * happen in the storage layer, not in the deduplicator.
+ */
+ STORAGE,
+
+ /**
+ * Signal-based deduplication (zero-copy). First request fetches and caches,
+ * then signals completion. Waiting requests read from cache on SUCCESS
+ * signal, or return appropriate error on NOT_FOUND / ERROR signals.
+ * No response body buffering in memory.
+ */
+ SIGNAL
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/cache/DigestComputer.java b/artipie-core/src/main/java/com/artipie/http/cache/DigestComputer.java
new file mode 100644
index 000000000..a8351b27c
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/cache/DigestComputer.java
@@ -0,0 +1,157 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.cache;
+
+import java.nio.ByteBuffer;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HexFormat;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Computes cryptographic digests for artifact content.
+ * Thread-safe utility — each call allocates fresh MessageDigest instances.
+ *
+ *
*
* @since 1.0
@@ -43,6 +57,21 @@ public final class ProxyCacheConfig {
*/
public static final Duration DEFAULT_METADATA_TTL = Duration.ofDays(7);
+ /**
+ * Default stale-while-revalidate max age (1 hour).
+ */
+ public static final Duration DEFAULT_STALE_MAX_AGE = Duration.ofHours(1);
+
+ /**
+ * Default retry initial delay (100ms).
+ */
+ public static final Duration DEFAULT_RETRY_INITIAL_DELAY = Duration.ofMillis(100);
+
+ /**
+ * Default retry backoff multiplier.
+ */
+ public static final double DEFAULT_RETRY_BACKOFF_MULTIPLIER = 2.0;
+
/**
* YAML configuration.
*/
@@ -84,7 +113,7 @@ public int negativeCacheMaxSize() {
/**
* Check if metadata caching is enabled.
- * @return True if enabled (default: false - needs implementation)
+ * @return True if enabled (default: false)
*/
public boolean metadataCacheEnabled() {
return this.boolValue("cache", "metadata", "enabled").orElse(false);
@@ -99,6 +128,84 @@ public Duration metadataCacheTtl() {
.orElse(DEFAULT_METADATA_TTL);
}
+ /**
+ * Check if cooldown is enabled for this adapter.
+ * @return True if enabled (default: false)
+ */
+ public boolean cooldownEnabled() {
+ return this.boolValue("cache", "cooldown", "enabled").orElse(false);
+ }
+
+ /**
+ * Get request deduplication strategy.
+ *
+ * NOTE(review): an unrecognized configured value is not defaulted —
+ * {@code DedupStrategy.valueOf} throws {@code IllegalArgumentException}.
+ * Confirm callers surface this as a configuration error rather than
+ * failing mid-request.
+ *
+ * @return Dedup strategy (default: SIGNAL)
+ */
+ public DedupStrategy dedupStrategy() {
+ return this.stringValue("cache", "dedup_strategy")
+ .map(s -> DedupStrategy.valueOf(s.toUpperCase(Locale.ROOT)))
+ .orElse(DedupStrategy.SIGNAL);
+ }
+
+ /**
+ * Check if conditional requests (ETag/If-None-Match) are enabled.
+ * @return True if enabled (default: true)
+ */
+ public boolean conditionalRequestsEnabled() {
+ return this.boolValue("cache", "conditional_requests").orElse(true);
+ }
+
+ /**
+ * Check if stale-while-revalidate is enabled.
+ * @return True if enabled (default: false)
+ */
+ public boolean staleWhileRevalidateEnabled() {
+ return this.boolValue("cache", "stale_while_revalidate", "enabled")
+ .orElse(false);
+ }
+
+ /**
+ * Get stale-while-revalidate max age.
+ * @return Max age duration (default: 1 hour)
+ */
+ public Duration staleMaxAge() {
+ return this.durationValue("cache", "stale_while_revalidate", "max_age")
+ .orElse(DEFAULT_STALE_MAX_AGE);
+ }
+
+ /**
+ * Get maximum number of retry attempts for upstream requests.
+ * @return Max retries (default: 0 = disabled)
+ */
+ public int retryMaxRetries() {
+ return this.intValue("cache", "retry", "max_retries").orElse(0);
+ }
+
+ /**
+ * Get initial delay between retry attempts.
+ * @return Initial delay duration (default: 100ms)
+ */
+ public Duration retryInitialDelay() {
+ return this.durationValue("cache", "retry", "initial_delay")
+ .orElse(DEFAULT_RETRY_INITIAL_DELAY);
+ }
+
+ /**
+ * Get backoff multiplier for retry delays.
+ * @return Backoff multiplier (default: 2.0)
+ */
+ public double retryBackoffMultiplier() {
+ return this.doubleValue("cache", "retry", "backoff_multiplier")
+ .orElse(DEFAULT_RETRY_BACKOFF_MULTIPLIER);
+ }
+
+ /**
+ * Check if proxy metrics recording is enabled.
+ * @return True if enabled (default: true)
+ */
+ public boolean metricsEnabled() {
+ return this.boolValue("cache", "metrics").orElse(true);
+ }
+
/**
* Check if any caching is configured.
* @return True if cache section exists
@@ -113,14 +220,7 @@ public boolean hasCacheConfig() {
* @return Optional boolean value
*/
private Optional boolValue(final String... path) {
- YamlMapping current = this.yaml;
- for (int i = 0; i < path.length - 1; i++) {
- current = current.yamlMapping(path[i]);
- if (current == null) {
- return Optional.empty();
- }
- }
- final String value = current.string(path[path.length - 1]);
+ final String value = this.rawValue(path);
return value == null ? Optional.empty() : Optional.of(Boolean.parseBoolean(value));
}
@@ -130,14 +230,7 @@ private Optional boolValue(final String... path) {
* @return Optional integer value
*/
private Optional intValue(final String... path) {
- YamlMapping current = this.yaml;
- for (int i = 0; i < path.length - 1; i++) {
- current = current.yamlMapping(path[i]);
- if (current == null) {
- return Optional.empty();
- }
- }
- final String value = current.string(path[path.length - 1]);
+ final String value = this.rawValue(path);
try {
return value == null ? Optional.empty() : Optional.of(Integer.parseInt(value));
} catch (final NumberFormatException ex) {
@@ -145,6 +238,20 @@ private Optional intValue(final String... path) {
}
}
+ /**
+ * Get double value from nested YAML path.
+ * @param path YAML path segments
+ * @return Optional double value
+ */
+ private Optional doubleValue(final String... path) {
+ final String value = this.rawValue(path);
+ try {
+ return value == null ? Optional.empty() : Optional.of(Double.parseDouble(value));
+ } catch (final NumberFormatException ex) {
+ return Optional.empty();
+ }
+ }
+
/**
* Get duration value from nested YAML path.
* Supports ISO-8601 duration format (e.g., PT24H, P1D).
@@ -152,14 +259,7 @@ private Optional intValue(final String... path) {
* @return Optional duration value
*/
private Optional durationValue(final String... path) {
- YamlMapping current = this.yaml;
- for (int i = 0; i < path.length - 1; i++) {
- current = current.yamlMapping(path[i]);
- if (current == null) {
- return Optional.empty();
- }
- }
- final String value = current.string(path[path.length - 1]);
+ final String value = this.rawValue(path);
try {
return value == null ? Optional.empty() : Optional.of(Duration.parse(value));
} catch (final Exception ex) {
@@ -167,11 +267,57 @@ private Optional durationValue(final String... path) {
}
}
+ /**
+ * Get string value from nested YAML path.
+ * @param path YAML path segments
+ * @return Optional string value
+ */
+ private Optional stringValue(final String... path) {
+ return Optional.ofNullable(this.rawValue(path));
+ }
+
+ /**
+ * Navigate YAML path and return raw string value at leaf.
+ * @param path YAML path segments
+ * @return Raw string value or null
+ */
+ private String rawValue(final String... path) {
+ YamlMapping current = this.yaml;
+ for (int idx = 0; idx < path.length - 1; idx++) {
+ current = current.yamlMapping(path[idx]);
+ if (current == null) {
+ return null;
+ }
+ }
+ return current.string(path[path.length - 1]);
+ }
+
/**
* Create default configuration (all caching enabled with defaults).
* @return Default configuration
*/
public static ProxyCacheConfig defaults() {
- return new ProxyCacheConfig(com.amihaiemil.eoyaml.Yaml.createYamlMappingBuilder().build());
+ return new ProxyCacheConfig(
+ com.amihaiemil.eoyaml.Yaml.createYamlMappingBuilder().build()
+ );
+ }
+
+ /**
+ * Create configuration with cooldown enabled.
+ * Used by adapters that support cooldown enforcement (e.g., Maven proxy).
+ * @return Configuration with cooldown enabled
+ */
+ public static ProxyCacheConfig withCooldown() {
+ return new ProxyCacheConfig(
+ com.amihaiemil.eoyaml.Yaml.createYamlMappingBuilder()
+ .add("cache",
+ com.amihaiemil.eoyaml.Yaml.createYamlMappingBuilder()
+ .add("cooldown",
+ com.amihaiemil.eoyaml.Yaml.createYamlMappingBuilder()
+ .add("enabled", "true")
+ .build())
+ .build())
+ .build()
+ );
}
}
diff --git a/artipie-core/src/main/java/com/artipie/http/cache/RequestDeduplicator.java b/artipie-core/src/main/java/com/artipie/http/cache/RequestDeduplicator.java
new file mode 100644
index 000000000..883cc9971
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/cache/RequestDeduplicator.java
@@ -0,0 +1,198 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.cache;
+
+import com.artipie.asto.Key;
+import com.artipie.http.misc.ConfigDefaults;
+
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Supplier;
+
+/**
+ * Deduplicates concurrent requests for the same cache key.
+ *
+ *
When multiple clients request the same artifact simultaneously, only one
+ * upstream fetch is performed. Other callers either wait for the signal (SIGNAL
+ * strategy) or are coalesced at the storage level (STORAGE strategy).
+ *
+ *
With SIGNAL strategy (default):
+ *
+ *
First request: executes the supplier, signals result on completion
+ *
Waiting requests: receive the same signal (SUCCESS, NOT_FOUND, ERROR)
+ *
After completion: entry is removed from in-flight map
+ *
+ *
+ *
With NONE strategy, every call immediately delegates to the supplier.
+ *
+ * @since 1.20.13
+ */
+public final class RequestDeduplicator implements AutoCloseable {
+
+ /**
+ * Maximum age of an in-flight entry before it's considered zombie (5 minutes).
+ * Configurable via ARTIPIE_DEDUP_MAX_AGE_MS environment variable.
+ */
+ private static final long MAX_AGE_MS =
+ ConfigDefaults.getLong("ARTIPIE_DEDUP_MAX_AGE_MS", 300_000L);
+
+ /**
+ * Maps cache key to the in-flight fetch entry (future + creation time).
+ */
+ private final ConcurrentHashMap inFlight;
+
+ /**
+ * Strategy to use.
+ */
+ private final DedupStrategy strategy;
+
+ /**
+ * Cleanup scheduler.
+ */
+ private final java.util.concurrent.ScheduledExecutorService cleanup;
+
+ /**
+ * Ctor.
+ * @param strategy Dedup strategy
+ */
+ public RequestDeduplicator(final DedupStrategy strategy) {
+ this.strategy = Objects.requireNonNull(strategy, "strategy");
+ this.inFlight = new ConcurrentHashMap<>();
+ this.cleanup = java.util.concurrent.Executors.newSingleThreadScheduledExecutor(r -> {
+ final Thread thread = new Thread(r, "dedup-cleanup");
+ thread.setDaemon(true);
+ return thread;
+ });
+ this.cleanup.scheduleAtFixedRate(this::evictStale, 60, 60, java.util.concurrent.TimeUnit.SECONDS);
+ }
+
+ /**
+ * Execute a fetch with deduplication.
+ *
+ *
If a fetch for the same key is already in progress and strategy is SIGNAL,
+ * this call returns a future that completes when the existing fetch completes.
+ *
+ * @param key Cache key identifying the artifact
+ * @param fetcher Supplier that performs the actual upstream fetch.
+ * Must complete the returned future with a FetchSignal.
+ * @return Future with the fetch signal (SUCCESS, NOT_FOUND, or ERROR)
+ */
+ public CompletableFuture deduplicate(
+ final Key key,
+ final Supplier> fetcher
+ ) {
+ if (this.strategy == DedupStrategy.NONE || this.strategy == DedupStrategy.STORAGE) {
+ return fetcher.get();
+ }
+ final CompletableFuture fresh = new CompletableFuture<>();
+ final InFlightEntry freshEntry = new InFlightEntry(fresh, System.currentTimeMillis());
+ final InFlightEntry existing = this.inFlight.putIfAbsent(key, freshEntry);
+ if (existing != null) {
+ return existing.future;
+ }
+ fetcher.get().whenComplete((signal, err) -> {
+ this.inFlight.remove(key);
+ if (err != null) {
+ fresh.complete(FetchSignal.ERROR);
+ } else {
+ fresh.complete(signal);
+ }
+ });
+ return fresh;
+ }
+
+ /**
+ * Get the number of currently in-flight requests. For monitoring.
+ * @return Count of in-flight dedup entries
+ */
+ public int inFlightCount() {
+ return this.inFlight.size();
+ }
+
+ /**
+ * Remove entries that have been in-flight for too long (zombie protection).
+ */
+ private void evictStale() {
+ final long now = System.currentTimeMillis();
+ this.inFlight.entrySet().removeIf(entry -> {
+ if (now - entry.getValue().createdAt > MAX_AGE_MS) {
+ entry.getValue().future.complete(FetchSignal.ERROR);
+ return true;
+ }
+ return false;
+ });
+ }
+
+ /**
+ * Shuts down the cleanup scheduler and completes all in-flight entries with ERROR.
+ * Should be called when the deduplicator is no longer needed.
+ */
+ @Override
+ public void close() {
+ this.cleanup.shutdownNow();
+ this.inFlight.values().forEach(
+ entry -> entry.future.complete(FetchSignal.ERROR)
+ );
+ this.inFlight.clear();
+ }
+
+ /**
+ * Alias for {@link #close()}, for explicit lifecycle management.
+ */
+ public void shutdown() {
+ this.close();
+ }
+
+ /**
+ * In-flight entry tracking future and creation time.
+ */
+ private static final class InFlightEntry {
+ /**
+ * The future for the in-flight fetch.
+ */
+ final CompletableFuture future;
+
+ /**
+ * Timestamp when this entry was created.
+ */
+ final long createdAt;
+
+ /**
+ * Ctor.
+ * @param future The future for the in-flight fetch
+ * @param createdAt Timestamp when this entry was created
+ */
+ InFlightEntry(final CompletableFuture future, final long createdAt) {
+ this.future = future;
+ this.createdAt = createdAt;
+ }
+ }
+
+ /**
+ * Signal indicating the outcome of a deduplicated fetch.
+ *
+ * @since 1.20.13
+ */
+ public enum FetchSignal {
+ /**
+ * Upstream returned 200 and content is now cached in storage.
+ * Waiting callers should read from cache.
+ */
+ SUCCESS,
+
+ /**
+ * Upstream returned 404. Negative cache has been updated.
+ * Waiting callers should return 404.
+ */
+ NOT_FOUND,
+
+ /**
+ * Upstream returned an error (5xx, timeout, exception).
+ * Waiting callers should return 503 or fall back to stale cache.
+ */
+ ERROR
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/cache/SidecarFile.java b/artipie-core/src/main/java/com/artipie/http/cache/SidecarFile.java
new file mode 100644
index 000000000..4551a4521
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/cache/SidecarFile.java
@@ -0,0 +1,28 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.cache;
+
+import java.util.Objects;
+
/**
 * Checksum sidecar file generated alongside a cached artifact.
 * For example, Maven generates .sha1, .sha256, .md5 files next to each artifact.
 *
 * <p>The {@code content} array is defensively copied on construction and on
 * access, and {@code equals}/{@code hashCode} compare the bytes by value —
 * the default record implementations would compare the array by reference
 * and expose the caller's mutable array.
 *
 * @param path Sidecar file path (e.g., "com/example/foo/1.0/foo-1.0.jar.sha256")
 * @param content Sidecar file content (the hex-encoded checksum string as bytes)
 * @since 1.20.13
 */
public record SidecarFile(String path, byte[] content) {

    /**
     * Ctor with validation and defensive copy of the content array.
     * @param path Sidecar file path
     * @param content Sidecar file content
     */
    public SidecarFile {
        Objects.requireNonNull(path, "path");
        Objects.requireNonNull(content, "content");
        content = content.clone();
    }

    /**
     * Sidecar content bytes.
     * @return A defensive copy of the content array
     */
    @Override
    public byte[] content() {
        return this.content.clone();
    }

    @Override
    public boolean equals(final Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof SidecarFile that)) {
            return false;
        }
        return this.path.equals(that.path)
            && java.util.Arrays.equals(this.content, that.content);
    }

    @Override
    public int hashCode() {
        return 31 * this.path.hashCode() + java.util.Arrays.hashCode(this.content);
    }

    @Override
    public String toString() {
        return "SidecarFile[path=" + this.path
            + ", content=" + this.content.length + " bytes]";
    }
}
diff --git a/artipie-core/src/main/java/com/artipie/http/headers/Login.java b/artipie-core/src/main/java/com/artipie/http/headers/Login.java
index af882d641..29028fdc6 100644
--- a/artipie-core/src/main/java/com/artipie/http/headers/Login.java
+++ b/artipie-core/src/main/java/com/artipie/http/headers/Login.java
@@ -6,6 +6,7 @@
import com.artipie.http.Headers;
import com.artipie.http.auth.AuthzSlice;
+import com.artipie.http.log.EcsLogger;
import com.artipie.scheduling.ArtifactEvent;
import org.slf4j.MDC;
@@ -84,7 +85,11 @@ private static Optional decodeAuthorization(final String header) {
if (!credentials.isBlank()) {
return Optional.of(credentials);
}
- } catch (final IllegalArgumentException ignored) {
+ } catch (final IllegalArgumentException ex) {
+ EcsLogger.debug("com.artipie.http")
+ .message("Failed to decode Basic auth credentials")
+ .error(ex)
+ .log();
return Optional.empty();
}
}
diff --git a/artipie-core/src/main/java/com/artipie/http/misc/ConfigDefaults.java b/artipie-core/src/main/java/com/artipie/http/misc/ConfigDefaults.java
new file mode 100644
index 000000000..9dd715bee
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/misc/ConfigDefaults.java
@@ -0,0 +1,63 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.misc;
+
/**
 * Centralized configuration defaults with environment variable overrides.
 * Values are read with precedence: env var > system property > default.
 * The system property name is derived from the env var name by
 * lower-casing it and replacing '_' with '.' (e.g. {@code FOO_BAR} maps
 * to {@code foo.bar}).
 *
 * @since 1.20.13
 */
public final class ConfigDefaults {

    /**
     * Utility class — not instantiated.
     */
    private ConfigDefaults() {
    }

    /**
     * Read a configuration value.
     * @param envVar Environment variable name
     * @param defaultValue Default value if not set
     * @return Configured value or default
     */
    public static String get(final String envVar, final String defaultValue) {
        final String env = System.getenv(envVar);
        if (env != null && !env.isEmpty()) {
            return env;
        }
        // Locale.ROOT avoids locale-sensitive case mapping (e.g. the Turkish
        // dotless-i turning "ID" into "ıd"), which would silently change the
        // derived system property name depending on the default locale.
        final String prop = System.getProperty(
            envVar.toLowerCase(java.util.Locale.ROOT).replace('_', '.')
        );
        if (prop != null && !prop.isEmpty()) {
            return prop;
        }
        return defaultValue;
    }

    /**
     * Read an integer configuration value.
     * Falls back to the default when the configured value is not a valid int.
     * @param envVar Environment variable name
     * @param defaultValue Default value
     * @return Configured value or default
     */
    public static int getInt(final String envVar, final int defaultValue) {
        try {
            return Integer.parseInt(get(envVar, String.valueOf(defaultValue)));
        } catch (final NumberFormatException ignored) {
            return defaultValue;
        }
    }

    /**
     * Read a long configuration value.
     * Falls back to the default when the configured value is not a valid long.
     * @param envVar Environment variable name
     * @param defaultValue Default value
     * @return Configured value or default
     */
    public static long getLong(final String envVar, final long defaultValue) {
        try {
            return Long.parseLong(get(envVar, String.valueOf(defaultValue)));
        } catch (final NumberFormatException ignored) {
            return defaultValue;
        }
    }
}
diff --git a/artipie-core/src/main/java/com/artipie/http/misc/DispatchedStorage.java b/artipie-core/src/main/java/com/artipie/http/misc/DispatchedStorage.java
new file mode 100644
index 000000000..e8da6cf0e
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/misc/DispatchedStorage.java
@@ -0,0 +1,151 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.misc;
+
+import com.artipie.asto.Content;
+import com.artipie.asto.Key;
+import com.artipie.asto.ListResult;
+import com.artipie.asto.Meta;
+import com.artipie.asto.Storage;
+
+import java.util.Collection;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionStage;
+import java.util.concurrent.Executor;
+import java.util.function.Function;
+
+/**
+ * Decorator that wraps any {@link Storage} and dispatches completion
+ * continuations to the named thread pools from {@link StorageExecutors}.
+ *
+ * Each storage operation category is dispatched to its own pool:
+ *
+ *
READ ops (exists, value, metadata) use {@link StorageExecutors#READ}
+ *
WRITE ops (save, move, delete) use {@link StorageExecutors#WRITE}
+ *
LIST ops (list) use {@link StorageExecutors#LIST}
+ *
+ *
+ * The {@code exclusively()} method delegates directly without dispatching
+ * to avoid deadlocks with lock management. The {@code identifier()} method
+ * also delegates directly as it is synchronous with no I/O.
+ *
+ * @since 1.20.13
+ */
+public final class DispatchedStorage implements Storage {
+
+ /**
+ * Delegate storage.
+ */
+ private final Storage delegate;
+
+ /**
+ * Wraps the given storage with thread pool dispatching.
+ * @param delegate Storage to wrap
+ */
+ public DispatchedStorage(final Storage delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public CompletableFuture exists(final Key key) {
+ return dispatch(this.delegate.exists(key), StorageExecutors.READ);
+ }
+
+ @Override
+ public CompletableFuture> list(final Key prefix) {
+ return dispatch(this.delegate.list(prefix), StorageExecutors.LIST);
+ }
+
+ @Override
+ public CompletableFuture list(final Key prefix, final String delimiter) {
+ return dispatch(this.delegate.list(prefix, delimiter), StorageExecutors.LIST);
+ }
+
+ @Override
+ public CompletableFuture save(final Key key, final Content content) {
+ return dispatch(this.delegate.save(key, content), StorageExecutors.WRITE);
+ }
+
+ @Override
+ public CompletableFuture move(final Key source, final Key destination) {
+ return dispatch(this.delegate.move(source, destination), StorageExecutors.WRITE);
+ }
+
+ @Override
+ public CompletableFuture extends Meta> metadata(final Key key) {
+ return dispatch(this.delegate.metadata(key), StorageExecutors.READ);
+ }
+
+ @Override
+ public CompletableFuture value(final Key key) {
+ return dispatch(this.delegate.value(key), StorageExecutors.READ);
+ }
+
+ @Override
+ public CompletableFuture delete(final Key key) {
+ return dispatch(this.delegate.delete(key), StorageExecutors.WRITE);
+ }
+
+ @Override
+ public CompletableFuture deleteAll(final Key prefix) {
+ return dispatch(this.delegate.deleteAll(prefix), StorageExecutors.WRITE);
+ }
+
+ @Override
+ public CompletionStage exclusively(
+ final Key key,
+ final Function> operation
+ ) {
+ return this.delegate.exclusively(key, operation);
+ }
+
+ /**
+ * Returns the underlying delegate storage.
+ * Useful for inspecting the actual storage type when this decorator wraps it.
+ * @return The delegate storage
+ */
+ public Storage unwrap() {
+ return this.delegate;
+ }
+
+ @Override
+ public String identifier() {
+ return this.delegate.identifier();
+ }
+
+ /**
+ * Dispatch a future's completion to the given executor.
+ * Guarantees the returned future is always completed by a thread
+ * from the target executor, so downstream {@code thenApply()} /
+ * {@code thenCompose()} continuations run on that pool.
+ *
+ * @param source Source future from the delegate storage
+ * @param executor Target executor pool
+ * @param Result type
+ * @return Future that completes on the target executor
+ */
+ private static CompletableFuture dispatch(
+ final CompletableFuture extends T> source,
+ final Executor executor
+ ) {
+ final CompletableFuture result = new CompletableFuture<>();
+ source.whenComplete(
+ (val, err) -> {
+ try {
+ executor.execute(() -> {
+ if (err != null) {
+ result.completeExceptionally(err);
+ } else {
+ result.complete(val);
+ }
+ });
+ } catch (final java.util.concurrent.RejectedExecutionException rex) {
+ result.completeExceptionally(rex);
+ }
+ }
+ );
+ return result;
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/misc/RepoNameMeterFilter.java b/artipie-core/src/main/java/com/artipie/http/misc/RepoNameMeterFilter.java
new file mode 100644
index 000000000..0517a4555
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/misc/RepoNameMeterFilter.java
@@ -0,0 +1,78 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.misc;
+
+import io.micrometer.core.instrument.Meter;
+import io.micrometer.core.instrument.Tag;
+import io.micrometer.core.instrument.config.MeterFilter;
+
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+/**
+ * Meter filter that caps the cardinality of the "repo_name" tag.
+ * Only the first N distinct repo names are kept; additional repos are
+ * replaced with "_other" to prevent unbounded series growth.
+ *
+ * @since 1.20.13
+ */
+public final class RepoNameMeterFilter implements MeterFilter {
+
+    /**
+     * Tag name to filter.
+     */
+    private static final String TAG_NAME = "repo_name";
+
+    /**
+     * Replacement tag value used once the cardinality cap is hit.
+     */
+    private static final String OTHER = "_other";
+
+    /**
+     * Maximum number of distinct repo_name values.
+     */
+    private final int maxRepos;
+
+    /**
+     * Known repo names (first N to be seen); its size is the single
+     * source of truth for the cap, avoiding a drifting counter.
+     */
+    private final Set<String> known;
+
+    /**
+     * Constructor.
+     * @param maxRepos Maximum distinct repo names to track
+     */
+    public RepoNameMeterFilter(final int maxRepos) {
+        this.maxRepos = maxRepos;
+        this.known = ConcurrentHashMap.newKeySet();
+    }
+
+    @Override
+    public Meter.Id map(final Meter.Id id) {
+        final String repo = id.getTag(TAG_NAME);
+        if (repo == null || this.known.contains(repo)) {
+            return id;
+        }
+        // Add first, then verify the cap: the previous separate
+        // AtomicInteger check-then-act could admit more than
+        // maxRepos names under concurrent registration.
+        if (this.known.size() < this.maxRepos && this.known.add(repo)) {
+            if (this.known.size() <= this.maxRepos) {
+                return id;
+            }
+            // Lost the race past the cap: undo and fall through.
+            this.known.remove(repo);
+        }
+        final List<Tag> replaced = id.getTags().stream()
+            .map(tag -> TAG_NAME.equals(tag.getKey())
+                ? Tag.of(TAG_NAME, OTHER)
+                : tag)
+            .collect(Collectors.toList());
+        return id.replaceTags(replaced);
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/misc/StorageExecutors.java b/artipie-core/src/main/java/com/artipie/http/misc/StorageExecutors.java
new file mode 100644
index 000000000..de492c9d5
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/misc/StorageExecutors.java
@@ -0,0 +1,152 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.misc;
+
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.MeterRegistry;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Named thread pools for storage operations, separated by operation type.
+ * Prevents slow writes from starving fast reads by providing independent pools.
+ *
+ * <p>Pool sizing (configurable via environment variables):</p>
+ * <ul>
+ *   <li>READ: ARTIPIE_IO_READ_THREADS, default 4x CPUs</li>
+ *   <li>WRITE: ARTIPIE_IO_WRITE_THREADS, default 2x CPUs</li>
+ *   <li>LIST: ARTIPIE_IO_LIST_THREADS, default 1x CPUs</li>
+ * </ul>
+ *
+ * @since 1.20.13
+ */
+public final class StorageExecutors {
+
+    /**
+     * Thread pool for storage read operations (value, exists, metadata).
+     */
+    public static final ExecutorService READ = Executors.newFixedThreadPool(
+        ConfigDefaults.getInt(
+            "ARTIPIE_IO_READ_THREADS",
+            Runtime.getRuntime().availableProcessors() * 4
+        ),
+        namedThreadFactory("artipie-io-read-%d")
+    );
+
+    /**
+     * Thread pool for storage write operations (save, move, delete).
+     */
+    public static final ExecutorService WRITE = Executors.newFixedThreadPool(
+        ConfigDefaults.getInt(
+            "ARTIPIE_IO_WRITE_THREADS",
+            Runtime.getRuntime().availableProcessors() * 2
+        ),
+        namedThreadFactory("artipie-io-write-%d")
+    );
+
+    /**
+     * Thread pool for storage list operations.
+     */
+    public static final ExecutorService LIST = Executors.newFixedThreadPool(
+        ConfigDefaults.getInt(
+            "ARTIPIE_IO_LIST_THREADS",
+            Runtime.getRuntime().availableProcessors()
+        ),
+        namedThreadFactory("artipie-io-list-%d")
+    );
+
+    /**
+     * Private ctor — utility class.
+     */
+    private StorageExecutors() {
+    }
+
+    /**
+     * Register pool utilization metrics gauges with the given meter registry.
+     * Registers active thread count and queue size for each pool (READ, WRITE, LIST).
+     * @param registry Micrometer meter registry
+     */
+    public static void registerMetrics(final MeterRegistry registry) {
+        registerPool(registry, "read", "READ", READ);
+        registerPool(registry, "write", "WRITE", WRITE);
+        registerPool(registry, "list", "LIST", LIST);
+    }
+
+    /**
+     * Shutdown all storage executor pools and await termination.
+     * Should be called during application shutdown. All pools drain
+     * against one shared five-second deadline, so total shutdown
+     * latency is bounded by roughly 5s rather than 5s per pool.
+     */
+    @SuppressWarnings("PMD.AvoidCatchingGenericException")
+    public static void shutdown() {
+        final ExecutorService[] pools = {READ, WRITE, LIST};
+        for (final ExecutorService pool : pools) {
+            pool.shutdown();
+        }
+        final long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(5);
+        try {
+            for (final ExecutorService pool : pools) {
+                final long remaining = deadline - System.nanoTime();
+                if (!pool.awaitTermination(remaining, TimeUnit.NANOSECONDS)) {
+                    pool.shutdownNow();
+                }
+            }
+        } catch (final InterruptedException ex) {
+            Thread.currentThread().interrupt();
+            for (final ExecutorService pool : pools) {
+                pool.shutdownNow();
+            }
+        }
+    }
+
+    /**
+     * Register active-count and queue-size gauges for one pool.
+     * @param registry Micrometer meter registry
+     * @param metric Lower-case metric name segment, e.g. "read"
+     * @param label Upper-case pool label for descriptions, e.g. "READ"
+     * @param pool Executor pool to observe
+     */
+    private static void registerPool(
+        final MeterRegistry registry,
+        final String metric,
+        final String label,
+        final ExecutorService pool
+    ) {
+        Gauge.builder(
+            "artipie.pool." + metric + ".active", pool,
+            exec -> ((ThreadPoolExecutor) exec).getActiveCount()
+        ).description("Active threads in " + label + " pool").register(registry);
+        Gauge.builder(
+            "artipie.pool." + metric + ".queue", pool,
+            exec -> ((ThreadPoolExecutor) exec).getQueue().size()
+        ).description("Queue size of " + label + " pool").register(registry);
+    }
+
+    /**
+     * Create a named daemon thread factory.
+     * @param nameFormat Thread name format with %d placeholder
+     * @return Thread factory
+     */
+    private static ThreadFactory namedThreadFactory(final String nameFormat) {
+        return new ThreadFactory() {
+            private final AtomicInteger counter = new AtomicInteger(0);
+
+            @Override
+            public Thread newThread(final Runnable task) {
+                final Thread thread = new Thread(task);
+                thread.setName(
+                    String.format(nameFormat, this.counter.getAndIncrement())
+                );
+                thread.setDaemon(true);
+                return thread;
+            }
+        };
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/retry/RetrySlice.java b/artipie-core/src/main/java/com/artipie/http/retry/RetrySlice.java
new file mode 100644
index 000000000..1c9f041fc
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/retry/RetrySlice.java
@@ -0,0 +1,167 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.retry;
+
+import com.artipie.asto.Content;
+import com.artipie.http.Headers;
+import com.artipie.http.Response;
+import com.artipie.http.Slice;
+import com.artipie.http.rq.RequestLine;
+
+import java.time.Duration;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Executor;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Function;
+
+/**
+ * Slice decorator that retries failed requests with exponential backoff.
+ *
Retries on: 5xx status codes, connection timeouts, exceptions.
+ * Does NOT retry on: 4xx client errors, successful responses.
+ *
+ * @since 1.20.13
+ */
+public final class RetrySlice implements Slice {
+
+ /**
+ * Default max retries.
+ */
+ public static final int DEFAULT_MAX_RETRIES = 2;
+
+ /**
+ * Default initial delay.
+ */
+ public static final Duration DEFAULT_INITIAL_DELAY = Duration.ofMillis(100);
+
+ /**
+ * Default backoff multiplier.
+ */
+ public static final double DEFAULT_BACKOFF_MULTIPLIER = 2.0;
+
+ /**
+ * Wrapped slice.
+ */
+ private final Slice origin;
+
+ /**
+ * Maximum number of retry attempts.
+ */
+ private final int maxRetries;
+
+ /**
+ * Initial delay before first retry.
+ */
+ private final Duration initialDelay;
+
+ /**
+ * Backoff multiplier for subsequent retries.
+ */
+ private final double backoffMultiplier;
+
+ /**
+ * Constructor with defaults.
+ * @param origin Slice to wrap
+ */
+ public RetrySlice(final Slice origin) {
+ this(origin, DEFAULT_MAX_RETRIES, DEFAULT_INITIAL_DELAY, DEFAULT_BACKOFF_MULTIPLIER);
+ }
+
+ /**
+ * Constructor with custom configuration.
+ * @param origin Slice to wrap
+ * @param maxRetries Maximum retry attempts
+ * @param initialDelay Initial delay before first retry
+ * @param backoffMultiplier Multiplier for exponential backoff
+ */
+ public RetrySlice(
+ final Slice origin,
+ final int maxRetries,
+ final Duration initialDelay,
+ final double backoffMultiplier
+ ) {
+ this.origin = Objects.requireNonNull(origin, "origin");
+ this.maxRetries = maxRetries;
+ this.initialDelay = Objects.requireNonNull(initialDelay, "initialDelay");
+ this.backoffMultiplier = backoffMultiplier;
+ }
+
+ @Override
+ public CompletableFuture response(
+ final RequestLine line,
+ final Headers headers,
+ final Content body
+ ) {
+ return this.attempt(line, headers, body, 0, this.initialDelay.toMillis());
+ }
+
+    /**
+     * Attempt a request, retrying on failure with exponential backoff.
+     * The first retry waits the initial delay; later retries back off.
+     * @param line Request line
+     * @param headers Request headers
+     * @param body Request body
+     * @param attempt Current attempt number (0-based)
+     * @param delayMs Delay before the next retry, in milliseconds
+     * @return Response future
+     */
+    private CompletableFuture<Response> attempt(
+        final RequestLine line,
+        final Headers headers,
+        final Content body,
+        final int attempt,
+        final long delayMs
+    ) {
+        return this.origin.response(line, headers, body)
+            .<CompletableFuture<Response>>handle((response, error) -> {
+                if (error != null) {
+                    if (attempt < this.maxRetries) {
+                        return this.delayedAttempt(
+                            line, headers, body, attempt + 1, delayMs
+                        );
+                    }
+                    return CompletableFuture.failedFuture(error);
+                }
+                if (shouldRetry(response) && attempt < this.maxRetries) {
+                    return this.delayedAttempt(
+                        line, headers, body, attempt + 1, delayMs
+                    );
+                }
+                return CompletableFuture.completedFuture(response);
+            })
+            .thenCompose(Function.identity());
+    }
+
+    /**
+     * Schedule a retry attempt after a delay with jitter.
+     * Jitter prevents thundering herd by adding random 0-50% to the delay.
+     * The recursive call escalates the delay for the retry after this one.
+     */
+    private CompletableFuture<Response> delayedAttempt(
+        final RequestLine line, final Headers headers,
+        final Content body, final int attempt, final long delayMs
+    ) {
+        // Add jitter: delay * (1.0 + random[0, 0.5)) to prevent thundering herd
+        final long jittered = (long) (delayMs
+            * (1.0 + java.util.concurrent.ThreadLocalRandom.current().nextDouble(0.5)));
+        final Executor delayed = CompletableFuture.delayedExecutor(
+            jittered, TimeUnit.MILLISECONDS
+        );
+        return CompletableFuture.supplyAsync(() -> null, delayed)
+            .thenCompose(ignored -> this.attempt(
+                line, headers, body, attempt,
+                (long) (delayMs * this.backoffMultiplier)
+            ));
+    }
+
+ /**
+ * Whether to retry based on response status.
+ * @param response HTTP response
+ * @return True if response indicates a retryable server error
+ */
+ private static boolean shouldRetry(final Response response) {
+ return response.status().code() >= 500;
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/slice/CircuitBreakerSlice.java b/artipie-core/src/main/java/com/artipie/http/slice/CircuitBreakerSlice.java
index 50753cab1..f51ebe0c7 100644
--- a/artipie-core/src/main/java/com/artipie/http/slice/CircuitBreakerSlice.java
+++ b/artipie-core/src/main/java/com/artipie/http/slice/CircuitBreakerSlice.java
@@ -10,250 +10,63 @@
import com.artipie.http.ResponseBuilder;
import com.artipie.http.Slice;
import com.artipie.http.rq.RequestLine;
-import com.artipie.http.log.EcsLogger;
+import com.artipie.http.timeout.AutoBlockRegistry;
-import java.time.Duration;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.atomic.AtomicReference;
/**
- * Circuit Breaker pattern for upstream repositories.
- * Prevents hammering failed upstream by failing fast after threshold.
- *
- *
States:
- *
- *
CLOSED: Normal operation, requests pass through
- *
OPEN: Too many failures, fail fast without calling upstream
- *
HALF_OPEN: Testing if upstream recovered, single request allowed
- *
- *
+ * Circuit breaker slice delegating to {@link AutoBlockRegistry}.
+ * Fails fast with 503 when the remote is auto-blocked.
+ * Records success/failure to the registry after each request.
+ *
* @since 1.0
*/
public final class CircuitBreakerSlice implements Slice {
- /**
- * Circuit breaker state.
- */
- enum State {
- /**
- * Normal operation.
- */
- CLOSED,
-
- /**
- * Failing fast.
- */
- OPEN,
-
- /**
- * Testing recovery.
- */
- HALF_OPEN
- }
-
- /**
- * Default failure threshold before opening circuit.
- */
- private static final int DEFAULT_FAILURE_THRESHOLD = 5;
-
- /**
- * Default timeout before trying again.
- */
- private static final Duration DEFAULT_TIMEOUT = Duration.ofMinutes(1);
-
- /**
- * Origin slice (upstream).
- */
private final Slice origin;
+ private final AutoBlockRegistry registry;
+ private final String remoteId;
/**
- * Current circuit state.
- */
- private final AtomicReference state;
-
- /**
- * Consecutive failure count.
- */
- private final AtomicInteger failureCount;
-
- /**
- * Timestamp of last failure.
- */
- private final AtomicLong lastFailureTime;
-
- /**
- * Failure threshold before opening circuit.
- */
- private final int failureThreshold;
-
- /**
- * Timeout before retrying (millis).
- */
- private final long timeoutMillis;
-
- /**
- * Constructor with defaults.
- * @param origin Origin slice
- */
- public CircuitBreakerSlice(final Slice origin) {
- this(origin, DEFAULT_FAILURE_THRESHOLD, DEFAULT_TIMEOUT);
- }
-
- /**
- * Constructor with custom settings.
- * @param origin Origin slice
- * @param failureThreshold Failures before opening circuit
- * @param timeout Timeout before retrying
+ * Constructor.
+ * @param origin Origin slice (upstream)
+ * @param registry Shared auto-block registry
+ * @param remoteId Unique identifier for this remote
*/
public CircuitBreakerSlice(
final Slice origin,
- final int failureThreshold,
- final Duration timeout
+ final AutoBlockRegistry registry,
+ final String remoteId
) {
this.origin = origin;
- this.state = new AtomicReference<>(State.CLOSED);
- this.failureCount = new AtomicInteger(0);
- this.lastFailureTime = new AtomicLong(0);
- this.failureThreshold = failureThreshold;
- this.timeoutMillis = timeout.toMillis();
+ this.registry = registry;
+ this.remoteId = remoteId;
}
@Override
public CompletableFuture response(
- final RequestLine line,
- final Headers headers,
- final Content body
+ final RequestLine line, final Headers headers, final Content body
) {
- final State currentState = this.state.get();
-
- // Check if circuit is open
- if (currentState == State.OPEN) {
- final long timeSinceFailure = System.currentTimeMillis() - this.lastFailureTime.get();
-
- if (timeSinceFailure > this.timeoutMillis) {
- // Timeout expired - try half-open
- EcsLogger.info("com.artipie.http")
- .message("Circuit breaker HALF_OPEN, testing upstream after " + timeSinceFailure + "ms since last failure")
- .eventCategory("circuit_breaker")
- .eventAction("state_change")
- .eventOutcome("success")
- .log();
- this.state.compareAndSet(State.OPEN, State.HALF_OPEN);
- } else {
- // Still open - fail fast
- EcsLogger.debug("com.artipie.http")
- .message("Circuit breaker OPEN, failing fast with " + this.failureCount.get() + " failures (" + timeSinceFailure + "ms since last failure)")
- .eventCategory("circuit_breaker")
- .eventAction("fail_fast")
- .eventOutcome("success")
- .log();
- return CompletableFuture.completedFuture(
- ResponseBuilder.serviceUnavailable(
- "Circuit breaker open - upstream unavailable"
- ).build()
- );
- }
+ if (this.registry.isBlocked(this.remoteId)) {
+ return CompletableFuture.completedFuture(
+ ResponseBuilder.serviceUnavailable(
+ "Auto-blocked - remote unavailable: " + this.remoteId
+ ).build()
+ );
}
-
- // Try request
return this.origin.response(line, headers, body)
.handle((resp, error) -> {
if (error != null) {
- // Request failed
- onFailure(error);
+ this.registry.recordFailure(this.remoteId);
throw new CompletionException(error);
}
-
- // Check response status
- final int statusCode = resp.status().code();
- if (statusCode >= 500 && statusCode < 600) {
- // Server error - count as failure
- onFailure(new IllegalStateException("HTTP " + statusCode));
- throw new CompletionException(
- new IllegalStateException("Upstream error: " + statusCode)
- );
+ if (resp.status().code() >= 500) {
+ this.registry.recordFailure(this.remoteId);
+ } else {
+ this.registry.recordSuccess(this.remoteId);
}
-
- // Success
- onSuccess();
return resp;
});
}
-
- /**
- * Handle successful request.
- */
- private void onSuccess() {
- final int failures = this.failureCount.getAndSet(0);
-
- final State currentState = this.state.get();
- if (currentState == State.HALF_OPEN) {
- // Recovery successful
- this.state.compareAndSet(State.HALF_OPEN, State.CLOSED);
- EcsLogger.info("com.artipie.http")
- .message("Circuit breaker CLOSED - upstream recovered after " + failures + " previous failures")
- .eventCategory("circuit_breaker")
- .eventAction("state_change")
- .eventOutcome("success")
- .log();
- } else if (failures > 0) {
- // Reset failure count
- EcsLogger.debug("com.artipie.http")
- .message("Circuit breaker reset failure count (" + failures + " previous failures)")
- .eventCategory("circuit_breaker")
- .eventAction("failure_reset")
- .eventOutcome("success")
- .log();
- }
- }
-
- /**
- * Handle failed request.
- * @param error Error that occurred
- */
- private void onFailure(final Throwable error) {
- final int failures = this.failureCount.incrementAndGet();
- this.lastFailureTime.set(System.currentTimeMillis());
-
- if (failures >= this.failureThreshold) {
- // Open circuit
- final boolean wasOpen = this.state.getAndSet(State.OPEN) == State.OPEN;
- if (!wasOpen) {
- EcsLogger.warn("com.artipie.http")
- .message("Circuit breaker OPENED after " + failures + " failures (threshold: " + this.failureThreshold + ")")
- .eventCategory("circuit_breaker")
- .eventAction("state_change")
- .eventOutcome("failure")
- .field("error.message", error.getMessage())
- .log();
- }
- } else {
- EcsLogger.debug("com.artipie.http")
- .message("Circuit breaker failure recorded (" + failures + "/" + this.failureThreshold + " failures)")
- .eventCategory("circuit_breaker")
- .eventAction("failure_record")
- .eventOutcome("failure")
- .field("error.message", error.getMessage())
- .log();
- }
- }
-
- /**
- * Get current circuit state (for testing/monitoring).
- * @return Current state
- */
- public State getState() {
- return this.state.get();
- }
-
- /**
- * Get current failure count (for testing/monitoring).
- * @return Failure count
- */
- public int getFailureCount() {
- return this.failureCount.get();
- }
}
diff --git a/artipie-core/src/main/java/com/artipie/http/slice/LoggingSlice.java b/artipie-core/src/main/java/com/artipie/http/slice/LoggingSlice.java
index 3598b61d4..7723c9313 100644
--- a/artipie-core/src/main/java/com/artipie/http/slice/LoggingSlice.java
+++ b/artipie-core/src/main/java/com/artipie/http/slice/LoggingSlice.java
@@ -58,10 +58,9 @@ public CompletableFuture response(
// Log request at DEBUG level (diagnostic only)
if (this.level.intValue() <= Level.FINE.intValue()) {
EcsLogger.debug("com.artipie.http")
- .message("HTTP request")
+ .message("HTTP request: " + msg.toString())
.eventCategory("http")
.eventAction("request")
- .field("http.request.body.content", msg.toString())
.log();
}
@@ -74,10 +73,9 @@ public CompletableFuture response(
// Log response at DEBUG level (diagnostic only)
if (LoggingSlice.this.level.intValue() <= Level.FINE.intValue()) {
EcsLogger.debug("com.artipie.http")
- .message("HTTP response")
+ .message("HTTP response: " + sb.toString())
.eventCategory("http")
.eventAction("response")
- .field("http.response.body.content", sb.toString())
.log();
}
diff --git a/artipie-core/src/main/java/com/artipie/http/slice/TrimPathSlice.java b/artipie-core/src/main/java/com/artipie/http/slice/TrimPathSlice.java
index 2faf70344..30a4f1ff3 100644
--- a/artipie-core/src/main/java/com/artipie/http/slice/TrimPathSlice.java
+++ b/artipie-core/src/main/java/com/artipie/http/slice/TrimPathSlice.java
@@ -138,9 +138,10 @@ private static String asPath(final String result) {
if (result == null || result.isEmpty()) {
return "/";
}
- if (result.charAt(0) != '/') {
- return '/' + result;
+ String path = result;
+ if (path.charAt(0) != '/') {
+ path = '/' + path;
}
- return result;
+ return path.replaceAll("/+", "/");
}
}
diff --git a/artipie-core/src/main/java/com/artipie/http/timeout/AutoBlockRegistry.java b/artipie-core/src/main/java/com/artipie/http/timeout/AutoBlockRegistry.java
new file mode 100644
index 000000000..ee1ddac30
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/timeout/AutoBlockRegistry.java
@@ -0,0 +1,130 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.timeout;
+
+import java.time.Instant;
+import java.util.Locale;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+/**
+ * Thread-safe registry tracking auto-block state for remote endpoints.
+ * Uses Fibonacci backoff for increasing block durations.
+ * Industry-standard approach used by Nexus and Artifactory.
+ *
+ * @since 1.20.13
+ */
+public final class AutoBlockRegistry {
+
+    /**
+     * Fibonacci multiplier sequence.
+     */
+    private static final long[] FIBONACCI = {1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89};
+
+    /**
+     * Auto-block configuration: threshold and block durations.
+     */
+    private final AutoBlockSettings settings;
+
+    /**
+     * Per-remote block state keyed by remote identifier.
+     */
+    private final ConcurrentMap<String, BlockState> states;
+
+    /**
+     * Ctor.
+     * @param settings Auto-block configuration
+     */
+    public AutoBlockRegistry(final AutoBlockSettings settings) {
+        this.settings = settings;
+        this.states = new ConcurrentHashMap<>();
+    }
+
+    /**
+     * Check if a remote is currently blocked.
+     * If the block has expired, transitions to PROBING state and returns false.
+     * The transition happens atomically inside the map to avoid the former
+     * read-then-put race with concurrent {@code recordFailure} updates.
+     *
+     * @param remoteId Remote identifier
+     * @return True if the remote is blocked right now
+     */
+    public boolean isBlocked(final String remoteId) {
+        final BlockState state = this.states.computeIfPresent(
+            remoteId,
+            (key, current) -> {
+                if (current.status() == BlockState.Status.BLOCKED
+                    && Instant.now().isAfter(current.blockedUntil())) {
+                    return new BlockState(
+                        current.failureCount(), current.fibonacciIndex(),
+                        current.blockedUntil(), BlockState.Status.PROBING
+                    );
+                }
+                return current;
+            }
+        );
+        return state != null && state.status() == BlockState.Status.BLOCKED;
+    }
+
+    /**
+     * Get the current status of a remote: "online", "blocked", or "probing".
+     *
+     * @param remoteId Remote identifier
+     * @return Status name in lower case
+     */
+    public String status(final String remoteId) {
+        final BlockState state = this.states.getOrDefault(
+            remoteId, BlockState.online()
+        );
+        if (state.status() == BlockState.Status.BLOCKED
+            && Instant.now().isAfter(state.blockedUntil())) {
+            return "probing";
+        }
+        return state.status().name().toLowerCase(Locale.ROOT);
+    }
+
+    /**
+     * Record a failure for a remote. If the failure threshold is reached,
+     * blocks the remote with Fibonacci-increasing duration.
+     *
+     * @param remoteId Remote identifier
+     */
+    public void recordFailure(final String remoteId) {
+        this.states.compute(remoteId, (key, current) -> {
+            final BlockState state =
+                current != null ? current : BlockState.online();
+            final int failures = state.failureCount() + 1;
+            if (failures >= this.settings.failureThreshold()) {
+                final int fibIdx = state.status() == BlockState.Status.ONLINE
+                    ? 0
+                    : Math.min(
+                        state.fibonacciIndex() + 1, FIBONACCI.length - 1
+                    );
+                final long blockMs = Math.min(
+                    this.settings.initialBlockDuration().toMillis()
+                        * FIBONACCI[fibIdx],
+                    this.settings.maxBlockDuration().toMillis()
+                );
+                return new BlockState(
+                    failures, fibIdx, Instant.now().plusMillis(blockMs),
+                    BlockState.Status.BLOCKED
+                );
+            }
+            return new BlockState(
+                failures, state.fibonacciIndex(),
+                state.blockedUntil(), state.status()
+            );
+        });
+    }
+
+    /**
+     * Record a success for a remote. Resets to ONLINE state.
+     *
+     * @param remoteId Remote identifier
+     */
+    public void recordSuccess(final String remoteId) {
+        this.states.put(remoteId, BlockState.online());
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/timeout/AutoBlockSettings.java b/artipie-core/src/main/java/com/artipie/http/timeout/AutoBlockSettings.java
new file mode 100644
index 000000000..79766ecec
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/timeout/AutoBlockSettings.java
@@ -0,0 +1,54 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.timeout;
+
+import java.time.Duration;
+import java.util.Objects;
+
+/**
+ * Configuration for auto-block behavior. All values configurable via YAML.
+ *
+ * @param failureThreshold Consecutive failures required before blocking, at least 1
+ * @param initialBlockDuration First block duration, strictly positive
+ * @param maxBlockDuration Upper bound for the Fibonacci-scaled block duration
+ * @since 1.20.13
+ */
+public record AutoBlockSettings(
+    int failureThreshold,
+    Duration initialBlockDuration,
+    Duration maxBlockDuration
+) {
+
+    /**
+     * Validates configuration invariants at construction time.
+     */
+    public AutoBlockSettings {
+        Objects.requireNonNull(initialBlockDuration, "initialBlockDuration");
+        Objects.requireNonNull(maxBlockDuration, "maxBlockDuration");
+        if (failureThreshold < 1) {
+            throw new IllegalArgumentException(
+                "failureThreshold must be >= 1: " + failureThreshold
+            );
+        }
+        if (initialBlockDuration.isNegative() || initialBlockDuration.isZero()) {
+            throw new IllegalArgumentException(
+                "initialBlockDuration must be positive: " + initialBlockDuration
+            );
+        }
+        if (maxBlockDuration.compareTo(initialBlockDuration) < 0) {
+            throw new IllegalArgumentException(
+                "maxBlockDuration must be >= initialBlockDuration"
+            );
+        }
+    }
+
+    /**
+     * Default settings: block after 3 failures, 40s initial, 5min cap.
+     * @return Default auto-block settings
+     */
+    public static AutoBlockSettings defaults() {
+        return new AutoBlockSettings(3, Duration.ofSeconds(40), Duration.ofMinutes(5));
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/timeout/BlockState.java b/artipie-core/src/main/java/com/artipie/http/timeout/BlockState.java
new file mode 100644
index 000000000..039b35491
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/timeout/BlockState.java
@@ -0,0 +1,33 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.timeout;
+
+import java.time.Instant;
+
+/**
+ * Immutable block state for a remote endpoint.
+ *
+ * @param failureCount Consecutive failures recorded so far
+ * @param fibonacciIndex Index into the Fibonacci backoff sequence
+ * @param blockedUntil Instant when the current block expires
+ * @param status Current lifecycle status of the remote
+ * @since 1.20.13
+ */
+record BlockState(int failureCount, int fibonacciIndex, Instant blockedUntil, Status status) {
+
+    /**
+     * Remote lifecycle: ONLINE (healthy), BLOCKED (failing fast),
+     * PROBING (block expired, next request tests recovery).
+     */
+    enum Status { ONLINE, BLOCKED, PROBING }
+
+    /**
+     * Fresh healthy state: zero failures, no block in effect.
+     * @return State representing an online remote
+     */
+    static BlockState online() {
+        return new BlockState(0, 0, Instant.MIN, Status.ONLINE);
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/http/timeout/TimeoutSettings.java b/artipie-core/src/main/java/com/artipie/http/timeout/TimeoutSettings.java
new file mode 100644
index 000000000..aa1cac6bb
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/http/timeout/TimeoutSettings.java
@@ -0,0 +1,180 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.http.timeout;
+
+import java.time.Duration;
+import java.util.Objects;
+
+/**
+ * Immutable timeout configuration with hierarchical override support.
+ * Resolution order: per-remote > per-repo > global > defaults.
+ *
+ * @since 1.20.13
+ */
+public final class TimeoutSettings {
+
+    /**
+     * Default TCP connection timeout.
+     */
+    public static final Duration DEFAULT_CONNECTION_TIMEOUT = Duration.ofSeconds(5);
+
+    /**
+     * Default idle (no data transferred) timeout.
+     */
+    public static final Duration DEFAULT_IDLE_TIMEOUT = Duration.ofSeconds(30);
+
+    /**
+     * Default total request timeout.
+     */
+    public static final Duration DEFAULT_REQUEST_TIMEOUT = Duration.ofSeconds(120);
+
+    /**
+     * Connection timeout.
+     */
+    private final Duration connectionTimeout;
+
+    /**
+     * Idle timeout.
+     */
+    private final Duration idleTimeout;
+
+    /**
+     * Request timeout.
+     */
+    private final Duration requestTimeout;
+
+    /**
+     * Ctor.
+     * @param connectionTimeout Connection timeout, not null
+     * @param idleTimeout Idle timeout, not null
+     * @param requestTimeout Request timeout, not null
+     */
+    public TimeoutSettings(
+        final Duration connectionTimeout,
+        final Duration idleTimeout,
+        final Duration requestTimeout
+    ) {
+        this.connectionTimeout = Objects.requireNonNull(connectionTimeout, "connectionTimeout");
+        this.idleTimeout = Objects.requireNonNull(idleTimeout, "idleTimeout");
+        this.requestTimeout = Objects.requireNonNull(requestTimeout, "requestTimeout");
+    }
+
+    /**
+     * Settings with all built-in defaults.
+     * @return Default timeout settings
+     */
+    public static TimeoutSettings defaults() {
+        return new TimeoutSettings(
+            DEFAULT_CONNECTION_TIMEOUT, DEFAULT_IDLE_TIMEOUT, DEFAULT_REQUEST_TIMEOUT
+        );
+    }
+
+    /**
+     * New builder with no values set.
+     * @return Fresh builder
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Connection timeout.
+     * @return Connection timeout
+     */
+    public Duration connectionTimeout() {
+        return this.connectionTimeout;
+    }
+
+    /**
+     * Idle timeout.
+     * @return Idle timeout
+     */
+    public Duration idleTimeout() {
+        return this.idleTimeout;
+    }
+
+    /**
+     * Request timeout.
+     * @return Request timeout
+     */
+    public Duration requestTimeout() {
+        return this.requestTimeout;
+    }
+
+    /**
+     * Builder for {@link TimeoutSettings}; unset values fall back to a parent.
+     */
+    public static final class Builder {
+
+        /**
+         * Connection timeout override, may be null.
+         */
+        private Duration connectionTimeout;
+
+        /**
+         * Idle timeout override, may be null.
+         */
+        private Duration idleTimeout;
+
+        /**
+         * Request timeout override, may be null.
+         */
+        private Duration requestTimeout;
+
+        /**
+         * Set connection timeout.
+         * @param val Connection timeout
+         * @return This builder
+         */
+        public Builder connectionTimeout(final Duration val) {
+            this.connectionTimeout = val;
+            return this;
+        }
+
+        /**
+         * Set idle timeout.
+         * @param val Idle timeout
+         * @return This builder
+         */
+        public Builder idleTimeout(final Duration val) {
+            this.idleTimeout = val;
+            return this;
+        }
+
+        /**
+         * Set request timeout.
+         * @param val Request timeout
+         * @return This builder
+         */
+        public Builder requestTimeout(final Duration val) {
+            this.requestTimeout = val;
+            return this;
+        }
+
+        /**
+         * Build, inheriting unset values from the given parent.
+         * @param parent Parent settings for fallback values
+         * @return Resolved settings
+         */
+        public TimeoutSettings buildWithParent(final TimeoutSettings parent) {
+            return new TimeoutSettings(
+                this.connectionTimeout != null
+                    ? this.connectionTimeout : parent.connectionTimeout(),
+                this.idleTimeout != null
+                    ? this.idleTimeout : parent.idleTimeout(),
+                this.requestTimeout != null
+                    ? this.requestTimeout : parent.requestTimeout()
+            );
+        }
+
+        /**
+         * Build, inheriting unset values from the built-in defaults.
+         * @return Resolved settings
+         */
+        public TimeoutSettings build() {
+            return this.buildWithParent(TimeoutSettings.defaults());
+        }
+    }
+}
diff --git a/artipie-core/src/main/java/com/artipie/index/ArtifactDocument.java b/artipie-core/src/main/java/com/artipie/index/ArtifactDocument.java
new file mode 100644
index 000000000..0fc4a659f
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/index/ArtifactDocument.java
@@ -0,0 +1,42 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.index;
+
+import java.time.Instant;
+import java.util.Objects;
+
+/**
+ * Artifact document for the search index.
+ *
+ * @param repoType Repository type (e.g., "maven", "npm", "pypi")
+ * @param repoName Repository name
+ * @param artifactPath Full artifact path (unique per repo)
+ * @param artifactName Human-readable artifact name (tokenized for search)
+ * @param version Artifact version
+ * @param size Artifact size in bytes
+ * @param createdAt Creation timestamp
+ * @param owner Owner/uploader username (nullable)
+ * @since 1.20.13
+ */
+public record ArtifactDocument(
+ String repoType,
+ String repoName,
+ String artifactPath,
+ String artifactName,
+ String version,
+ long size,
+ Instant createdAt,
+ String owner
+) {
+
+ /**
+ * Ctor.
+ */
+ public ArtifactDocument {
+ Objects.requireNonNull(repoType, "repoType");
+ Objects.requireNonNull(repoName, "repoName");
+ Objects.requireNonNull(artifactPath, "artifactPath");
+ }
+}
diff --git a/artipie-core/src/main/java/com/artipie/index/ArtifactIndex.java b/artipie-core/src/main/java/com/artipie/index/ArtifactIndex.java
new file mode 100644
index 000000000..70454190b
--- /dev/null
+++ b/artipie-core/src/main/java/com/artipie/index/ArtifactIndex.java
@@ -0,0 +1,139 @@
+/*
+ * The MIT License (MIT) Copyright (c) 2020-2023 artipie.com
+ * https://github.com/artipie/artipie/blob/master/LICENSE.txt
+ */
+package com.artipie.index;
+
+import java.io.Closeable;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Artifact search index interface.
+ * Supports full-text search, exact path lookup, and artifact-to-repo location.
+ *
+ * @since 1.20.13
+ */
+public interface ArtifactIndex extends Closeable {
+
+ /**
+ * Index (upsert) an artifact document.
+ * If a document with the same repoName+artifactPath exists, it is replaced.
+ *
+ * @param doc Artifact document to index
+ * @return Future completing when indexed
+ */
+ CompletableFuture<Void> index(ArtifactDocument doc);
+
+ /**
+ * Remove an artifact from the index.
+ *
+ * @param repoName Repository name
+ * @param artifactPath Artifact path
+ * @return Future completing when removed
+ */
+ CompletableFuture<Void> remove(String repoName, String artifactPath);
+
+ /**
+ * Full-text search across all indexed artifacts.
+ *
+ * <p>NOTE(review): the return type is a raw {@code CompletableFuture} —
+ * its generic parameter (the search-result type) appears to have been
+ * stripped by markup processing; restore it from the original sources.
+ *
+ * @param query Search query string
+ * @param maxResults Maximum results to return
+ * @param offset Starting offset for pagination
+ * @return Search result with matching documents
+ */
+ CompletableFuture search(String query, int maxResults, int offset);
+
+ /**
+ * Locate which repositories contain a given artifact path.
+ * Uses path_prefix matching — slower, used as fallback.
+ *
+ * @param artifactPath Artifact path to locate
+ * @return List of repository names containing this artifact
+ */
+ CompletableFuture<List<String>> locate(String artifactPath);
+
+ /**
+ * Locate which repositories contain an artifact by its indexed name.
+ * Uses the {@code name} column with B-tree index — O(log n), fast.
+ * This is the primary operation for group lookup when the adapter type
+ * is known and the name can be parsed from the URL.
+ * Default implementation falls back to {@link #locate(String)}.
+ *
+ * @param artifactName Artifact name as stored in the DB (adapter-specific format)
+ * @return List of repository names containing this artifact
+ */
+ default CompletableFuture<List<String>> locateByName(final String artifactName) {
+ return locate(artifactName);
+ }
+
+ /**
+ * Whether the index has completed its initial warmup scan.
+ * Default is pessimistic: reports {@code false} until an
+ * implementation overrides this or {@link #setWarmedUp()} has effect.
+ * @return true if warmup is complete and the index can be trusted
+ */
+ default boolean isWarmedUp() {
+ return false;
+ }
+
+ /**
+ * Mark the index as warmed up after initial scan completes.
+ * Default is a no-op; stateful implementations override it so that
+ * {@link #isWarmedUp()} starts reporting {@code true}.
+ */
+ default void setWarmedUp() {
+ // no-op by default
+ }
+
+ /**
+ * Get index statistics. Default implementation reports no stats.
+ * NOTE(review): the value type was reconstructed as {@code Object}
+ * after markup stripping — confirm against implementations.
+ * @return map of stat name to value
+ */
+ default CompletableFuture<Map<String, Object>> getStats() {
+ return CompletableFuture.completedFuture(Map.of());
+ }
+
+ /**
+ * Index a batch of documents efficiently (single commit).
+ * Default implementation falls back to individual index() calls,
+ * chained sequentially so documents are indexed in iteration order.
+ *
+ * @param docs Collection of documents to index
+ * @return Future completing when batch is indexed
+ */
+ default CompletableFuture<Void> indexBatch(final java.util.Collection<ArtifactDocument> docs) {
+ CompletableFuture<Void> result = CompletableFuture.completedFuture(null);
+ for (final ArtifactDocument doc : docs) {
+ result = result.thenCompose(v -> index(doc));
+ }
+ return result;
+ }
+
+ /**
+ * No-op implementation that performs no indexing or searching.
+ */
+ ArtifactIndex NOP = new ArtifactIndex() {
+ @Override
+ public CompletableFuture index(final ArtifactDocument doc) {
+ return CompletableFuture.completedFuture(null);
+ }
+
+ @Override
+ public CompletableFuture