Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions packages/cli/src/commands/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
} from "@codemcp/knowledge-core";
import {
GitRepoLoader,
ZipLoader,
WebSourceType,
} from "@codemcp/knowledge-content-loader";

Expand Down Expand Up @@ -262,6 +263,61 @@ export const initCommand = new Command("init")
docset_id: docsetId,
};

await fs.writeFile(
path.join(localPath, `.agentic-source-${index}.json`),
JSON.stringify(metadata, null, 2),
);
} else if (source.type === "zip") {
// Handle zip file initialization
const loader = new ZipLoader();
const sourceUrl = source.url || source.path || "";

console.log(chalk.gray(` Using ZipLoader for zip extraction`));

const webSourceConfig = {
url: sourceUrl,
type: WebSourceType.ZIP,
options: {
paths: source.paths || [],
},
};

// Validate configuration
const validation = loader.validateConfig(webSourceConfig);
if (validation !== true) {
throw new Error(
`Invalid zip source configuration: ${validation}`,
);
}

// Load content using ZipLoader
const result = await loader.load(webSourceConfig, localPath);

if (!result.success) {
throw new Error(`Zip loading failed: ${result.error}`);
}

// Collect discovered paths for config update
allDiscoveredPaths.push(...result.files);

totalFiles += result.files.length;
console.log(
chalk.green(
` ✅ Extracted ${result.files.length} files from zip`,
),
);

// Create source metadata
const metadata = {
source_url: sourceUrl,
source_type: source.type,
downloaded_at: new Date().toISOString(),
files_count: result.files.length,
files: result.files,
docset_id: docsetId,
content_hash: result.contentHash,
};

await fs.writeFile(
path.join(localPath, `.agentic-source-${index}.json`),
JSON.stringify(metadata, null, 2),
Expand Down
104 changes: 104 additions & 0 deletions packages/cli/src/commands/refresh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
calculateLocalPath,
ensureKnowledgeGitignoreSync,
} from "@codemcp/knowledge-core";
import { ZipLoader, WebSourceType } from "@codemcp/knowledge-content-loader";

interface DocsetMetadata {
docset_id: string;
Expand Down Expand Up @@ -168,6 +169,16 @@ async function refreshDocset(
);
totalFiles += sourceFiles.files_count;
refreshedSources.push(sourceFiles);
} else if (source.type === "zip") {
const sourceFiles = await refreshZipSource(
source,
localPath,
index,
docset.id,
force,
);
totalFiles += sourceFiles.files_count;
refreshedSources.push(sourceFiles);
} else {
console.log(
chalk.yellow(
Expand Down Expand Up @@ -361,6 +372,99 @@ async function refreshGitSource(
}
}

/**
 * Refresh a single zip-type source of a docset.
 *
 * Steps:
 *  1. Read the per-source metadata file (`.agentic-source-<index>.json`) from
 *     `localPath`, if one exists and is a JSON object.
 *  2. Unless `force` is set, ask the loader for the current content id and
 *     compare it with the stored `content_hash`. On a match only the
 *     `downloaded_at` timestamp is bumped and the existing record is returned.
 *  3. Otherwise unlink the files recorded by the previous run and load the zip
 *     again through `ZipLoader.load`.
 *  4. Persist and return the new metadata, including `content_hash`, so the
 *     next refresh can do the comparison in step 2.
 *
 * @param source   Source entry from the docset configuration; `url`, `path`
 *                 and `paths` are read from it.
 * @param localPath Directory that holds the extracted files and the metadata file.
 * @param index    Position of the source within the docset; used in the
 *                 metadata file name.
 * @param docsetId Identifier stored in the metadata record.
 * @param force    When true, skip the change check and always reload.
 * @returns The metadata record that was written to disk.
 * @throws Error when the loader reports `success === false`.
 */
async function refreshZipSource(
  source: any,
  localPath: string,
  index: number,
  docsetId: string,
  force: boolean,
): Promise<SourceMetadata> {
  const sourceMetadataPath = path.join(
    localPath,
    `.agentic-source-${index}.json`,
  );
  let existingSourceMetadata: SourceMetadata | null = null;

  try {
    const content = await fs.readFile(sourceMetadataPath, "utf8");
    const parsed: unknown = JSON.parse(content);
    // Only accept a JSON object; anything else (null, array, scalar) is
    // treated like a missing file so we fall through to a full refresh.
    if (
      typeof parsed === "object" &&
      parsed !== null &&
      !Array.isArray(parsed)
    ) {
      existingSourceMetadata = parsed as SourceMetadata;
    }
  } catch {
    // No existing (or unreadable) metadata, will do full refresh
  }

  const sourceUrl = source.url || source.path || "";
  const loader = new ZipLoader();
  const webSourceConfig = {
    url: sourceUrl,
    type: WebSourceType.ZIP,
    options: {
      paths: source.paths || [],
    },
  };

  // Check if content has changed
  if (!force && existingSourceMetadata) {
    try {
      const currentId = await loader.getContentId(webSourceConfig);
      const lastHash = (
        existingSourceMetadata as SourceMetadata & { content_hash?: unknown }
      ).content_hash;
      // Require a stored hash: two missing values must not count as "unchanged".
      if (lastHash != null && lastHash === currentId) {
        const updatedMetadata: SourceMetadata = {
          ...existingSourceMetadata,
          downloaded_at: new Date().toISOString(),
        };
        await fs.writeFile(
          sourceMetadataPath,
          JSON.stringify(updatedMetadata, null, 2),
        );
        return updatedMetadata;
      }
    } catch {
      // Could not check, proceed with full refresh
    }
  }

  // Remove old files from this source (if we have metadata)
  if (existingSourceMetadata) {
    const previousFiles: unknown = existingSourceMetadata.files;
    if (Array.isArray(previousFiles)) {
      const root = path.resolve(localPath);
      for (const file of previousFiles) {
        if (typeof file !== "string") {
          continue;
        }
        const filePath = path.join(localPath, file);
        // Never unlink anything outside the docset directory, even if the
        // recorded file list contains "../" segments.
        const relative = path.relative(root, path.resolve(filePath));
        const escapesRoot =
          relative === "" ||
          relative === ".." ||
          relative.startsWith(`..${path.sep}`) ||
          path.isAbsolute(relative);
        if (escapesRoot) {
          continue;
        }
        try {
          await fs.unlink(filePath);
        } catch {
          // File might already be deleted, ignore
        }
      }
    }
  }

  // Load content
  const result = await loader.load(webSourceConfig, localPath);

  if (!result.success) {
    throw new Error(`Zip refresh failed: ${result.error}`);
  }

  const metadata: SourceMetadata = {
    source_url: sourceUrl,
    source_type: "zip",
    downloaded_at: new Date().toISOString(),
    files_count: result.files.length,
    files: result.files,
    docset_id: docsetId,
  };

  // Store content hash for future change detection
  const metadataWithHash = {
    ...metadata,
    content_hash: result.contentHash,
  };

  await fs.writeFile(
    sourceMetadataPath,
    JSON.stringify(metadataWithHash, null, 2),
  );

  // Return exactly what was persisted so both return paths of this function
  // hand callers a record that carries `content_hash`.
  return metadataWithHash;
}

// Reuse utility functions from init.ts
async function findMarkdownFiles(dir: string): Promise<string[]> {
const files: string[] = [];
Expand Down
2 changes: 2 additions & 0 deletions packages/content-loader/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
"typecheck": "tsc --noEmit"
},
"dependencies": {
"adm-zip": "0.5.16",
"simple-git": "^3.22.0"
},
"devDependencies": {
"@eslint/js": "^9.34.0",
"@types/adm-zip": "0.5.7",
"@types/node": "^24.3.0",
"eslint": "^9.34.0",
"rimraf": "^6.0.1",
Expand Down
17 changes: 11 additions & 6 deletions packages/content-loader/src/__tests__/git-repo-loader.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import { join } from "path";
import { tmpdir } from "os";
import { GitRepoLoader } from "../content/git-repo-loader.js";
import { WebSourceType } from "../types.js";
import {
isDocumentationFile,
filterDocumentationFiles,
} from "../content/file-filter.js";

describe("Git Repository Loading - User Workflows", () => {
let tempDir: string;
Expand Down Expand Up @@ -191,7 +195,7 @@ describe("Git Repository Loading - User Workflows", () => {
];

// Test the filtering method directly
const filtered = (loader as any).filterDocumentationFiles(mockFiles);
const filtered = filterDocumentationFiles(mockFiles);

expect(filtered).toEqual([
"README.md",
Expand Down Expand Up @@ -274,7 +278,7 @@ describe("Git Repository Loading - User Workflows", () => {

// Test each case
for (const testCase of testCases) {
const result = (loader as any).isDocumentationFile(testCase.file);
const result = isDocumentationFile(testCase.file);
expect(result).toBe(testCase.expected);
}
});
Expand All @@ -301,7 +305,7 @@ describe("Git Repository Loading - User Workflows", () => {
"examples/demo.js", // Should be included
];

const filtered = (loader as any).filterDocumentationFiles(testFiles);
const filtered = filterDocumentationFiles(testFiles);

// Verify smart filtering is working correctly
expect(filtered).toContain("README.md");
Expand Down Expand Up @@ -346,7 +350,7 @@ describe("Git Repository Loading - User Workflows", () => {
];

// Test direct filtering method
const filtered = (loader as any).filterDocumentationFiles(mockFiles);
const filtered = filterDocumentationFiles(mockFiles);

expect(filtered).toEqual([
"README.md",
Expand Down Expand Up @@ -423,8 +427,9 @@ describe("Git Repository Loading - User Workflows", () => {
// but we can test it indirectly through the architecture
expect(typeof (loader as any).scanAllFiles).toBe("function");
expect(typeof (loader as any).extractDocumentationFiles).toBe("function");
expect(typeof (loader as any).filterDocumentationFiles).toBe("function");
expect(typeof (loader as any).isDocumentationFile).toBe("function");
// filterDocumentationFiles and isDocumentationFile are shared utilities in file-filter.ts
expect(typeof filterDocumentationFiles).toBe("function");
expect(typeof isDocumentationFile).toBe("function");

// These methods form the centralized architecture for content filtering
});
Expand Down
81 changes: 32 additions & 49 deletions packages/content-loader/src/__tests__/smart-filtering.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,74 +2,57 @@
* Smart Filtering Tests - Test the key filtering behaviors for REQ-18
*/

import { describe, test, expect, beforeEach } from "vitest";
import { GitRepoLoader } from "../content/git-repo-loader.js";
import { describe, test, expect } from "vitest";
import {
isDocumentationFile,
filterDocumentationFiles,
} from "../content/file-filter.js";

describe("Smart Content Filtering - REQ-18", () => {
let loader: GitRepoLoader;

beforeEach(() => {
loader = new GitRepoLoader();
});

test("should include markdown files anywhere in repository", () => {
expect((loader as any).isDocumentationFile("README.md")).toBe(true);
expect((loader as any).isDocumentationFile("docs/guide.md")).toBe(true);
expect((loader as any).isDocumentationFile("deep/nested/api.mdx")).toBe(
true,
);
expect((loader as any).isDocumentationFile("tutorial.rst")).toBe(true);
expect((loader as any).isDocumentationFile("notes.txt")).toBe(true);
expect(isDocumentationFile("README.md")).toBe(true);
expect(isDocumentationFile("docs/guide.md")).toBe(true);
expect(isDocumentationFile("deep/nested/api.mdx")).toBe(true);
expect(isDocumentationFile("tutorial.rst")).toBe(true);
expect(isDocumentationFile("notes.txt")).toBe(true);
});

test("should exclude .github directory files even if they are markdown", () => {
expect(
(loader as any).isDocumentationFile(".github/issue_template.md"),
).toBe(false);
expect(
(loader as any).isDocumentationFile(".github/pull_request_template.md"),
).toBe(false);
expect(
(loader as any).isDocumentationFile(".github/workflows/ci.yml"),
).toBe(false);
expect(isDocumentationFile(".github/issue_template.md")).toBe(false);
expect(isDocumentationFile(".github/pull_request_template.md")).toBe(false);
expect(isDocumentationFile(".github/workflows/ci.yml")).toBe(false);
});

test("should exclude project metadata files", () => {
expect((loader as any).isDocumentationFile("CHANGELOG.md")).toBe(false);
expect((loader as any).isDocumentationFile("LICENSE.md")).toBe(false);
expect((loader as any).isDocumentationFile("CONTRIBUTING.md")).toBe(false);
expect((loader as any).isDocumentationFile("CODE_OF_CONDUCT.md")).toBe(
false,
);
expect(isDocumentationFile("CHANGELOG.md")).toBe(false);
expect(isDocumentationFile("LICENSE.md")).toBe(false);
expect(isDocumentationFile("CONTRIBUTING.md")).toBe(false);
expect(isDocumentationFile("CODE_OF_CONDUCT.md")).toBe(false);
});

test("should exclude config and source files", () => {
// Config files should be excluded
expect((loader as any).isDocumentationFile("package.json")).toBe(false);
expect((loader as any).isDocumentationFile(".postcssrc.json")).toBe(false);
expect((loader as any).isDocumentationFile("config.ts")).toBe(false);
expect((loader as any).isDocumentationFile("styles.css")).toBe(false);
expect(isDocumentationFile("package.json")).toBe(false);
expect(isDocumentationFile(".postcssrc.json")).toBe(false);
expect(isDocumentationFile("config.ts")).toBe(false);
expect(isDocumentationFile("styles.css")).toBe(false);

// Source files should be excluded
expect((loader as any).isDocumentationFile("index.ts")).toBe(false);
expect((loader as any).isDocumentationFile("src/index.ts")).toBe(false);
expect((loader as any).isDocumentationFile("src/utils.ts")).toBe(false);
expect((loader as any).isDocumentationFile("src/helpers.ts")).toBe(false);
expect(isDocumentationFile("index.ts")).toBe(false);
expect(isDocumentationFile("src/index.ts")).toBe(false);
expect(isDocumentationFile("src/utils.ts")).toBe(false);
expect(isDocumentationFile("src/helpers.ts")).toBe(false);
});

test("should include files in examples directory", () => {
expect((loader as any).isDocumentationFile("examples/config.json")).toBe(
true,
);
expect((loader as any).isDocumentationFile("examples/demo.js")).toBe(true);
expect((loader as any).isDocumentationFile("examples/style.css")).toBe(
true,
);
expect(isDocumentationFile("examples/config.json")).toBe(true);
expect(isDocumentationFile("examples/demo.js")).toBe(true);
expect(isDocumentationFile("examples/style.css")).toBe(true);
});

test("should exclude binary files even in examples", () => {
expect((loader as any).isDocumentationFile("examples/app.exe")).toBe(false);
expect((loader as any).isDocumentationFile("examples/lib.so")).toBe(false);
expect(isDocumentationFile("examples/app.exe")).toBe(false);
expect(isDocumentationFile("examples/lib.so")).toBe(false);
});

test("should filter mixed file list correctly", () => {
Expand All @@ -82,7 +65,7 @@ describe("Smart Content Filtering - REQ-18", () => {
"examples/demo.js", // Include
];

const filtered = (loader as any).filterDocumentationFiles(mixedFiles);
const filtered = filterDocumentationFiles(mixedFiles);

expect(filtered).toEqual(["README.md", "docs/api.md", "examples/demo.js"]);
});
Expand Down Expand Up @@ -127,7 +110,7 @@ describe("Smart Content Filtering - REQ-18", () => {
".github/workflows/ci.yml",
];

const filtered = (loader as any).filterDocumentationFiles(repositoryFiles);
const filtered = filterDocumentationFiles(repositoryFiles);

// Expected: Documentation files + all files from examples/samples
const expectedIncludes = [
Expand Down
Loading
Loading