diff --git a/components/ui/Autocomplete.tsx b/components/ui/Autocomplete.tsx
index 0aa20feb0..d048009b9 100644
--- a/components/ui/Autocomplete.tsx
+++ b/components/ui/Autocomplete.tsx
@@ -42,7 +42,11 @@ const InKeepTrigger = dynamic(
},
) as typeof InkeepModalSearchAndChat;
-import { DocsSearchItem, EndpointSearchItem } from "@/types";
+import {
+ DocsSearchItem,
+ EndpointSearchItem,
+ EnhancedDocsSearchItem,
+} from "@/types";
import { Button } from "@telegraph/button";
import { Icon } from "@telegraph/icon";
import { MenuItem } from "@telegraph/menu";
@@ -72,7 +76,9 @@ const highlightingStyles = {
const NUM_DOCS_HITS = 12;
const NUM_ENDPOINT_HITS = 5;
-type ResultItem = (DocsSearchItem & BaseItem) | (EndpointSearchItem & BaseItem);
+type ResultItem =
+ | (EnhancedDocsSearchItem & BaseItem)
+ | (EndpointSearchItem & BaseItem);
const algoliaAppId = process.env.NEXT_PUBLIC_ALGOLIA_APP_ID || "";
const algoliaSearchApiKey =
@@ -178,7 +184,7 @@ const DocsSearchResult = ({
)}
- {item.section}
+ {item.pageTitle ? `${item.pageTitle as string} ā¢` : ""} {item.section}
@@ -757,7 +763,7 @@ const Autocomplete = () => {
/>
) : (
autocomplete.setQuery("")}
/>
)}
diff --git a/package.json b/package.json
index 49cc8c449..a2a76e8ab 100644
--- a/package.json
+++ b/package.json
@@ -16,9 +16,10 @@
"generate-llms": "yarn run open-api-to-md && tsx scripts/generateApiMarkdown.ts && tsx scripts/generateLlmsTxt.ts",
"generate-reference-md": "tsx scripts/generateApiMarkdown.ts",
"index-apis": "tsx scripts/indexApisForSearch.ts",
+ "index-docs": "tsx scripts/indexDocsForSearch.ts",
"open-api-to-md": "bash scripts/openApiToMd.sh",
"predev": "yarn generate-llms",
- "prebuild": "yarn generate-llms && yarn index-apis"
+ "prebuild": "yarn generate-llms && yarn index-docs && yarn index-apis"
},
"dependencies": {
"@algolia/autocomplete-js": "^1.6.3",
diff --git a/scripts/indexDocsForSearch.ts b/scripts/indexDocsForSearch.ts
new file mode 100644
index 000000000..d9a073097
--- /dev/null
+++ b/scripts/indexDocsForSearch.ts
@@ -0,0 +1,407 @@
+import fs from "fs";
+import path from "path";
+import { unified } from "unified";
+import remarkParse from "remark-parse";
+import remarkFrontmatter from "remark-frontmatter";
+import yaml from "yaml";
+import algoliasearch from "algoliasearch";
+import { loadEnvConfig } from "@next/env";
+import type { EnhancedDocsSearchItem } from "@/types";
+
+// Load Next.js environment variables
+const projectDir = process.cwd();
+loadEnvConfig(projectDir);
+
+const algoliaAppId = process.env.NEXT_PUBLIC_ALGOLIA_APP_ID ?? "";
+const algoliaAdminApiKey = process.env.ALGOLIA_ADMIN_API_KEY ?? "";
+const algoliaPagesIndexName = process.env.NEXT_PUBLIC_ALGOLIA_INDEX_NAME ?? "";
+
+const CONTENT_DIR = path.join(projectDir, "content");
+const DOCS_FILE_EXTENSIONS = [".mdx", ".md"];
+
+// Maximum content length per record (in characters)
+// Algolia recommends keeping records small for better performance
+const MAX_CONTENT_LENGTH = 2000;
+
+// Keep count of indexed items
+let pageCount = 0;
+let headingCount = 0;
+
+interface Heading {
+ level: number;
+ title: string;
+ slug: string;
+ content: string;
+}
+
+interface Frontmatter {
+ title: string;
+ description?: string;
+ tags?: string[];
+ section: string;
+}
+
+/**
+ * Recursively get all files in a directory with specific extensions
+ */
+function getAllFilesInDir(
+ directory: string,
+ files: string[] = [],
+ extensions?: string[],
+): string[] {
+ fs.readdirSync(directory).forEach((file) => {
+ const subpath = path.join(directory, file);
+ if (fs.lstatSync(subpath).isDirectory()) {
+ getAllFilesInDir(subpath, files, extensions);
+ } else {
+ if (!extensions || extensions.includes(path.extname(subpath))) {
+ files.push(subpath);
+ }
+ }
+ });
+
+ return files;
+}
+
+/**
+ * Parse frontmatter from markdown content using remark
+ */
+async function parseFrontmatter(
+ markdownContent: string,
+): Promise {
+ const file = await unified()
+ .use(remarkParse)
+ .use(remarkFrontmatter, ["yaml"])
+ .parse(markdownContent);
+
+ const yamlNode = file.children.find(
+ (node): node is { type: "yaml"; value: string } => node.type === "yaml",
+ );
+ if (!yamlNode) return null;
+ return yaml.parse(yamlNode.value);
+}
+
+/**
+ * Create a URL-friendly slug from a heading title
+ */
+function slugify(text: string): string {
+ return text
+ .toLowerCase()
+ .replace(/[^\w\s-]/g, "") // Remove non-word characters except spaces and hyphens
+ .replace(/\s+/g, "-") // Replace spaces with hyphens
+ .replace(/-+/g, "-") // Replace multiple hyphens with single
+ .trim();
+}
+
+/**
+ * Remove frontmatter from markdown content
+ */
+function removeFrontmatter(content: string): string {
+ // Match YAML frontmatter at the start of the file
+ const frontmatterRegex = /^---[\s\S]*?---\n*/;
+ return content.replace(frontmatterRegex, "");
+}
+
+/**
+ * Extract plain text from markdown content
+ * Removes JSX components, imports, code blocks, and other non-text elements
+ */
+function extractTextContent(mdxContent: string): string {
+ let content = mdxContent;
+
+ // Remove import statements
+ content = content.replace(/^import\s+.*$/gm, "");
+
+ // Remove export statements
+ content = content.replace(/^export\s+.*$/gm, "");
+
+ // Remove code blocks (fenced)
+ content = content.replace(/```[\s\S]*?```/g, "");
+
+ // Remove inline code
+ content = content.replace(/`[^`]+`/g, "");
+
+ // Remove JSX components (self-closing and with children)
+ content = content.replace(/<[A-Z][^>]*\/>/g, ""); // Self-closing like
+ content = content.replace(/<[A-Z][^>]*>[\s\S]*?<\/[A-Z][^>]*>/g, ""); // With children
+
+ // Remove HTML-style components
+ content = content.replace(/<[a-z][^>]*>[\s\S]*?<\/[a-z][^>]*>/g, "");
+
+ // Remove remaining HTML/JSX tags
+ content = content.replace(/<[^>]+>/g, "");
+
+ // Remove markdown links but keep the text
+ content = content.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
+
+ // Remove markdown images
+ content = content.replace(/!\[([^\]]*)\]\([^)]+\)/g, "");
+
+ // Remove markdown emphasis markers
+ content = content.replace(/\*\*([^*]+)\*\*/g, "$1"); // Bold
+ content = content.replace(/\*([^*]+)\*/g, "$1"); // Italic
+ content = content.replace(/__([^_]+)__/g, "$1"); // Bold
+ content = content.replace(/_([^_]+)_/g, "$1"); // Italic
+
+ // Remove heading markers
+ content = content.replace(/^#{1,6}\s+/gm, "");
+
+ // Remove horizontal rules
+ content = content.replace(/^[-*_]{3,}$/gm, "");
+
+ // Remove list markers
+ content = content.replace(/^\s*[-*+]\s+/gm, "");
+ content = content.replace(/^\s*\d+\.\s+/gm, "");
+
+ // Remove blockquote markers
+ content = content.replace(/^\s*>\s*/gm, "");
+
+ // Normalize whitespace
+ content = content.replace(/\n{3,}/g, "\n\n"); // Multiple newlines to double
+ content = content.replace(/[ \t]+/g, " "); // Multiple spaces to single
+
+ return content.trim();
+}
+
+/**
+ * Extract headings with their content from markdown
+ */
+function extractHeadings(mdxContent: string): Heading[] {
+ // Remove frontmatter first
+ const contentWithoutFrontmatter = removeFrontmatter(mdxContent);
+
+ const headingRegex = /^(#{2,3})\s+(.+)$/gm;
+ const headings: Heading[] = [];
+ let match;
+
+ const matches: Array<{ index: number; level: number; title: string }> = [];
+
+ while ((match = headingRegex.exec(contentWithoutFrontmatter)) !== null) {
+ matches.push({
+ index: match.index,
+ level: match[1].length,
+ title: match[2].trim(),
+ });
+ }
+
+ // Extract content for each heading
+ for (let i = 0; i < matches.length; i++) {
+ const current = matches[i];
+ const next = matches[i + 1];
+
+ const contentStart =
+ current.index + `${"#".repeat(current.level)} ${current.title}`.length;
+ const contentEnd = next ? next.index : contentWithoutFrontmatter.length;
+ const rawContent = contentWithoutFrontmatter.slice(
+ contentStart,
+ contentEnd,
+ );
+
+ const cleanContent = extractTextContent(rawContent);
+
+ // Only include headings with meaningful content
+ if (cleanContent.length > 20) {
+ headings.push({
+ level: current.level,
+ title: current.title,
+ slug: slugify(current.title),
+ content: cleanContent.slice(0, MAX_CONTENT_LENGTH),
+ });
+ }
+ }
+
+ return headings;
+}
+
+/**
+ * Get the intro content (content before the first heading)
+ */
+function getIntroContent(mdxContent: string): string {
+ const contentWithoutFrontmatter = removeFrontmatter(mdxContent);
+
+ // Find the first H2 or H3 heading
+ const firstHeadingMatch = contentWithoutFrontmatter.match(/^#{2,3}\s+/m);
+
+ if (firstHeadingMatch && firstHeadingMatch.index !== undefined) {
+ const introRaw = contentWithoutFrontmatter.slice(
+ 0,
+ firstHeadingMatch.index,
+ );
+ return extractTextContent(introRaw).slice(0, MAX_CONTENT_LENGTH);
+ }
+
+ // No headings found, use all content
+ return extractTextContent(contentWithoutFrontmatter).slice(
+ 0,
+ MAX_CONTENT_LENGTH,
+ );
+}
+
+/**
+ * Convert file path to URL path
+ */
+function filePathToUrlPath(filePath: string): string {
+ return filePath
+ .replace(CONTENT_DIR, "")
+ .replace(/\.mdx?$/, "")
+ .replace("/index", "")
+ .replace(/^\//, ""); // Remove leading slash for objectID
+}
+
+/**
+ * Queue of items to save to Algolia
+ */
+const itemsToSave: EnhancedDocsSearchItem[] = [];
+
+async function queueItem(item: EnhancedDocsSearchItem) {
+ // Validate path doesn't start with /
+ if (item.path.startsWith("/")) {
+ console.error(`Path may not start with "/". Violating path: ${item.path}`);
+ return;
+ }
+
+ console.log(
+ `Indexing ${item.isPageLevel ? "page" : "heading"}: ${item.title} -> ${
+ item.path
+ }`,
+ );
+ itemsToSave.push(item);
+}
+
+/**
+ * Process a single MDX file and create search records
+ */
+async function processFile(filePath: string): Promise {
+ // Skip special directories
+ if (
+ filePath.includes("/__mapi-reference/") ||
+ filePath.includes("/__api-reference/") ||
+ filePath.includes("/__cli/")
+ ) {
+ return;
+ }
+
+ const content = fs.readFileSync(filePath, "utf-8");
+ const frontmatter = await parseFrontmatter(content);
+
+ if (!frontmatter || !frontmatter.title || !frontmatter.section) {
+ console.warn(`Skipping ${filePath}: missing required frontmatter`);
+ return;
+ }
+
+ const urlPath = filePathToUrlPath(filePath);
+
+ // Create page-level record
+ const introContent = getIntroContent(content);
+ const pageRecord: EnhancedDocsSearchItem = {
+ objectID: `page-${urlPath}`,
+ path: urlPath,
+ title: frontmatter.title,
+ pageTitle: frontmatter.title,
+ description: frontmatter.description,
+ content: introContent,
+ section: frontmatter.section,
+ tags: frontmatter.tags || [],
+ headingLevel: 0,
+ contentType: "document",
+ index: "pages",
+ isPageLevel: true,
+ };
+ await queueItem(pageRecord);
+ pageCount++;
+
+ // Extract and create heading-level records
+ const headings = extractHeadings(content);
+ for (const heading of headings) {
+ const headingPath = `${urlPath}#${heading.slug}`;
+ const headingRecord: EnhancedDocsSearchItem = {
+ objectID: `heading-${headingPath}`,
+ path: headingPath,
+ title: heading.title,
+ pageTitle: frontmatter.title,
+ content: heading.content,
+ section: frontmatter.section,
+ tags: frontmatter.tags || [],
+ headingLevel: heading.level,
+ contentType: "document",
+ index: "pages",
+ isPageLevel: false,
+ };
+ await queueItem(headingRecord);
+ headingCount++;
+ }
+}
+
+/**
+ * Main entry point
+ */
+async function main() {
+ console.log("š Starting docs search indexing...\n");
+
+ let skipIndexing = false;
+
+ // Check for required environment variables
+ if (!algoliaAppId || !algoliaAdminApiKey || !algoliaPagesIndexName) {
+ const missing: string[] = [];
+ if (!algoliaAppId) missing.push("NEXT_PUBLIC_ALGOLIA_APP_ID");
+ if (!algoliaAdminApiKey) missing.push("ALGOLIA_ADMIN_API_KEY");
+ if (!algoliaPagesIndexName) missing.push("NEXT_PUBLIC_ALGOLIA_INDEX_NAME");
+
+ console.warn(
+ "Missing Algolia environment variables. Continuing with script but skipping actual indexing.\n\nMissing: " +
+ missing.join(", "),
+ );
+ skipIndexing = true;
+ }
+
+ // Get all MDX/MD files
+ const files = getAllFilesInDir(CONTENT_DIR, [], DOCS_FILE_EXTENSIONS);
+ console.log(`Found ${files.length} content files to process\n`);
+
+ // Process each file
+ for (const file of files) {
+ try {
+ await processFile(file);
+ } catch (error) {
+ console.error(`Error processing ${file}:`, error);
+ }
+ }
+
+ console.log("\nš Indexing summary:");
+ console.log(` Pages indexed: ${pageCount}`);
+ console.log(` Headings indexed: ${headingCount}`);
+ console.log(` Total records: ${itemsToSave.length}`);
+
+ // Save to Algolia
+ if (!skipIndexing && itemsToSave.length > 0) {
+ console.log("\nš¤ Uploading to Algolia...");
+
+ const client = algoliasearch(algoliaAppId, algoliaAdminApiKey);
+ const index = client.initIndex(algoliaPagesIndexName);
+
+ // Save objects in batches (Algolia recommends batches of 1000)
+ const BATCH_SIZE = 1000;
+ for (let i = 0; i < itemsToSave.length; i += BATCH_SIZE) {
+ const batch = itemsToSave.slice(i, i + BATCH_SIZE);
+ await index.saveObjects(batch);
+ console.log(
+ ` Saved batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(
+ itemsToSave.length / BATCH_SIZE,
+ )}`,
+ );
+ }
+
+ console.log("\nā
Successfully indexed docs for search!");
+ } else if (skipIndexing) {
+ console.log(
+ "\nā ļø Completed processing, but skipped Algolia upload due to missing environment variables.",
+ );
+ }
+
+ process.exit(0);
+}
+
+main().catch((error) => {
+ console.error("Fatal error:", error);
+ process.exit(1);
+});
diff --git a/types.ts b/types.ts
index f3cb89ee6..71bc5ed0f 100644
--- a/types.ts
+++ b/types.ts
@@ -47,6 +47,24 @@ export type DocsSearchItem = {
index: "pages" | "endpoints";
};
+// Enhanced search item type for improved Algolia indexing
+// This extends the basic DocsSearchItem with content and heading information
+export type EnhancedDocsSearchItem = {
+ objectID: string; // Unique ID (page-path or page-path#heading-slug)
+ path: string; // URL path (with optional anchor)
+ title: string; // Page title OR heading title
+ pageTitle: string; // Always the parent page title
+ description?: string; // From frontmatter (page-level only)
+ content: string; // Text content (truncated ~300-500 words)
+ section: string; // Top-level section (Concepts, Getting Started, etc.)
+ tags: string[]; // Tags from frontmatter
+ headingLevel: number; // 0 for page, 2 for H2, 3 for H3
+ contentType: "document" | "api-reference";
+ index: "pages" | "endpoints";
+ // Ranking fields
+ isPageLevel: boolean; // True if this is a page-level record (not a heading)
+};
+
export type EndpointSearchItem = DocsSearchItem & {
method: string;
endpoint: string;