From 9cfe5bc970e04738bccf09f6e668fb42ecb45c95 Mon Sep 17 00:00:00 2001 From: sid597 Date: Sun, 23 Nov 2025 22:58:43 +0530 Subject: [PATCH 1/7] duplicate node alert on page --- .../src/components/VectorDuplicateMatches.tsx | 174 ++++++++++++++++++ apps/roam/src/utils/hyde.ts | 62 ++++++- .../utils/initializeObserversAndListeners.ts | 8 + apps/roam/src/utils/useNodeContext.ts | 69 +++++++ 4 files changed, 311 insertions(+), 2 deletions(-) create mode 100644 apps/roam/src/components/VectorDuplicateMatches.tsx create mode 100644 apps/roam/src/utils/useNodeContext.ts diff --git a/apps/roam/src/components/VectorDuplicateMatches.tsx b/apps/roam/src/components/VectorDuplicateMatches.tsx new file mode 100644 index 000000000..03fbb990b --- /dev/null +++ b/apps/roam/src/components/VectorDuplicateMatches.tsx @@ -0,0 +1,174 @@ +import React, { useEffect, useState } from "react"; +import { Collapse, Spinner, Icon } from "@blueprintjs/core"; +import normalizePageTitle from "roamjs-components/queries/normalizePageTitle"; +import type { VectorMatch } from "~/utils/hyde"; +import type { Result } from "~/utils/types"; +import { findSimilarNodesVectorOnly } from "~/utils/hyde"; +import { useNodeContext } from "~/utils/useNodeContext"; +import ReactDOM from "react-dom"; + +const VectorDuplicateMatches = ({ pageTitle }: { pageTitle: string }) => { + const [isOpen, setIsOpen] = useState(false); + const [suggestionsLoading, setSuggestionsLoading] = useState(false); + const [hasSearched, setHasSearched] = useState(false); + const [suggestions, setSuggestions] = useState([]); + + const nodeContext = useNodeContext(pageTitle); + + useEffect(() => { + let isCancelled = false; + const fetchSuggestions = async () => { + if (!isOpen || hasSearched) return; + if (!nodeContext || !nodeContext.searchText.trim()) return; + + const { searchText, pageUid } = nodeContext; + + setSuggestionsLoading(true); + try { + const raw: VectorMatch[] = await findSimilarNodesVectorOnly({ + text: searchText, + threshold: 0.3, + limit: 20, + }); + const normalize = (value: string) => + normalizePageTitle(value || "") + .trim() + .toLowerCase(); + const normalizedPageTitle = normalize(pageTitle); + const normalizedSearchText = normalize(searchText); + const results: VectorMatch[] = raw.filter((candidate: VectorMatch) => { + const sameUid = !!pageUid && candidate.node.uid === pageUid; + const normalizedCandidateText = normalize(candidate.node.text); + const sameTitle = normalizedCandidateText === normalizedPageTitle; + const sameContent = normalizedCandidateText === normalizedSearchText; + return !sameUid && !sameTitle && !sameContent; + }); + if (!isCancelled) { + setSuggestions(results); + setSuggestionsLoading(false); + setHasSearched(true); + } + } catch (error: unknown) { + console.error("Error fetching vector duplicates:", error); + if (!isCancelled) { + setSuggestionsLoading(false); + } + } + }; + void fetchSuggestions(); + return () => { + isCancelled = true; + }; + }, [isOpen, hasSearched, nodeContext, pageTitle]); + + const handleSuggestionClick = async (node: Result) => { + await window.roamAlphaAPI.ui.mainWindow.openPage({ + page: { uid: node.uid }, + }); + }; + + if (!nodeContext) { + return null; + } + + const hasSuggestions = suggestions.length > 0; + + return ( +
+
{ + setIsOpen(!isOpen); + }} + > +
+ +
Plain Vector Search Matches
+
+ {hasSearched && !suggestionsLoading && hasSuggestions && ( + + {suggestions.length} + + )} +
+ + +
+ {suggestionsLoading && ( +
+ + + Searching for duplicates... + +
+ )} + + {!suggestionsLoading && hasSearched && !hasSuggestions && ( +

No matches found.

+ )} + + {!suggestionsLoading && hasSearched && hasSuggestions && ( + + )} +
+
+
+ ); +}; + +export const renderPossibleDuplicates = ( + h1: HTMLHeadingElement, + title: string, +) => { + const titleContainer = h1.parentElement; + if (!titleContainer || !titleContainer.parentElement) { + return; + } + const headerContainer = titleContainer.parentElement; + const VECTOR_CONTAINER_ID = "discourse-graph-duplicates-vector"; + + let vectorContainer = document.getElementById(VECTOR_CONTAINER_ID); + if (vectorContainer && vectorContainer.dataset.pageTitle !== title) { + /*eslint-disable-next-line react/no-deprecated*/ + ReactDOM.unmountComponentAtNode(vectorContainer); + /*eslint-disable-next-line react/no-deprecated*/ + vectorContainer.remove(); + vectorContainer = null; + } + if (!vectorContainer) { + vectorContainer = document.createElement("div"); + vectorContainer.id = VECTOR_CONTAINER_ID; + vectorContainer.dataset.pageTitle = title; + vectorContainer.className = "w-full mt-2"; + + headerContainer.insertBefore(vectorContainer, titleContainer.nextSibling); + } else if ( + vectorContainer.parentElement !== headerContainer || + vectorContainer.previousElementSibling !== titleContainer + ) { + headerContainer.insertBefore(vectorContainer, titleContainer.nextSibling); + } + + /*eslint-disable-next-line react/no-deprecated*/ + ReactDOM.render( + React.createElement(VectorDuplicateMatches, { pageTitle: title }), + vectorContainer, + ); + /*eslint-disable-next-line react/no-deprecated*/ +}; diff --git a/apps/roam/src/utils/hyde.ts b/apps/roam/src/utils/hyde.ts index 361d38725..65e8dc14f 100644 --- a/apps/roam/src/utils/hyde.ts +++ b/apps/roam/src/utils/hyde.ts @@ -58,7 +58,7 @@ type EmbeddingFunc = (text: string) => Promise; type SearchFunc = (params: { queryEmbedding: EmbeddingVectorType; - indexData: CandidateNodeWithEmbedding[]; + indexData: { uid: string; text: string }[]; }) => Promise; const API_CONFIG = { @@ -173,7 +173,7 @@ const createEmbedding: EmbeddingFunc = async ( } }; -const searchEmbeddings: SearchFunc = async ({ +export const searchEmbeddings: SearchFunc = async ({ queryEmbedding, indexData, }): Promise => { @@ -530,3 +530,61 @@ export const performHydeSearch = async ({ } return []; }; + +export type VectorMatch = { + node: Result; + score: number; +}; + +export const findSimilarNodesVectorOnly = async ({ + text, + threshold = 0.4, + limit = 15, +}: { + text: string; + threshold?: number; + limit?: number; +}): Promise => { + if (!text.trim()) { + return []; + } + + try { + const supabase = await getLoggedInClient(); + if (!supabase) return []; + + const queryEmbedding = await createEmbedding(text); + + const { data, error } = await supabase.rpc("match_content_embeddings", { + query_embedding: JSON.stringify(queryEmbedding), + match_threshold: threshold, + match_count: limit, + }); + + if (error) { + console.error("Vector search failed:", error); + throw error; + } + + if (!data || !Array.isArray(data)) return []; + + const results: VectorMatch[] = data.map( + (item: { + roam_uid: string; + text_content: string; + similarity: number; + }) => ({ + node: { + uid: item.roam_uid, + text: item.text_content, + }, + score: item.similarity, + }), + ); + + return results; + } catch (error) { + console.error("Error in vector-only similar nodes search:", error); + return []; + } +}; diff --git a/apps/roam/src/utils/initializeObserversAndListeners.ts b/apps/roam/src/utils/initializeObserversAndListeners.ts index e8ac5378a..8295b45be 100644 --- a/apps/roam/src/utils/initializeObserversAndListeners.ts +++ b/apps/roam/src/utils/initializeObserversAndListeners.ts @@ -54,6 +54,9 @@ import { getUidAndBooleanSetting } from "./getExportSettings"; import { getCleanTagText } from "~/components/settings/NodeConfig"; import getPleasingColors from "@repo/utils/getPleasingColors"; import { colord } from "colord"; +import { renderPossibleDuplicates } from "~/components/VectorDuplicateMatches"; +import getPageUidByPageTitle from "roamjs-components/queries/getPageUidByPageTitle"; +import isDiscourseNode from "./isDiscourseNode"; const debounce = (fn: () => void, delay = 250) => { let timeout: number; @@ -85,6 +88,11 @@ export const initObservers = async ({ const title = getPageTitleValueByHtmlElement(h1); const props = { title, h1, onloadArgs }; + const uid = getPageUidByPageTitle(title); + if (isDiscourseNode(uid)) { + renderPossibleDuplicates(h1, title); + } + if (isNodeConfigPage(title)) renderNodeConfigPage(props); else if (isQueryPage(props)) renderQueryPage(props); else if (isCurrentPageCanvas(props)) renderTldrawCanvas(props); diff --git a/apps/roam/src/utils/useNodeContext.ts b/apps/roam/src/utils/useNodeContext.ts new file mode 100644 index 000000000..98e8fe2e1 --- /dev/null +++ b/apps/roam/src/utils/useNodeContext.ts @@ -0,0 +1,69 @@ +import { useEffect, useState } from "react"; +import getDiscourseNodes, { DiscourseNode } from "~/utils/getDiscourseNodes"; +import findDiscourseNode from "~/utils/findDiscourseNode"; +import matchDiscourseNode from "~/utils/matchDiscourseNode"; +import getDiscourseNodeFormatExpression from "~/utils/getDiscourseNodeFormatExpression"; +import getPageUidByPageTitle from "roamjs-components/queries/getPageUidByPageTitle"; + +export type NodeContext = { + pageUid: string | null; + searchText: string; +}; + +const extractContentFromTitle = ( + title: string, + node: DiscourseNode, +): string => { + if (!node.format) return title; + const placeholderRegex = /{([\w\d-]+)}/g; + const placeholders: string[] = []; + let placeholderMatch: RegExpExecArray | null = null; + while ((placeholderMatch = placeholderRegex.exec(node.format))) { + placeholders.push(placeholderMatch[1]); + } + const expression = getDiscourseNodeFormatExpression(node.format); + const expressionMatch = expression.exec(title); + if (!expressionMatch || expressionMatch.length <= 1) { + return title; + } + const contentIndex = placeholders.findIndex( + (name) => name.toLowerCase() === "content", + ); + if (contentIndex >= 0) { + return expressionMatch[contentIndex + 1]?.trim() || title; + } + return expressionMatch[1]?.trim() || title; +}; + +export const useNodeContext = (pageTitle: string): NodeContext | null => { + const [nodeContext, setNodeContext] = useState(null); + + useEffect(() => { + const discourseNodes = getDiscourseNodes(); + const pageUid = getPageUidByPageTitle(pageTitle) || null; + let matchedNode: DiscourseNode | null = null; + + if (pageUid) { + const found = findDiscourseNode(pageUid, discourseNodes); + if (found) { + matchedNode = found; + } + } + + if (!matchedNode) { + matchedNode = + discourseNodes.find((node) => + matchDiscourseNode({ ...node, title: pageTitle }), + ) || null; + } + + if (matchedNode) { + const searchText = extractContentFromTitle(pageTitle, matchedNode); + setNodeContext({ searchText, pageUid }); + } else { + setNodeContext(null); + } + }, [pageTitle]); + + return nodeContext; +}; From efdc4297fc0e26f57138938f7f76cf1510eca8b1 Mon Sep 17 00:00:00 2001 From: sid597 Date: Mon, 24 Nov 2025 15:47:46 +0530 Subject: [PATCH 2/7] add to node create dialog, only show small list, fix lint errors, fix bug to show in all open pages --- apps/roam/src/components/CreateNodeDialog.tsx | 16 ++++ .../src/components/VectorDuplicateMatches.tsx | 89 ++++++++++++------- 2 files changed, 73 insertions(+), 32 deletions(-) diff --git a/apps/roam/src/components/CreateNodeDialog.tsx b/apps/roam/src/components/CreateNodeDialog.tsx index 4211f7c56..1884d4395 100644 --- a/apps/roam/src/components/CreateNodeDialog.tsx +++ b/apps/roam/src/components/CreateNodeDialog.tsx @@ -12,6 +12,7 @@ import getDiscourseNodes, { import { getNewDiscourseNodeText } from "~/utils/formatUtils"; import MenuItemSelect from "roamjs-components/components/MenuItemSelect"; import createBlock from "roamjs-components/writes/createBlock"; +import { VectorDuplicateMatches } from "./VectorDuplicateMatches"; export type CreateNodeDialogProps = { onClose: () => void; @@ -34,11 +35,21 @@ const CreateNodeDialog = ({ discourseNodes[0]; const [title, setTitle] = useState(initialTitle); + const [debouncedTitle, setDebouncedTitle] = useState(initialTitle); const [selectedType, setSelectedType] = useState(defaultNodeType); const [loading, setLoading] = useState(false); const inputRef = useRef(null); + useEffect(() => { + const handler = setTimeout(() => { + setDebouncedTitle(title); + }, 500); + return () => { + clearTimeout(handler); + }; + }, [title]); + useEffect(() => { if (inputRef.current) { inputRef.current.focus(); @@ -136,6 +147,11 @@ const CreateNodeDialog = ({ onChange={(e) => setTitle(e.currentTarget.value)} inputRef={inputRef} /> +