From 9050426c0a7170a1aa168c75fcf5c394b43c4dba Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Tue, 6 May 2025 20:36:42 +0500 Subject: [PATCH 01/11] jixia_db.py - use `dict` instead of `tuple` in db call --- database/jixia_db.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/database/jixia_db.py b/database/jixia_db.py index 94d0e46..09074c4 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -77,24 +77,25 @@ def load_declaration(module: LeanName): (Jsonb(module),), ) (source,) = cursor.fetchone() + values = ( - ( - Jsonb(module), - i, - Jsonb(d.name) if d.kind != "example" else None, - d.modifiers.visibility != "private" and d.kind != "example", - d.modifiers.docstring, - d.kind, - d.signature.pp if d.signature.pp is not None else source[d.signature.range.as_slice()].decode(), - source[d.value.range.as_slice()].decode() if d.value is not None else None, - ) + { + "module_name": Jsonb(module), + "index" : i, + "name" : Jsonb(d.name) if d.kind != "example" else None, + "visible" : d.modifiers.visibility != "private" and d.kind != "example", + "docstring" : d.modifiers.docstring, + "kind" : d.kind, + "signature" : d.signature.pp if d.signature.pp is not None else source[d.signature.range.as_slice()].decode(), + "value" : source[d.value.range.as_slice()].decode() if d.value is not None else None, + } for i, d in enumerate(declarations) if not is_internal(d.name) and d.kind != "proofWanted" ) cursor.executemany( """ INSERT INTO declaration (module_name, index, name, visible, docstring, kind, signature, value) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT DO NOTHING + VALUES (%(module_name)s, %(index)s, %(name)s, %(visible)s, %(docstring)s, %(kind)s, %(signature)s, %(value)s) ON CONFLICT DO NOTHING """, values, ) From 83ff6f30e88daefc458b1ba89336e58c7a6e1c9d Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Tue, 6 May 2025 21:06:21 +0500 Subject: [PATCH 02/11] jixia_db.py - fix "None is not iterable" pylance error --- database/jixia_db.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/database/jixia_db.py b/database/jixia_db.py index 09074c4..e9134f8 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -68,19 +68,23 @@ def load_symbol(module: LeanName): values, ) - def load_declaration(module: LeanName): - declarations = project.load_info(module, Declaration) + def load_declaration(module_name: LeanName): + declarations = project.load_info(module_name, Declaration) cursor.execute( """ SELECT content FROM module WHERE name = %s """, - (Jsonb(module),), + (Jsonb(module_name),), ) - (source,) = cursor.fetchone() + module = cursor.fetchone() + if (module is None): + logger.warn("couldn't find a module with name '%s'", Jsonb(module_name)) + return + (source,) = module values = ( { - "module_name": Jsonb(module), + "module_name": Jsonb(module_name), "index" : i, "name" : Jsonb(d.name) if d.kind != "example" else None, "visible" : d.modifiers.visibility != "private" and d.kind != "example", From 89a39a5f60bae5971077cde6a7abf4f1585e9378 Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Tue, 6 May 2025 21:22:00 +0500 Subject: [PATCH 03/11] jixia_db.py - use a for-loop instead of python's list comprehension --- database/jixia_db.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/database/jixia_db.py b/database/jixia_db.py index e9134f8..1b12a1e 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -82,26 +82,26 @@ def load_declaration(module_name: LeanName): return (source,) = module - values = ( - { + db_declarations = [] + for index, decl in enumerate(declarations): + if is_internal(decl.name) or decl.kind == "proofWanted": continue + value = { "module_name": Jsonb(module_name), - "index" : i, - "name" : Jsonb(d.name) if d.kind != "example" else None, - "visible" : d.modifiers.visibility != "private" and d.kind != "example", - "docstring" : d.modifiers.docstring, - "kind" : d.kind, - "signature" : d.signature.pp if d.signature.pp is not None else source[d.signature.range.as_slice()].decode(), - "value" : source[d.value.range.as_slice()].decode() if d.value is not None else None, + "index" : index, + "name" : Jsonb(decl.name) if decl.kind != "example" else None, + "visible" : decl.modifiers.visibility != "private" and decl.kind != "example", + "docstring" : decl.modifiers.docstring, + "kind" : decl.kind, + "signature" : decl.signature.pp if decl.signature.pp is not None else source[decl.signature.range.as_slice()].decode(), + "value" : source[decl.value.range.as_slice()].decode() if decl.value is not None else None, } - for i, d in enumerate(declarations) - if not is_internal(d.name) and d.kind != "proofWanted" - ) + db_declarations.append(value) cursor.executemany( """ INSERT INTO declaration (module_name, index, name, visible, docstring, kind, signature, value) VALUES (%(module_name)s, %(index)s, %(name)s, %(visible)s, %(docstring)s, %(kind)s, %(signature)s, %(value)s) ON CONFLICT DO NOTHING """, - values, + db_declarations, ) def topological_sort(): From 59fe0066c14e2ef0c4fc071696e620b44338284f Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Tue, 6 May 2025 21:42:10 +0500 Subject: [PATCH 04/11] jixia_db.py - fix two ""as_slice" is not a known attribute of "None"" pylance errors --- database/jixia_db.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/database/jixia_db.py b/database/jixia_db.py index 1b12a1e..041b79d 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -10,6 +10,19 @@ logger = logging.getLogger(__name__) +def _get_signature(declaration: Declaration, module_content): + if declaration.signature.pp is not None: + return declaration.signature.pp + elif declaration.signature.range is not None: + return module_content[declaration.signature.range.as_slice()].decode() + else: + return '' + +def _get_value(declaration: Declaration, module_content): + if declaration.value is not None and declaration.value.range is not None: + return module_content[declaration.value.range.as_slice()].decode() + else: + return None def load_data(project: LeanProject, prefixes: list[LeanName], conn: Connection): def load_module(data: Iterable[LeanName], base_dir: Path): @@ -80,22 +93,21 @@ def load_declaration(module_name: LeanName): if (module is None): logger.warn("couldn't find a module with name '%s'", Jsonb(module_name)) return - (source,) = module + (module_content,) = module db_declarations = [] for index, decl in enumerate(declarations): if is_internal(decl.name) or decl.kind == "proofWanted": continue - value = { + db_declarations.append({ "module_name": Jsonb(module_name), "index" : index, "name" : Jsonb(decl.name) if decl.kind != "example" else None, "visible" : decl.modifiers.visibility != "private" and decl.kind != "example", "docstring" : decl.modifiers.docstring, "kind" : decl.kind, - "signature" : decl.signature.pp if decl.signature.pp is not None else source[decl.signature.range.as_slice()].decode(), - "value" : source[decl.value.range.as_slice()].decode() if decl.value is not None else None, - } - db_declarations.append(value) + "signature" : _get_signature(decl, module_content), + "value" : _get_value(decl, module_content) + }) cursor.executemany( """ INSERT INTO declaration (module_name, index, name, visible, docstring, kind, signature, value) From 448c10c608be424c880227399c4a1f2966cc69c4 Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Tue, 6 May 2025 22:17:12 +0500 Subject: [PATCH 05/11] jixia_db.py - make ruff happy --- database/jixia_db.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/database/jixia_db.py b/database/jixia_db.py index 041b79d..03cfc9a 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -90,14 +90,15 @@ def load_declaration(module_name: LeanName): (Jsonb(module_name),), ) module = cursor.fetchone() - if (module is None): + if module is None: logger.warn("couldn't find a module with name '%s'", Jsonb(module_name)) return (module_content,) = module db_declarations = [] for index, decl in enumerate(declarations): - if is_internal(decl.name) or decl.kind == "proofWanted": continue + if is_internal(decl.name) or decl.kind == "proofWanted": + continue db_declarations.append({ "module_name": Jsonb(module_name), "index" : index, @@ -106,7 +107,7 @@ def load_declaration(module_name: LeanName): "docstring" : decl.modifiers.docstring, "kind" : decl.kind, "signature" : _get_signature(decl, module_content), - "value" : _get_value(decl, module_content) + "value" : _get_value(decl, module_content), }) cursor.executemany( """ From 3227c30a058b9a17c269c96452fa4f47778c482c Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Thu, 8 May 2025 00:02:45 +0500 Subject: [PATCH 06/11] jixia_db.py - refactor "first lean; then project" folder parsing --- database/jixia_db.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/database/jixia_db.py b/database/jixia_db.py index 03cfc9a..b9a6574 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -140,19 +140,23 @@ def topological_sort(): """) with conn.cursor() as cursor: - lean_sysroot = Path(os.environ["LEAN_SYSROOT"]) - lean_src = lean_sysroot / "src" / "lean" - all_modules = [] - for d in project.root, lean_src: - results = project.batch_run_jixia( - base_dir=d, - prefixes=prefixes, - plugins=["module", "declaration", "symbol"], - ) - modules = [r[0] for r in results] - load_module(modules, d) - all_modules += modules + path_to_project = project.root + project_modules = [r[0] for r in project.batch_run_jixia( + base_dir=path_to_project, + prefixes=prefixes, + plugins=["module", "declaration", "symbol"], + )] + load_module(project_modules, path_to_project) + + path_to_lean = Path(os.environ["LEAN_SYSROOT"]) / "src" / "lean" + lean_modules = [r[0] for r in project.batch_run_jixia( + base_dir=path_to_lean, + prefixes=prefixes, + plugins=["module", "declaration", "symbol"], + )] + load_module(lean_modules, path_to_lean) + all_modules = lean_modules + project_modules for m in all_modules: load_symbol(m) for m in all_modules: From bc3a1a3f3d3d0fb53c4fa0ab9df4ddb9f7fca6b2 Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Thu, 8 May 2025 00:09:56 +0500 Subject: [PATCH 07/11] jixia_db.py - refactor `load_module()` (use dicts instead of tuples in db calls) --- database/jixia_db.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/database/jixia_db.py b/database/jixia_db.py index b9a6574..7d93697 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -25,14 +25,19 @@ def _get_value(declaration: Declaration, module_content): return None def load_data(project: LeanProject, prefixes: list[LeanName], conn: Connection): - def load_module(data: Iterable[LeanName], base_dir: Path): - values = ((Jsonb(m), project.path_of_module(m, base_dir).read_bytes(), project.load_module_info(m).docstring) for m in data) - cursor.executemany( - """ - INSERT INTO module (name, content, docstring) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING - """, - values, - ) + def load_module(module_names: Iterable[LeanName], base_dir: Path): + for module_name in module_names: + db_module = { + "name": Jsonb(module_name), + "content": project.path_of_module(module_name, base_dir).read_bytes(), + "docstring": project.load_module_info(module_name).docstring + } + cursor.execute( + """ + INSERT INTO module (name, content, docstring) VALUES (%(name)s, %(content)s, %(docstring)s) ON CONFLICT DO NOTHING + """, + db_module + ) def load_symbol(module: LeanName): symbols = [s for s in project.load_info(module, Symbol) if not is_internal(s.name)] From d14aab73c268b08ec49abcd0df83cf6de773dc4b Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Thu, 8 May 2025 22:56:51 +0500 Subject: [PATCH 08/11] jixia_db.py, create_schema.py - merge tables declaration+symbol [`make jixia_db` works] --- database/create_schema.py | 28 ++++---- database/jixia_db.py | 141 ++++++++++++++++---------------------- 2 files changed, 72 insertions(+), 97 deletions(-) diff --git a/database/create_schema.py b/database/create_schema.py index c8d9f04..ab0c94d 100644 --- a/database/create_schema.py +++ b/database/create_schema.py @@ -27,43 +27,40 @@ def create_schema(conn: Connection): ) """, """ - CREATE TABLE symbol ( - name JSONB PRIMARY KEY, - module_name JSONB REFERENCES module(name) NOT NULL, - type TEXT NOT NULL, - is_prop BOOLEAN NOT NULL - ) - """, - """ CREATE TABLE declaration ( module_name JSONB REFERENCES module(name) NOT NULL, + name JSONB, + index INTEGER NOT NULL, - name JSONB UNIQUE REFERENCES symbol(name), visible BOOLEAN NOT NULL, docstring TEXT, kind declaration_kind NOT NULL, signature TEXT NOT NULL, value TEXT, - PRIMARY KEY (module_name, index) + + symbol_type TEXT NOT NULL, + symbol_is_prop BOOLEAN NOT NULL, + + PRIMARY KEY (name) ) """, """ CREATE TABLE dependency ( - source JSONB REFERENCES symbol(name) NOT NULL, - target JSONB REFERENCES symbol(name) NOT NULL, + source JSONB NOT NULL, + target JSONB NOT NULL, on_type BOOLEAN NOT NULL, PRIMARY KEY (source, target, on_type) ) """, """ CREATE TABLE level ( - symbol_name JSONB PRIMARY KEY REFERENCES symbol(name) NOT NULL, + symbol_name JSONB PRIMARY KEY REFERENCES declaration(name) NOT NULL, level INTEGER NOT NULL ) """, """ CREATE TABLE informal ( - symbol_name JSONB PRIMARY KEY REFERENCES symbol(name) NOT NULL, + symbol_name JSONB PRIMARY KEY REFERENCES declaration(name) NOT NULL, name TEXT NOT NULL, description TEXT NOT NULL ) @@ -71,12 +68,11 @@ def create_schema(conn: Connection): """ CREATE VIEW record AS SELECT - d.module_name, d.index, d.kind, d.name, d.signature, s.type, d.value, d.docstring, + d.module_name, d.index, d.kind, d.name, d.signature, d.symbol_type, d.value, d.docstring, i.name AS informal_name, i.description AS informal_description FROM declaration d INNER JOIN informal i ON d.name = i.symbol_name - INNER JOIN symbol s ON d.name = s.name """, ] diff --git a/database/jixia_db.py b/database/jixia_db.py index 7d93697..4d83272 100644 --- a/database/jixia_db.py +++ b/database/jixia_db.py @@ -24,6 +24,12 @@ def _get_value(declaration: Declaration, module_content): else: return None +def _find_declaration(declarations: list[Declaration], target_name): + for index, declaration in enumerate(declarations): + if declaration.name == target_name: + return declaration, index + return None, None + def load_data(project: LeanProject, prefixes: list[LeanName], conn: Connection): def load_module(module_names: Iterable[LeanName], base_dir: Path): for module_name in module_names: @@ -34,100 +40,79 @@ def load_module(module_names: Iterable[LeanName], base_dir: Path): } cursor.execute( """ - INSERT INTO module (name, content, docstring) VALUES (%(name)s, %(content)s, %(docstring)s) ON CONFLICT DO NOTHING + INSERT INTO module (name, content, docstring) VALUES (%(name)s, %(content)s, %(docstring)s) """, db_module ) - def load_symbol(module: LeanName): - symbols = [s for s in project.load_info(module, Symbol) if not is_internal(s.name)] - values = ((Jsonb(s.name), Jsonb(module), s.type, s.is_prop) for s in symbols) - cursor.executemany( - """ - INSERT INTO symbol (name, module_name, type, is_prop) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING - """, - values, - ) - for s in symbols: - values = ( - { - "source": Jsonb(s.name), - "target": Jsonb(t), - } - for t in s.type_references - if not is_internal(t) - ) - cursor.executemany( - """ - INSERT INTO dependency (source, target, on_type) - SELECT %(source)s, %(target)s, TRUE - WHERE EXISTS(SELECT 1 FROM symbol WHERE name = %(target)s) - ON CONFLICT DO NOTHING - """, - values, - ) + symbols = project.load_info(module_name, Symbol) + declarations = project.load_info(module_name, Declaration) + for symbol in symbols: + declaration, index = _find_declaration(declarations, symbol.name) + if ( + is_internal(symbol.name) or + declaration is None or + declaration.kind == "proofWanted" + ): + continue - if s.value_references is not None: - values = ( + cursor.execute( + """ + INSERT INTO declaration (module_name, index, name, visible, docstring, kind, signature, value, symbol_type, symbol_is_prop) + VALUES (%(module_name)s, %(index)s, %(name)s, %(visible)s, %(docstring)s, %(kind)s, %(signature)s, %(value)s, %(symbol_type)s, %(symbol_is_prop)s) + """, { - "source": Jsonb(s.name), - "target": Jsonb(t), + "module_name": Jsonb(module_name), + "name" : Jsonb(declaration.name) if declaration.kind != "example" else None, + "index" : index, + "visible" : declaration.modifiers.visibility != "private" and declaration.kind != "example", + "docstring" : declaration.modifiers.docstring, + "kind" : declaration.kind, + "signature" : _get_signature(declaration, db_module["content"]), + "value" : _get_value(declaration, db_module["content"]), + "symbol_type": symbol.type, + "symbol_is_prop": symbol.is_prop } - for t in s.value_references - if not is_internal(t) ) + + db_deps = [] + for ref_name in symbol.type_references: + if is_internal(ref_name): + continue + db_deps.append({ + "source": Jsonb(symbol.name), + "target": Jsonb(ref_name), + "on_type": True + }) + for ref_name in (symbol.value_references or []): + if is_internal(ref_name): + continue + db_deps.append({ + "source": Jsonb(symbol.name), + "target": Jsonb(ref_name), + "on_type": False + }) cursor.executemany( """ INSERT INTO dependency (source, target, on_type) - SELECT %(source)s, %(target)s, FALSE - WHERE EXISTS(SELECT 1 FROM symbol WHERE name = %(target)s) - ON CONFLICT DO NOTHING + VALUES (%(source)s, %(target)s, %(on_type)s) """, - values, + db_deps, ) - def load_declaration(module_name: LeanName): - declarations = project.load_info(module_name, Declaration) - cursor.execute( - """ - SELECT content FROM module WHERE name = %s - """, - (Jsonb(module_name),), - ) - module = cursor.fetchone() - if module is None: - logger.warn("couldn't find a module with name '%s'", Jsonb(module_name)) - return - (module_content,) = module - - db_declarations = [] - for index, decl in enumerate(declarations): - if is_internal(decl.name) or decl.kind == "proofWanted": - continue - db_declarations.append({ - "module_name": Jsonb(module_name), - "index" : index, - "name" : Jsonb(decl.name) if decl.kind != "example" else None, - "visible" : decl.modifiers.visibility != "private" and decl.kind != "example", - "docstring" : decl.modifiers.docstring, - "kind" : decl.kind, - "signature" : _get_signature(decl, module_content), - "value" : _get_value(decl, module_content), - }) - cursor.executemany( - """ - INSERT INTO declaration (module_name, index, name, visible, docstring, kind, signature, value) - VALUES (%(module_name)s, %(index)s, %(name)s, %(visible)s, %(docstring)s, %(kind)s, %(signature)s, %(value)s) ON CONFLICT DO NOTHING - """, - db_declarations, - ) - def topological_sort(): logger.info("performing topological sort") + # Delete dependencies where target doesn't exist in declaration table + cursor.execute(""" + DELETE FROM dependency d + WHERE NOT EXISTS (SELECT 1 FROM declaration dec WHERE dec.name = d.target) + """) + logger.info("Deleted %d invalid dependencies", cursor.rowcount) + cursor.execute(""" INSERT INTO level (symbol_name, level) SELECT name, 0 - FROM symbol v + FROM declaration v WHERE NOT EXISTS (SELECT 1 FROM dependency e WHERE e.source = v.name) """) while cursor.rowcount: @@ -160,10 +145,4 @@ def topological_sort(): plugins=["module", "declaration", "symbol"], )] load_module(lean_modules, path_to_lean) - - all_modules = lean_modules + project_modules - for m in all_modules: - load_symbol(m) - for m in all_modules: - load_declaration(m) topological_sort() From 0a12821d2d31cca1c1a7cada2c2db2fd9b8f6038 Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Thu, 8 May 2025 23:00:15 +0500 Subject: [PATCH 09/11] create_schema.py - merge tables informal+declaration [`make jixia_db` works] --- database/create_schema.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/database/create_schema.py b/database/create_schema.py index ab0c94d..bb37ca6 100644 --- a/database/create_schema.py +++ b/database/create_schema.py @@ -41,6 +41,9 @@ def create_schema(conn: Connection): symbol_type TEXT NOT NULL, symbol_is_prop BOOLEAN NOT NULL, + informal_name TEXT, + informal_description TEXT, + PRIMARY KEY (name) ) """, @@ -57,23 +60,7 @@ def create_schema(conn: Connection): symbol_name JSONB PRIMARY KEY REFERENCES declaration(name) NOT NULL, level INTEGER NOT NULL ) - """, """ - CREATE TABLE informal ( - symbol_name JSONB PRIMARY KEY REFERENCES declaration(name) NOT NULL, - name TEXT NOT NULL, - description TEXT NOT NULL - ) - """, - """ - CREATE VIEW record AS - SELECT - d.module_name, d.index, d.kind, d.name, d.signature, d.symbol_type, d.value, d.docstring, - i.name AS informal_name, i.description AS informal_description - FROM - declaration d - INNER JOIN informal i ON d.name = i.symbol_name - """, ] with conn.cursor() as cursor: From 16183bee70e5b858cc67d34a1e644fcadd499483 Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Fri, 9 May 2025 00:01:16 +0500 Subject: [PATCH 10/11] informalize.py - made `make informal` work with merged tables --- database/informalize.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/database/informalize.py b/database/informalize.py index 11123d7..d590d11 100644 --- a/database/informalize.py +++ b/database/informalize.py @@ -16,12 +16,10 @@ def find_neighbor(conn: Connection, module_name: LeanName, index: int, num_neigh with conn.cursor(row_factory=args_row(TranslatedItem)) as cursor: cursor.execute( """ - SELECT d.name, d.signature, i.name, i.description - FROM - declaration d - LEFT JOIN informal i ON d.name = i.symbol_name + SELECT name, signature, informal_name, informal_description + FROM declaration WHERE - d.module_name = %s AND d.index >= %s AND d.index <= %s + module_name = %s AND index >= %s AND index <= %s """, (Jsonb(module_name), index - num_neighbor, index + num_neighbor), ) @@ -32,11 +30,10 @@ def find_dependency(conn: Connection, name: LeanName) -> list[TranslatedItem]: with conn.cursor(row_factory=args_row(TranslatedItem)) as cursor: cursor.execute( """ - SELECT d.name, d.signature, i.name, i.description + SELECT name, signature, informal_name, informal_description FROM declaration d INNER JOIN dependency e ON d.name = e.target - LEFT JOIN informal i ON d.name = i.symbol_name WHERE e.source = %s """, @@ -57,15 +54,14 @@ def generate_informal(conn: Connection, batch_size: int = 50, limit_level: int | with conn.cursor() as cursor, conn.cursor() as insert_cursor: for level in range(max_level + 1): query = """ - SELECT s.name, d.signature, d.value, d.docstring, d.kind, m.docstring, d.module_name, d.index - FROM - symbol s - INNER JOIN declaration d ON s.name = d.name - INNER JOIN module m ON s.module_name = m.name - INNER JOIN level l ON s.name = l.symbol_name - WHERE - l.level = %s AND - (NOT EXISTS(SELECT 1 FROM informal i WHERE i.symbol_name = s.name)) + SELECT d.name, d.signature, d.value, d.docstring, d.kind, m.docstring, d.module_name, d.index + FROM + declaration d + INNER JOIN module m ON d.module_name = m.name + INNER JOIN level l ON d.name = l.symbol_name + WHERE + l.level = %s AND + (d.informal_description IS NULL OR d.informal_name IS NULL) """ if limit_num_per_level: cursor.execute(query + " LIMIT %s", (level, limit_num_per_level)) @@ -84,10 +80,15 @@ async def translate_and_insert(name: LeanName, data: TranslationInput): informal_name, informal_description = result insert_cursor.execute( """ - INSERT INTO informal (symbol_name, name, description) - VALUES (%s, %s, %s) + UPDATE declaration + SET informal_name = %(informal_name)s, informal_description = %(informal_description)s + WHERE name = %(name)s """, - (Jsonb(name), informal_name, informal_description), + { + "name": Jsonb(name), + "informal_name": informal_name, + "informal_description": informal_description + }, ) tasks.clear() From 6a01956393b17e7136879d5cc1dc69e71f8ff3ec Mon Sep 17 00:00:00 2001 From: Evgenia Karunus Date: Fri, 9 May 2025 00:04:49 +0500 Subject: [PATCH 11/11] vector_db.py - made `make index` work with merged tables --- database/vector_db.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/database/vector_db.py b/database/vector_db.py index d70c4e4..153e2e6 100644 --- a/database/vector_db.py +++ b/database/vector_db.py @@ -22,10 +22,9 @@ def create_vector_db(conn: Connection, path: str, batch_size: int): with conn.cursor() as cursor: cursor.execute(""" - SELECT d.module_name, d.index, d.kind, d.name, d.signature, i.name, i.description - FROM - declaration d INNER JOIN informal i ON d.name = i.symbol_name - WHERE d.visible = TRUE + SELECT module_name, index, kind, name, signature, informal_name, informal_description + FROM declaration + WHERE visible = TRUE """) batch_doc = []