From 3e6fdb9aaeb3cf44d0526a55bfa1c75b8492a98c Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Wed, 22 Oct 2025 12:54:14 -0700 Subject: [PATCH 1/2] Initial support for cypher --- fquery/cypher_builder.py | 73 ++++++++++++++++++++++++++++++++++++++++ fquery/query.py | 6 ++++ tests/test_cypher.py | 39 +++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 fquery/cypher_builder.py create mode 100644 tests/test_cypher.py diff --git a/fquery/cypher_builder.py b/fquery/cypher_builder.py new file mode 100644 index 0000000..c647790 --- /dev/null +++ b/fquery/cypher_builder.py @@ -0,0 +1,73 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import ast +import operator + +from .visitor import Visitor + +# inspired from pandas.core.computation.ops +_cmp_ops_syms = (">", "<", ">=", "<=", "==", "!=") +_cmp_ops_funcs = ( + operator.gt, + operator.lt, + operator.ge, + operator.le, + operator.eq, + operator.ne, +) +_cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs)) + + +class CypherBuilderVisitor(Visitor): + def __init__(self, id1s): + self.cypher = None + self.cypher_clauses = [] + self.visited = set() + + @staticmethod + def table_from_query(query): + query_name = query.__class__.__name__.lower() + # UserQuery -> user -> User + query_name = query_name.split("query")[0] + return query_name.capitalize() + + async def visit_leaf(self, query): + label = self.table_from_query(query) + sorted_clauses = sorted(self.cypher_clauses, key=lambda x: x[0]) + clauses = [c for _, c in sorted_clauses] + qstr = f"MATCH (u:{label})" + if clauses: + qstr += "\n" + "\n".join(clauses) + if query in self.visited: + # Prevent infinite recursion + return + else: + self.visited.add(query) + for q in query.edges: + await self.visit(q) + self.cypher = qstr + + async def visit_project(self, query): + proj = ", ".join([f"u.{x}" if x != ":id" else "u.id" for x in query.projector]) + self.cypher_clauses.append((2, f"RETURN {proj}")) + await self.visit(query.child) + + async def visit_take(self, query): + self.cypher_clauses.append((4, f"LIMIT {query._count}")) + await self.visit(query.child) + + async def visit_where(self, query): + # TODO: more general lazy expression evaluator + left, op, right = query._expr.value.split() + right = ast.literal_eval(right) + table, field = left.split(".") if "." in left else (self.cypher, left) + self.cypher_clauses.append((1, f"WHERE u.{field} {op} {right}")) + await self.visit(query.child) + + async def visit_order_by(self, query): + key = query._expr.value + table, field = key.split(".") if "." in key else (self.cypher, key) + self.cypher_clauses.append((3, f"ORDER BY u.{field}")) + await self.visit(query.child) \ No newline at end of file diff --git a/fquery/query.py b/fquery/query.py index 7341235..208ba50 100644 --- a/fquery/query.py +++ b/fquery/query.py @@ -12,6 +12,7 @@ from .async_utils import wait_for from .execute import AbstractSyntaxTreeVisitor from .malloy_builder import MalloyBuilderVisitor +from .cypher_builder import CypherBuilderVisitor from .polars_builder import PolarsBuilderVisitor from .sql_builder import SQLBuilderVisitor from .view_model import ViewModel, get_edges, get_return_type @@ -257,6 +258,11 @@ def to_malloy(self) -> str: wait_for(visitor.visit(self)) return visitor.malloy + def to_cypher(self) -> str: + visitor = CypherBuilderVisitor([]) + wait_for(visitor.visit(self)) + return visitor.cypher + def to_polars(self) -> Tree: visitor = PolarsBuilderVisitor([]) wait_for(visitor.visit(self)) diff --git a/tests/test_cypher.py b/tests/test_cypher.py new file mode 100644 index 0000000..7483e79 --- /dev/null +++ b/tests/test_cypher.py @@ -0,0 +1,39 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import ast +import random +import textwrap +import unittest + +from .mock_user import UserQuery + + +class CypherTests(unittest.TestCase): + def setUp(self): + random.seed(100) + self.maxDiff = None + + def test_project(self): + cypher_q = ( + UserQuery(range(1, 10)) + .project([":id", "name"]) + .where(ast.Expr("user.age >= 16")) + .order_by(ast.Expr("user.age")) + .take(3) + .to_cypher() + ) + expected = textwrap.dedent( + """\ + MATCH (u:User) + WHERE u.age >= 16 + RETURN u.id, u.name + ORDER BY u.age + LIMIT 3""" + ) + self.assertEqual(expected, cypher_q) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 5d45cf86fda88b8e1572da665384952a344a527b Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Wed, 22 Oct 2025 13:05:25 -0700 Subject: [PATCH 2/2] Support more cypher queries --- README.md | 1 - fquery/cypher_builder.py | 114 +++++++++++++++++++---- fquery/query.py | 15 ++- tests/test_cypher.py | 44 ++++++++- tests/test_data/test_data_edge_count.txt | 1 - tests/test_data/test_data_edge_let.txt | 1 - 6 files changed, 154 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 213c147..817d9ba 100644 --- a/README.md +++ b/README.md @@ -44,4 +44,3 @@ fquery with Django and get easy access to graphql functionality This project is made available under the Apache License, version 2.0. See [LICENSE.txt](license.txt) for details. - diff --git a/fquery/cypher_builder.py b/fquery/cypher_builder.py index c647790..3f0c0e5 100644 --- a/fquery/cypher_builder.py +++ b/fquery/cypher_builder.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Arun Sharma, 2025 # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. @@ -23,8 +23,15 @@ class CypherBuilderVisitor(Visitor): def __init__(self, id1s): self.cypher = None - self.cypher_clauses = [] + self.match_parts = [] + self.current_node = "u" + self.where_clauses = [] + self.return_clause = "" + self.order_by_clause = "" + self.limit_clause = "" self.visited = set() + self.node_counter = 0 + self.root_label = None @staticmethod def table_from_query(query): @@ -33,13 +40,15 @@ def table_from_query(query): query_name = query_name.split("query")[0] return query_name.capitalize() + def _get_next_node_var(self): + self.node_counter += 1 + return f"n{self.node_counter}" + async def visit_leaf(self, query): - label = self.table_from_query(query) - sorted_clauses = sorted(self.cypher_clauses, key=lambda x: x[0]) - clauses = [c for _, c in sorted_clauses] - qstr = f"MATCH (u:{label})" - if clauses: - qstr += "\n" + "\n".join(clauses) + if not self.root_label: + self.root_label = self.table_from_query(query) + self.match_parts = [f"({self.current_node}:{self.root_label})"] + if query in self.visited: # Prevent infinite recursion return @@ -47,27 +56,100 @@ async def visit_leaf(self, query): self.visited.add(query) for q in query.edges: await self.visit(q) - self.cypher = qstr async def visit_project(self, query): - proj = ", ".join([f"u.{x}" if x != ":id" else "u.id" for x in query.projector]) - self.cypher_clauses.append((2, f"RETURN {proj}")) await self.visit(query.child) + proj = ", ".join( + [ + f"{self.current_node}.{x}" if x != ":id" else f"{self.current_node}.id" + for x in query.projector + ] + ) + self.return_clause = f"RETURN {proj}" async def visit_take(self, query): - self.cypher_clauses.append((4, f"LIMIT {query._count}")) await self.visit(query.child) + self.limit_clause = f"LIMIT {query._count}" async def visit_where(self, query): + await self.visit(query.child) # TODO: more general lazy expression evaluator left, op, right = query._expr.value.split() right = ast.literal_eval(right) table, field = left.split(".") if "." in left else (self.cypher, left) - self.cypher_clauses.append((1, f"WHERE u.{field} {op} {right}")) - await self.visit(query.child) + self.where_clauses.append(f"{self.current_node}.{field} {op} {right}") async def visit_order_by(self, query): + await self.visit(query.child) key = query._expr.value table, field = key.split(".") if "." in key else (self.cypher, key) - self.cypher_clauses.append((3, f"ORDER BY u.{field}")) - await self.visit(query.child) \ No newline at end of file + self.order_by_clause = f"ORDER BY {self.current_node}.{field}" + + async def visit_edge(self, query): + # Ensure we have the root label and initial match part + if not self.root_label: + # Find the root query + root_query = query + while hasattr(root_query, "child") and root_query.child: + root_query = root_query.child + if hasattr(root_query, "__class__"): + self.root_label = self.table_from_query(root_query) + self.match_parts = [f"({self.current_node}:{self.root_label})"] + + edge_name = query.edge_name + + # Check if this is part of a multi-hop pattern of the same edge type + # Look ahead to see if the child has another edge of the same type + has_same_edge_child = ( + hasattr(query, "child") + and hasattr(query.child, "OP") + and query.child.OP.name == "EDGE" + and query.child.edge_name == edge_name + ) + + if has_same_edge_child: + # Count the total number of consecutive edges of the same type + hops = 1 # current edge + current_query = query.child + while ( + hasattr(current_query, "OP") + and current_query.OP.name == "EDGE" + and current_query.edge_name == edge_name + ): + hops += 1 + current_query = current_query.child + + # This is the start of a multi-hop pattern (e.g., friend-of-friend, etc.) + self.match_parts = [ + f"(a:{self.root_label})", + f"[e:{edge_name.upper()}*{hops}..{hops}]", + f"(b:{self.root_label})", + ] + self.current_node = "b" + # Skip visiting the intermediate edges and visit the query after the chain + await self.visit(current_query) + else: + # Regular edge traversal + child_label = self.table_from_query(query._unbound) + next_node = self._get_next_node_var() + relationship = f"[:{edge_name.upper()}]" + self.match_parts.append(f"{relationship}->({next_node}:{child_label})") + self.current_node = next_node + await self.visit(query.child) + + async def visit_union(self, query): + # UNION in Cypher - this is complex, would need to handle multiple queries + # For now, just visit the child + await self.visit(query.child) + + async def visit_count(self, query): + await self.visit(query.child) + self.return_clause = "RETURN count(*)" + + async def visit_nest(self, query): + # Nesting - not directly supported in Cypher + await self.visit(query.child) + + async def visit_let(self, query): + # LET - for renaming, could be handled with AS in RETURN + await self.visit(query.child) diff --git a/fquery/query.py b/fquery/query.py index 208ba50..af39b01 100644 --- a/fquery/query.py +++ b/fquery/query.py @@ -10,9 +10,9 @@ from typing import Dict, List, Optional, Tuple, Type, Union from .async_utils import wait_for +from .cypher_builder import CypherBuilderVisitor from .execute import AbstractSyntaxTreeVisitor from .malloy_builder import MalloyBuilderVisitor -from .cypher_builder import CypherBuilderVisitor from .polars_builder import PolarsBuilderVisitor from .sql_builder import SQLBuilderVisitor from .view_model import ViewModel, get_edges, get_return_type @@ -261,7 +261,18 @@ def to_malloy(self) -> str: def to_cypher(self) -> str: visitor = CypherBuilderVisitor([]) wait_for(visitor.visit(self)) - return visitor.cypher + # Build the final query + match_pattern = "MATCH " + "-".join(visitor.match_parts) + qstr = match_pattern + if visitor.where_clauses: + qstr += "\nWHERE " + " AND ".join(visitor.where_clauses) + if visitor.return_clause: + qstr += "\n" + visitor.return_clause + if visitor.order_by_clause: + qstr += "\n" + visitor.order_by_clause + if visitor.limit_clause: + qstr += "\n" + visitor.limit_clause + return qstr def to_polars(self) -> Tree: visitor = PolarsBuilderVisitor([]) diff --git a/tests/test_cypher.py b/tests/test_cypher.py index 7483e79..c585db8 100644 --- a/tests/test_cypher.py +++ b/tests/test_cypher.py @@ -34,6 +34,48 @@ def test_project(self): ) self.assertEqual(expected, cypher_q) + def test_sync_edge_project(self): + cypher_q = ( + UserQuery(range(1, 5)) + .edge("friends") + .project(["name", ":id"]) + .take(3) + .to_cypher() + ) + expected = textwrap.dedent( + """\ + MATCH (u:User)-[:FRIENDS]->(n1:User) + RETURN n1.name, n1.id + LIMIT 3""" + ) + self.assertEqual(expected, cypher_q) + + def test_sync_two_hop_project(self): + cypher_q = ( + UserQuery([1]) + .edge("friends") + .edge("friends") + .project(["name", ":id"]) + .take(3) + .to_cypher() + ) + expected = textwrap.dedent( + """\ + MATCH (a:User)-[e:FRIENDS*2..2]-(b:User) + RETURN b.name, b.id + LIMIT 3""" + ) + self.assertEqual(expected, cypher_q) + + def test_count(self): + cypher_q = UserQuery(range(1, 10)).count().to_cypher() + expected = textwrap.dedent( + """\ + MATCH (u:User) + RETURN count(*)""" + ) + self.assertEqual(expected, cypher_q) + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_data/test_data_edge_count.txt b/tests/test_data/test_data_edge_count.txt index 4a335f9..93eb13b 100644 --- a/tests/test_data/test_data_edge_count.txt +++ b/tests/test_data/test_data_edge_count.txt @@ -40,4 +40,3 @@ ] } ] - diff --git a/tests/test_data/test_data_edge_let.txt b/tests/test_data/test_data_edge_let.txt index 51f70f7..cd15950 100644 --- a/tests/test_data/test_data_edge_let.txt +++ b/tests/test_data/test_data_edge_let.txt @@ -40,4 +40,3 @@ ] } ] -