Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
249 changes: 218 additions & 31 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ members = [
"web3-utils",
"grc20-core",
"grc20-macros",
"grc20-sdk",
"grc20-sdk", "mcp-server",
]
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,13 @@ Schema introspection
npx get-graphql-schema http://127.0.0.1:8080/graphql > api/schema.graphql
```

## MCP Server
```bash
CFLAGS='-std=gnu17' cargo run --bin mcp-server -- \
--neo4j-uri neo4j://localhost:7687 \
--neo4j-user neo4j \
--neo4j-pass neo4j
```

## GRC20 CLI
Coming soon™️
9 changes: 5 additions & 4 deletions api/src/schema/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,12 @@ impl RootQuery {
.map(|triple| Triple::new(triple, space_id, version_index)))
}

async fn search<'a, S: ScalarValue>(
async fn search_triples<'a, S: ScalarValue>(
&'a self,
executor: &'a Executor<'_, '_, KnowledgeGraph, S>,
query: String,
#[graphql(default = 100)] first: i32,
// #[graphql(default = 0)] skip: i32,
#[graphql(default = 0)] skip: i32,
) -> FieldResult<Vec<Triple>> {
let embedding = executor
.context()
Expand All @@ -336,8 +336,9 @@ impl RootQuery {
.map(|v| v as f64)
.collect::<Vec<_>>();

let query = mapping::triple::semantic_search(&executor.context().neo4j, embedding)
.limit(first as usize);
let query = mapping::triple::search(&executor.context().neo4j, embedding)
.limit(first as usize)
.skip(skip as usize);

Ok(query
.send()
Expand Down
1 change: 1 addition & 0 deletions grc20-core/src/mapping/entity/find_many.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ impl<T: FromAttributes> QueryStream<Entity<T>> for FindManyQuery<Entity<T>> {
"e",
"attrs",
"types",
None,
"RETURN e{.*, attrs: attrs, types: types}",
),
);
Expand Down
1 change: 1 addition & 0 deletions grc20-core/src/mapping/entity/find_one.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ impl<T: FromAttributes> Query<Option<Entity<T>>> for FindOneQuery<Entity<T>> {
"e",
"attrs",
"types",
None,
"RETURN e{.*, attrs: attrs, types: types}",
),
)
Expand Down
6 changes: 6 additions & 0 deletions grc20-core/src/mapping/entity/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ pub mod find_one;
pub mod insert_many;
pub mod insert_one;
pub mod models;
pub mod semantic_search;
pub mod utils;

pub use delete_one::DeleteOneQuery;
pub use find_many::FindManyQuery;
pub use find_one::FindOneQuery;
pub use insert_one::InsertOneQuery;
pub use models::{Entity, EntityNode, EntityNodeRef, SystemProperties};
pub use semantic_search::SemanticSearchQuery;
pub use utils::{EntityFilter, EntityRelationFilter};

use crate::block::BlockMetadata;
Expand Down Expand Up @@ -40,6 +42,10 @@ pub fn find_many<T>(neo4j: &neo4rs::Graph) -> FindManyQuery<T> {
FindManyQuery::new(neo4j)
}

pub fn search<T>(neo4j: &neo4rs::Graph, vector: Vec<f64>) -> SemanticSearchQuery<T> {
SemanticSearchQuery::new(neo4j, vector)
}

pub fn insert_one<T>(
neo4j: &neo4rs::Graph,
block: &BlockMetadata,
Expand Down
218 changes: 218 additions & 0 deletions grc20-core/src/mapping/entity/semantic_search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
use futures::{Stream, StreamExt, TryStreamExt};

use crate::{
entity::utils::MatchEntity,
error::DatabaseError,
mapping::{
query_utils::VersionFilter, AttributeNode, FromAttributes, PropFilter, QueryBuilder,
QueryStream, Subquery,
},
};

use super::{Entity, EntityFilter, EntityNode};

pub struct SemanticSearchQuery<T> {
neo4j: neo4rs::Graph,
vector: Vec<f64>,
filter: EntityFilter,
space_id: Option<PropFilter<String>>,
version: VersionFilter,
limit: usize,
skip: Option<usize>,

_marker: std::marker::PhantomData<T>,
}

impl<T> SemanticSearchQuery<T> {
pub fn new(neo4j: &neo4rs::Graph, vector: Vec<f64>) -> Self {
Self {
neo4j: neo4j.clone(),
vector,
filter: EntityFilter::default(),
space_id: None,
version: VersionFilter::default(),
limit: 100,
skip: None,

_marker: std::marker::PhantomData,
}
}

pub fn filter(mut self, filter: EntityFilter) -> Self {
self.filter = filter;
self
}

pub fn space_id(mut self, filter: PropFilter<String>) -> Self {
self.space_id = Some(filter);
self
}

pub fn version(mut self, version: impl Into<String>) -> Self {
self.version.version_mut(version.into());
self
}

pub fn limit(mut self, limit: usize) -> Self {
self.limit = limit;
self
}

pub fn limit_opt(mut self, limit: Option<usize>) -> Self {
if let Some(limit) = limit {
self.limit = limit;
}
self
}

pub fn skip(mut self, skip: usize) -> Self {
self.skip = Some(skip);
self
}

pub fn skip_opt(mut self, skip: Option<usize>) -> Self {
self.skip = skip;
self
}

fn subquery(&self) -> QueryBuilder {
const QUERY: &str = r#"
CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector)
YIELD node AS n, score AS score
MATCH (e:Entity) -[r:ATTRIBUTE]-> (n)
"#;

// Exact neighbor search using vector index (very expensive but allows prefiltering)
// const QUERY: &str = const_format::formatcp!(
// r#"
// MATCH (e:Entity) -[r:ATTRIBUTE]-> (a:Attribute:Indexed)
// WHERE r.max_version IS null
// AND a.embedding IS NOT NULL
// WITH e, a, r, vector.similarity.cosine(a.embedding, $vector) AS score
// ORDER BY score DESC
// WHERE score IS NOT null
// LIMIT $limit
// RETURN a{{.*, entity: e.id, space_version: r.min_version, space_id: r.space_id, score: score}}
// "#,
// );

QueryBuilder::default()
.subquery(QUERY)
.subquery(self.filter.subquery("e"))
.limit(self.limit)
.skip_opt(self.skip)
.params("vector", self.vector.clone())
.params("effective_search_ratio", EFFECTIVE_SEARCH_RATIO)
.params("limit", self.limit as i64)
}
}

#[derive(Clone, Debug, PartialEq)]
pub struct SemanticSearchResult<T> {
pub entity: T,
pub score: f64,
}

const EFFECTIVE_SEARCH_RATIO: f64 = 10000.0; // Adjust this ratio based on your needs

impl QueryStream<SemanticSearchResult<EntityNode>> for SemanticSearchQuery<EntityNode> {
async fn send(
self,
) -> Result<
impl Stream<Item = Result<SemanticSearchResult<EntityNode>, DatabaseError>>,
DatabaseError,
> {
let query = self.subquery().r#return("DISTINCT e, score");

if cfg!(debug_assertions) || cfg!(test) {
tracing::info!(
"entity_node::FindManyQuery::<EntityNode>:\n{}\nparams:{:?}",
query.compile(),
query.params()
);
};

#[derive(Debug, serde::Deserialize)]
struct RowResult {
e: EntityNode,
score: f64,
}

Ok(self
.neo4j
.execute(query.build())
.await?
.into_stream_as::<RowResult>()
.map_err(DatabaseError::from)
.and_then(|row| async move {
Ok(SemanticSearchResult {
entity: row.e,
score: row.score,
})
}))
}
}

impl<T: FromAttributes> QueryStream<SemanticSearchResult<Entity<T>>>
for SemanticSearchQuery<Entity<T>>
{
async fn send(
self,
) -> Result<
impl Stream<Item = Result<SemanticSearchResult<Entity<T>>, DatabaseError>>,
DatabaseError,
> {
let match_entity = MatchEntity::new(&self.space_id, &self.version);

let query = self.subquery().with(
vec!["e".to_string(), "score".to_string()],
match_entity.chain(
"e",
"attrs",
"types",
Some(vec!["score".to_string()]),
"RETURN e{.*, attrs: attrs, types: types, score: score}",
),
);

if cfg!(debug_assertions) || cfg!(test) {
tracing::info!(
"entity_node::FindManyQuery::<Entity<T>>:\n{}\nparams:{:?}",
query.compile(),
query.params
);
};

#[derive(Debug, serde::Deserialize)]
struct RowResult {
#[serde(flatten)]
node: EntityNode,
attrs: Vec<AttributeNode>,
types: Vec<EntityNode>,
score: f64,
}

let stream = self
.neo4j
.execute(query.build())
.await?
.into_stream_as::<RowResult>()
.map_err(DatabaseError::from)
.map(|row_result| {
row_result.and_then(|row| {
T::from_attributes(row.attrs.into())
.map(|data| SemanticSearchResult {
entity: Entity {
node: row.node,
attributes: data,
types: row.types.into_iter().map(|t| t.id).collect(),
},
score: row.score,
})
.map_err(DatabaseError::from)
})
});

Ok(stream)
}
}
25 changes: 15 additions & 10 deletions grc20-core/src/mapping/entity/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ pub struct EntityFilter {
pub(crate) id: Option<PropFilter<String>>,
pub(crate) attributes: Vec<AttributeFilter>,
pub(crate) relations: Option<EntityRelationFilter>,
/// Used to check if the entity exists in the space (i.e.: the entity
/// has at least one attribute in the space).
pub(crate) space_id: Option<PropFilter<String>>,
}

Expand Down Expand Up @@ -284,6 +286,7 @@ impl<'a> MatchEntity<'a> {
node_var: impl Into<String>,
attributes_node_var: impl Into<String>,
types_node_var: impl Into<String>,
extra_vars: Option<Vec<String>>,
next: impl Subquery,
) -> QueryBuilder {
let node_var = node_var.into();
Expand All @@ -294,21 +297,23 @@ impl<'a> MatchEntity<'a> {
// let attributes_node_var = format!("{entity_node_var}_attributes");
// let types_node_var = format!("{entity_node_var}_types");

let with_vars = vec![
node_var.clone(),
format!("COLLECT(DISTINCT {attributes_node_var}{{.*}}) AS {attributes_node_var}"),
format!("COLLECT(DISTINCT {types_node_var}{{.*}}) AS {types_node_var}"),
];
let with_vars = if let Some(extra_vars) = extra_vars {
with_vars.into_iter().chain(extra_vars).collect()
} else {
with_vars
};

QueryBuilder::default()
.subquery(
self.match_attributes
.subquery(&node_var, &attributes_node_var),
)
.subquery(self.match_types.subquery(&node_var, &types_node_var))
.with(
vec![
node_var,
format!(
"COLLECT(DISTINCT {attributes_node_var}{{.*}}) AS {attributes_node_var}"
),
format!("COLLECT(DISTINCT {types_node_var}{{.*}}) AS {types_node_var}"),
],
next,
)
.with(with_vars, next)
}
}
Loading