diff --git a/grc20-core/src/mapping/entity/mod.rs b/grc20-core/src/mapping/entity/mod.rs index 93b8efb..3a103f9 100644 --- a/grc20-core/src/mapping/entity/mod.rs +++ b/grc20-core/src/mapping/entity/mod.rs @@ -6,6 +6,7 @@ pub mod find_path; pub mod insert_many; pub mod insert_one; pub mod models; +pub mod prefiltered_semantic_search; pub mod search_with_traversals; pub mod semantic_search; pub mod utils; @@ -16,6 +17,7 @@ pub use find_one::FindOneQuery; pub use find_path::FindPathQuery; pub use insert_one::InsertOneQuery; pub use models::{Entity, EntityNode, EntityNodeRef, SystemProperties}; +pub use prefiltered_semantic_search::PrefilteredSemanticSearchQuery; pub use search_with_traversals::SearchWithTraversals; pub use semantic_search::SemanticSearchQuery; pub use utils::{EntityFilter, EntityRelationFilter, TypesFilter}; @@ -132,6 +134,13 @@ pub fn search(neo4j: &neo4rs::Graph, vector: Vec) -> SemanticSearchQuery SemanticSearchQuery::new(neo4j, vector) } +pub fn prefiltered_search( + neo4j: &neo4rs::Graph, + vector: Vec, +) -> PrefilteredSemanticSearchQuery { + PrefilteredSemanticSearchQuery::new(neo4j, vector) +} + pub fn search_from_restictions( neo4j: &neo4rs::Graph, vector: Vec, diff --git a/grc20-core/src/mapping/entity/prefiltered_semantic_search.rs b/grc20-core/src/mapping/entity/prefiltered_semantic_search.rs new file mode 100644 index 0000000..55b0cdd --- /dev/null +++ b/grc20-core/src/mapping/entity/prefiltered_semantic_search.rs @@ -0,0 +1,212 @@ +use futures::{Stream, StreamExt, TryStreamExt}; + +use crate::{ + entity::utils::MatchEntity, + error::DatabaseError, + mapping::{ + query_utils::VersionFilter, AttributeNode, FromAttributes, PropFilter, QueryBuilder, + QueryStream, Subquery, + }, +}; + +use super::{Entity, EntityFilter, EntityNode}; + +pub struct PrefilteredSemanticSearchQuery { + neo4j: neo4rs::Graph, + vector: Vec, + filter: EntityFilter, + space_id: Option>, + version: VersionFilter, + threshold: f64, + limit: usize, + skip: Option, + + 
_marker: std::marker::PhantomData, +} + +impl PrefilteredSemanticSearchQuery { + pub fn new(neo4j: &neo4rs::Graph, vector: Vec) -> Self { + Self { + neo4j: neo4j.clone(), + vector, + filter: EntityFilter::default(), + space_id: None, + version: VersionFilter::default(), + limit: 100, + threshold: 0.75, + skip: None, + + _marker: std::marker::PhantomData, + } + } + + pub fn filter(mut self, filter: EntityFilter) -> Self { + self.filter = filter; + self + } + + pub fn space_id(mut self, filter: PropFilter) -> Self { + self.space_id = Some(filter); + self + } + + pub fn version(mut self, version: impl Into) -> Self { + self.version.version_mut(version.into()); + self + } + + pub fn limit(mut self, limit: usize) -> Self { + self.limit = limit; + self + } + + pub fn limit_opt(mut self, limit: Option) -> Self { + if let Some(limit) = limit { + self.limit = limit; + } + self + } + + pub fn threshold(mut self, threshold: f64) -> Self { + self.threshold = threshold; + self + } + + pub fn skip(mut self, skip: usize) -> Self { + self.skip = Some(skip); + self + } + + pub fn skip_opt(mut self, skip: Option) -> Self { + self.skip = skip; + self + } + + fn subquery(&self) -> QueryBuilder { + const QUERY: &str = const_format::formatcp!( + r#" + MATCH (e:Entity) -[r:ATTRIBUTE]-> (a:Attribute:Indexed) + WHERE r.max_version IS null + AND a.embedding IS NOT NULL + WITH e, a, r, vector.similarity.cosine(a.embedding, $vector) AS score + ORDER BY score DESC + WHERE score > $threshold + "#, + ); + + QueryBuilder::default() + .subquery(self.filter.subquery("e")) + .subquery(QUERY) + .limit(self.limit) + .skip_opt(self.skip) + .params("vector", self.vector.clone()) + .params("limit", self.limit as i64) + .params("threshold", self.threshold) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct SemanticSearchResult { + pub entity: T, + pub score: f64, +} +impl QueryStream> for PrefilteredSemanticSearchQuery { + async fn send( + self, + ) -> Result< + impl Stream, DatabaseError>>, + 
DatabaseError, + > { + let query = self.subquery().r#return("DISTINCT e, score"); + + if cfg!(debug_assertions) || cfg!(test) { + tracing::info!( + "entity_node::PrefilteredSemanticSearch:::\n{}", + query.compile() + ); + }; + + #[derive(Debug, serde::Deserialize)] + struct RowResult { + e: EntityNode, + score: f64, + } + + Ok(self + .neo4j + .execute(query.build()) + .await? + .into_stream_as::() + .map_err(DatabaseError::from) + .and_then(|row| async move { + Ok(SemanticSearchResult { + entity: row.e, + score: row.score, + }) + })) + } +} + +impl QueryStream>> + for PrefilteredSemanticSearchQuery> +{ + async fn send( + self, + ) -> Result< + impl Stream>, DatabaseError>>, + DatabaseError, + > { + let match_entity = MatchEntity::new(&self.space_id, &self.version); + + let query = self.subquery().with( + vec!["e".to_string(), "score".to_string()], + match_entity.chain( + "e", + "attrs", + "types", + Some(vec!["score".to_string()]), + "RETURN e{.*, attrs: attrs, types: types, score: score}", + ), + ); + + if cfg!(debug_assertions) || cfg!(test) { + tracing::info!( + "entity_node::PrefilteredSemanticSearch::>:\n{}\nparams:{:?}", + query.compile(), + query.params + ); + }; + + #[derive(Debug, serde::Deserialize)] + struct RowResult { + #[serde(flatten)] + node: EntityNode, + attrs: Vec, + types: Vec, + score: f64, + } + + let stream = self + .neo4j + .execute(query.build()) + .await? 
+ .into_stream_as::() + .map_err(DatabaseError::from) + .map(|row_result| { + row_result.and_then(|row| { + T::from_attributes(row.attrs.into()) + .map(|data| SemanticSearchResult { + entity: Entity { + node: row.node, + attributes: data, + types: row.types.into_iter().map(|t| t.id).collect(), + }, + score: row.score, + }) + .map_err(DatabaseError::from) + }) + }); + + Ok(stream) + } +} diff --git a/grc20-core/src/mapping/entity/search_with_traversals.rs b/grc20-core/src/mapping/entity/search_with_traversals.rs index ade6741..1e08c76 100644 --- a/grc20-core/src/mapping/entity/search_with_traversals.rs +++ b/grc20-core/src/mapping/entity/search_with_traversals.rs @@ -86,7 +86,7 @@ impl SearchWithTraversals { fn subquery(&self) -> QueryBuilder { const QUERY: &str = r#" - CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector) + CALL db.index.vector.queryNodes('vector_index', $effective_search_ratio, $vector) YIELD node AS n, score AS score WHERE score > $threshold MATCH (e:Entity) -[r:ATTRIBUTE]-> (n) diff --git a/grc20-core/src/mapping/entity/semantic_search.rs b/grc20-core/src/mapping/entity/semantic_search.rs index 22c181a..a0155a6 100644 --- a/grc20-core/src/mapping/entity/semantic_search.rs +++ b/grc20-core/src/mapping/entity/semantic_search.rs @@ -77,7 +77,7 @@ impl SemanticSearchQuery { fn subquery(&self) -> QueryBuilder { const QUERY: &str = r#" - CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector) + CALL db.index.vector.queryNodes('vector_index', $effective_search_ratio, $vector) YIELD node AS n, score AS score MATCH (e:Entity) -[r:ATTRIBUTE]-> (n) "#; @@ -123,9 +123,8 @@ impl QueryStream> for SemanticSearchQuery:\n{}\nparams:{:?}", - query.compile(), - query.params() + "entity_node::FindManyQuery:::\n{}", + query.compile() ); }; diff --git a/grc20-core/src/mapping/mod.rs b/grc20-core/src/mapping/mod.rs index dc9f1fd..3ee43e4 100644 --- a/grc20-core/src/mapping/mod.rs +++ 
b/grc20-core/src/mapping/mod.rs @@ -28,7 +28,7 @@ pub use value::{Options, Value, ValueType}; use crate::{error::DatabaseError, indexer_ids}; -pub const EFFECTIVE_SEARCH_RATIO: f64 = 100000.0; +pub const EFFECTIVE_SEARCH_RATIO: f64 = 1000000.0; pub fn new_version_index(block_number: u64, idx: usize) -> String { format!("{block_number:016}:{idx:04}") diff --git a/grc20-core/src/mapping/relation/utils.rs b/grc20-core/src/mapping/relation/utils.rs index fba8398..ee3f848 100644 --- a/grc20-core/src/mapping/relation/utils.rs +++ b/grc20-core/src/mapping/relation/utils.rs @@ -43,7 +43,7 @@ impl RelationFilter { .map(|from_filter| from_filter.subquery(from)), ) .subquery_opt(self.to_.as_ref().map(|to_filter| to_filter.subquery(to))) - .subquery(MatchQuery::new(format!( + .subquery(MatchQuery::new_optional(format!( "(rt:Entity {{id: {edge_var}.relation_type}})" ))) .subquery_opt(self.relation_type.as_ref().map(|rt| rt.subquery("rt"))) diff --git a/grc20-core/src/mapping/triple.rs b/grc20-core/src/mapping/triple.rs index c917c9b..dd3485e 100644 --- a/grc20-core/src/mapping/triple.rs +++ b/grc20-core/src/mapping/triple.rs @@ -688,7 +688,7 @@ impl QueryStream for SemanticSearchQuery { { const QUERY: &str = const_format::formatcp!( r#" - CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector) + CALL db.index.vector.queryNodes('vector_index', $effective_search_ratio, $vector) YIELD node AS n, score AS score ORDER BY score DESC LIMIT $limit diff --git a/mcp-server/resources/get_entity_info_description.md b/mcp-server/resources/get_entity_info_description.md index 97bf061..fc17802 100644 --- a/mcp-server/resources/get_entity_info_description.md +++ b/mcp-server/resources/get_entity_info_description.md @@ -35,87 +35,27 @@ ToolResult> "name": "Product Engineer at Geo", "relation_id": "KPTqdNpCusxfM37KbKPX8w", "relation_type": "Related spaces" - }, - { - "id": "NcQ3h9jeJSavVd8iFsUxvD", - "name": "Senior Civil Engineer @ Golden Gate Bridge, Highway 
& Transportation District", - "relation_id": "AqpNtJ3XxaY4fqRCyoXbdt", - "relation_type": "Cities" - }, - { - "id": "4ojV4dS1pV2tRnzXTpcMKJ", - "name": "Senior Plan Check Engineer (FT - Hybrid) @ CSG Consultants, Inc.", - "relation_id": "3AX4j43nywT5eBRV3s6AXi", - "relation_type": "Cities" - }, - { - "id": "QoakYWCuv85FVuYdSmonxr", - "name": "Senior Civil Engineer - Land Development (FT - Hybrid) @ CSG Consultants, Inc.", - "relation_id": "8GEF1i3LK4Z56THjE8dVku", - "relation_type": "Cities" - }, - { - "id": "JuV7jLoypebzLhkma6oZoU", - "name": "Lead Django Backend Engineer @ Textme Inc", - "relation_id": "46aBsQyBq15DimJ2i1DX4a", - "relation_type": "Cities" - }, - { - "id": "RTmcYhLVmmfgUn9L3D1J3y", - "name": "Chief Engineer @ Wyndham Hotels & Resorts", - "relation_id": "8uYxjzkkdjskDQAeTQomvc", - "relation_type": "Cities" - } + }, ... ], "outbound_relations": [ - { - "id": "CUoEazCD7EmzXPTFFY8gGY", - "name": "No name", - "relation_id": "5WeSkkE1XXvGJGmXj9VUQ8", - "relation_type": "Cover" - }, { "id": "7gzF671tq5JTZ13naG4tnr", "name": "Space", "relation_id": "WUZCXE1UGRtxdNQpGug8Tf", "relation_type": "Types" }, + { + "id": "CUoEazCD7EmzXPTFFY8gGY", + "name": "No name", + "relation_id": "5WeSkkE1XXvGJGmXj9VUQ8", + "relation_type": "Cover" + }, { "id": "D6Wy4bdtdoUrG3PDZceHr", "name": "City", "relation_id": "ARMj8fjJtdCwbtZa1f3jwe", "relation_type": "Types" - }, - { - "id": "AhidiWYnQ8fAbHqfzdU74k", - "name": "Upcoming events", - "relation_id": "V1ikGW9riu7dAP8rMgZq3u", - "relation_type": "Blocks" - }, - { - "id": "T6iKbwZ17iv4dRdR9Qw7qV", - "name": "Trending restaurants", - "relation_id": "CvGXCmGXE7ofsgZeWad28p", - "relation_type": "Blocks" - }, - { - "id": "X18WRE36mjwQ7gu3LKaLJS", - "name": "Neighborhoods", - "relation_id": "Uxpsee9LoTgJqMFfAQyJP6", - "relation_type": "Blocks" - }, - { - "id": "HeC2pygci2tnvjTt5aEnBV", - "name": "Top goals", - "relation_id": "5WMTAzCnZH9Bsevou9GQ3K", - "relation_type": "Blocks" - }, - { - "id": "5YtYFsnWq1jupvh5AjM2ni", - "name": 
"Culture", - "relation_id": "5TmxfepRr1THMRkGWenj5G", - "relation_type": "Tabs" - } + }, ... ] } ``` diff --git a/mcp-server/resources/name_search_entity_description.md b/mcp-server/resources/name_search_entity_description.md deleted file mode 100644 index 8377684..0000000 --- a/mcp-server/resources/name_search_entity_description.md +++ /dev/null @@ -1,81 +0,0 @@ -This request allows you to get Entities from a name/description search and traversal from that query by using relation name. - -Example Query: Find employees that works at The Graph. - -ToolCall> -``` -name_search_entity( - { - "query": "The Graph", - "traversal_filter": { - "relation_type_id": "Works at", - "direction": "From" - } - } -) -``` - -ToolResult> -``` -{ - "entities": [ - { - "description": "Founder & CEO of Geo. Cofounder of The Graph, Edge & Node, House of Web3. Building a vibrant decentralized future.", - "id": "9HsfMWYHr9suYdMrtssqiX", - "name": "Yaniv Tal" - }, - { - "description": "Developer Relations Engineer", - "id": "22MGz47c9WHtRiHuSEPkcG", - "name": "Kevin Jones" - }, - { - "description": "Description will go here", - "id": "JYTfEcdmdjiNzBg469gE83", - "name": "Pedro Diogo" - } - ] -} -``` - -Example Query: Find all the articles written by employees that works at The Graph. - -ToolCall> -``` -name_search_entity( - { - "query": "The Graph", - "traversal_filter": { - "relation_type_id": "Works at", - "direction": "From", - "traversal_filter": { - "relation_type_id": "Author", - "direction": "From" - } - } - } -) -``` - -ToolResult> -``` -{ - "entities": [ - { - "description": "A fresh look at what web3 is and what the missing pieces have been for making it a reality.", - "id": "XYo6aR3VqFQSEcf6AeTikW", - "name": "Knowledge graphs are web3" - }, - { - "description": "A new standard is here for structuring knowledge. 
GRC-20 will reshape how we make applications composable and redefine web3.", - "id": "5FkVvS4mTz6Ge7wHkAUMRk", - "name": "Introducing GRC-20: A knowledge graph standard for web3" - }, - { - "description": "How do you know what is true? Who do you trust? Everybody has a point of view, but no one is an authority. As humanity we need a way to aggregate our knowledge into something we can trust. We need a system.", - "id": "5WHP8BuoCdSiqtfy87SYWG", - "name": "Governing public knowledge" - } - ] -} -``` diff --git a/mcp-server/resources/search_entity_description.md b/mcp-server/resources/search_entity_description.md index 6ba6e33..ae3a46a 100644 --- a/mcp-server/resources/search_entity_description.md +++ b/mcp-server/resources/search_entity_description.md @@ -1,103 +1,80 @@ -This request allows you to get the Entities from a name/description search and traversal from that query if needed. +This request allows you to get Entities from a name/description search and traversal from that query by using relation name. - -Example Query: Can you give me information about San Francisco? +Example Query: Find employees that work at The Graph. 
ToolCall> ``` -search_entity({ -"query": "San Francisco" -}) +search_entity( + { + "query": "The Graph", + "traversal_filter": { + "relation_type": "Works at", + "direction": "From" + } + } +) ``` -Tool Result> + +ToolResult> ``` { "entities": [ { - "description": "A vibrant city known for its iconic Golden Gate Bridge, steep rolling hills, historic cable cars, and a rich cultural tapestry including diverse neighborhoods like the Castro and the Mission District.", - "id": "3qayfdjYyPv1dAYf8gPL5r", - "name": "San Francisco" - }, - { - "description": null, - "id": "W5ZEpuy3Tij1XSXtJLruQ5", - "name": "SF Bay Area" - }, - { - "description": null, - "id": "RHoJT3hNVaw7m5fLLtZ8WQ", - "name": "California" - }, - { - "description": null, - "id": "Sh1qtjr4i92ZD6YGPeu5a2", - "name": "Abundant housing in San Francisco" - }, - { - "description": null, - "id": "UqLf9fTVKHkDs3LzP9zHpH", - "name": "Public safety in San Francisco" - }, - { - "description": null, - "id": "BeyiZ6oLqLMaSXiG41Yxtf", - "name": "City" - }, - { - "description": null, - "id": "D6Wy4bdtdoUrG3PDZceHr", - "name": "City" - }, - { - "description": null, - "id": "JWVrgUXmjS75PqNX2hry5q", - "name": "Clean streets in San Francisco" + "description": "Founder & CEO of Geo. Cofounder of The Graph, Edge & Node, House of Web3. Building a vibrant decentralized future.", + "id": "9HsfMWYHr9suYdMrtssqiX", + "name": "Yaniv Tal" }, { - "description": null, - "id": "DcA2c7ooFTgEdtaRcaj7Z1", - "name": "Revitalizing downtown San Francisco" + "description": "Developer Relations Engineer", + "id": "22MGz47c9WHtRiHuSEPkcG", + "name": "Kevin Jones" }, { - "description": null, - "id": "KWBLj9czHBBmYUT98rnxVM", - "name": "Location" + "description": "Description will go here", + "id": "JYTfEcdmdjiNzBg469gE83", + "name": "Pedro Diogo" } ] } ``` -Another Query: Give me the employees that work at The Graph? +Example Query: Find all the articles written by employees that work at The Graph. 
-Work_at id: U1uCAzXsRSTP4vFwo1JwJG ToolCall> ``` -search_entity({ -"query": "The Graph", -"traversal_filter": { - "relation_type_id": "U1uCAzXsRSTP4vFwo1JwJG", - "direction": "From" -} -}) +search_entity( + { + "query": "The Graph", + "traversal_filter": { + "relation_type": "Works at", + "direction": "From", + "traversal_filter": { + "relation_type": "Author", + "direction": "From" + } + } + } +) ``` + ToolResult> ``` { "entities": [ { - "description": "Founder & CEO of Geo. Cofounder of The Graph, Edge & Node, House of Web3. Building a vibrant decentralized future.", - "id": "9HsfMWYHr9suYdMrtssqiX", - "name": "Yaniv Tal" + "description": "A fresh look at what web3 is and what the missing pieces have been for making it a reality.", + "id": "XYo6aR3VqFQSEcf6AeTikW", + "name": "Knowledge graphs are web3" }, { - "description": "Developer Relations Engineer", - "id": "22MGz47c9WHtRiHuSEPkcG", - "name": "Kevin Jones" + "description": "A new standard is here for structuring knowledge. GRC-20 will reshape how we make applications composable and redefine web3.", + "id": "5FkVvS4mTz6Ge7wHkAUMRk", + "name": "Introducing GRC-20: A knowledge graph standard for web3" }, { - "description": "Description will go here", - "id": "JYTfEcdmdjiNzBg469gE83", - "name": "Pedro Diogo" + "description": "How do you know what is true? Who do you trust? Everybody has a point of view, but no one is an authority. As humanity we need a way to aggregate our knowledge into something we can trust. We need a system.", + "id": "5WHP8BuoCdSiqtfy87SYWG", + "name": "Governing public knowledge" } ] } diff --git a/mcp-server/resources/search_entity_using_ids_description.md b/mcp-server/resources/search_entity_using_ids_description.md new file mode 100644 index 0000000..740f54a --- /dev/null +++ b/mcp-server/resources/search_entity_using_ids_description.md @@ -0,0 +1,38 @@ +This request allows you to get the Entities from a name/description search and traversal from that query if needed. 
+ + +Example Query: Give me the employees that work at The Graph? + +Work_at id: U1uCAzXsRSTP4vFwo1JwJG +ToolCall> +``` +search_entity_using_ids({ +"query": "The Graph", +"traversal_filter": { + "relation_type": "U1uCAzXsRSTP4vFwo1JwJG", + "direction": "From" +} +}) +``` +ToolResult> +``` +{ + "entities": [ + { + "description": "Founder & CEO of Geo. Cofounder of The Graph, Edge & Node, House of Web3. Building a vibrant decentralized future.", + "id": "9HsfMWYHr9suYdMrtssqiX", + "name": "Yaniv Tal" + }, + { + "description": "Developer Relations Engineer", + "id": "22MGz47c9WHtRiHuSEPkcG", + "name": "Kevin Jones" + }, + { + "description": "Description will go here", + "id": "JYTfEcdmdjiNzBg469gE83", + "name": "Pedro Diogo" + } + ] +} +``` diff --git a/mcp-server/resources/search_properties_description.md b/mcp-server/resources/search_properties_description.md deleted file mode 100644 index 7b1c41f..0000000 --- a/mcp-server/resources/search_properties_description.md +++ /dev/null @@ -1,25 +0,0 @@ -This request allows you to search by name for the ATTRIBUTES (properties) that can be used to describe an Entity. - - -ToolCall> search_properties("Authors") -ToolResult> -``` -[ - [ - { - "attribute_name": "Name", - "attribute_value": "Authors", - "entity_id": "JzFpgguvcCaKhbQYPHsrNT" - } - ], - [ - { - "attribute_name": "Name", - "attribute_value": "Owners", - "entity_id": "RwDfM3vUvyLwSNYv6sWhc9" - } - ] -] -``` - -Since all the Relations are also of the type Entity. they can be queried by their id for more information. 
diff --git a/mcp-server/resources/search_relation_type_description.md b/mcp-server/resources/search_relation_type_description.md index 02134b4..e709bcf 100644 --- a/mcp-server/resources/search_relation_type_description.md +++ b/mcp-server/resources/search_relation_type_description.md @@ -4,40 +4,26 @@ ToolCall> search_relation_types("works at") ToolResult> ``` [ - [ - { - "attribute_name": "Name", - "attribute_value": "Works at", - "entity_id": "U1uCAzXsRSTP4vFwo1JwJG" - }, - { - "attribute_name": "Is type property", - "attribute_value": "0", - "entity_id": "U1uCAzXsRSTP4vFwo1JwJG" - } - ], - [ - { - "attribute_name": "Name", - "attribute_value": "Worked at", - "entity_id": "8fvqALeBDwEExJsDeTcvnV" - }, - { - "attribute_name": "Is type property", - "attribute_value": "0", - "entity_id": "8fvqALeBDwEExJsDeTcvnV" - }, - { - "attribute_name": "Name", - "attribute_value": "Worked at", - "entity_id": "8fvqALeBDwEExJsDeTcvnV" - }, - { - "attribute_name": "Description", - "attribute_value": "A project that someone worked at in the past. Details about the role can be added as properties on the relation.", - "entity_id": "8fvqALeBDwEExJsDeTcvnV" - } - ] + { + "description": null, + "entity_id": "U1uCAzXsRSTP4vFwo1JwJG", + "name": "Works at" + }, + { + "description": "A project that someone worked at in the past. Details about the role can be added as properties on the relation.", + "entity_id": "8fvqALeBDwEExJsDeTcvnV", + "name": "Worked at" + }, + { + "description": "The supervisor to this position. In the case of a clerkship, the supervising judge.", + "entity_id": "WnzSw9CWE7mtgwRokF8Qxh", + "name": "Supervisor" + }, + { + "description": null, + "entity_id": "Gri4x41WSPUtpwG8BzhTpa", + "name": "Tasks" + }, ... 
] ``` diff --git a/mcp-server/resources/search_space_description.md b/mcp-server/resources/search_space_description.md deleted file mode 100644 index 3224922..0000000 --- a/mcp-server/resources/search_space_description.md +++ /dev/null @@ -1,40 +0,0 @@ -This request allows you to find a Space from it's name or description. The spaces are where the attributes and relations are and may be useful to specify when querying entities and relations. - -ToolCall> -``` -search_space("San Francisco") -``` - -ToolResult> -``` -[ - [ - { - "attribute_name": "Description", - "attribute_value": "A vibrant city known for its iconic Golden Gate Bridge, steep rolling hills, historic cable cars, and a rich cultural tapestry including diverse neighborhoods like the Castro and the Mission District.", - "entity_id": "3qayfdjYyPv1dAYf8gPL5r" - }, - { - "attribute_name": "Name", - "attribute_value": "San Francisco", - "entity_id": "3qayfdjYyPv1dAYf8gPL5r" - } - ], - [ - { - "attribute_name": "Name", - "attribute_value": "SF Bay Area", - "entity_id": "W5ZEpuy3Tij1XSXtJLruQ5" - } - ], - [ - { - "attribute_name": "Name", - "attribute_value": "California", - "entity_id": "RHoJT3hNVaw7m5fLLtZ8WQ" - } - ] -] -``` - -Eventually, space will be used to narrow research or help format result diff --git a/mcp-server/resources/search_type_description.md b/mcp-server/resources/search_type_description.md index 478367f..f90b4fc 100644 --- a/mcp-server/resources/search_type_description.md +++ b/mcp-server/resources/search_type_description.md @@ -1,35 +1,25 @@ This request allows you to search by name for a basic type of the Knowledge Graph(KG) like Person or Event. This will give back the type with it's name, id and description. 
-ToolCall> search_type("University") +ToolCall> search_types("University") ToolResult> ``` [ - [ - { - "attribute_name": "Description", - "attribute_value": "An institution of higher education offering undergraduate and graduate degrees, research opportunities, and specialized academic programs.", - "entity_id": "L8iozarUyS8bkcUiS6kPqV" - }, - { - "attribute_name": "Name", - "attribute_value": "University", - "entity_id": "L8iozarUyS8bkcUiS6kPqV" - } - ], - [ - { - "attribute_name": "Description", - "attribute_value": "An educational institution where students acquire knowledge, skills, and credentials through structured learning programs.", - "entity_id": "M89C7wwdJVaCW9rAVQpJbY" - }, - { - "attribute_name": "Name", - "attribute_value": "School", - "entity_id": "M89C7wwdJVaCW9rAVQpJbY" - } - ] + { + "description": "An institution of higher education offering undergraduate and graduate degrees, research opportunities, and specialized academic programs.", + "entity_id": "L8iozarUyS8bkcUiS6kPqV", + "name": "University" + }, + { + "description": "An educational institution where students acquire knowledge, skills, and credentials through structured learning programs.", + "entity_id": "M89C7wwdJVaCW9rAVQpJbY", + "name": "School" + }, + { + "description": null, + "entity_id": "ExCjm3rzYVfpMRwDchdrE", + "name": "Academic field" + }, ... ] ``` - Since all the types are also of the type Entity. they can be queried by their id for more information. 
diff --git a/mcp-server/src/input_types.rs b/mcp-server/src/input_types.rs index 4fa59af..1464cb1 100644 --- a/mcp-server/src/input_types.rs +++ b/mcp-server/src/input_types.rs @@ -8,7 +8,7 @@ pub struct SearchTraversalInputFilter { #[derive(Debug, serde::Deserialize, serde::Serialize, schemars::JsonSchema)] pub struct TraversalFilter { pub direction: RelationDirection, - pub relation_type_id: String, + pub relation_type: String, #[serde(skip_serializing_if = "Option::is_none")] pub traversal_filter: Option>, } diff --git a/mcp-server/src/main.rs b/mcp-server/src/main.rs index 3147abe..f872489 100644 --- a/mcp-server/src/main.rs +++ b/mcp-server/src/main.rs @@ -1,18 +1,14 @@ use clap::{Args, Parser}; use fastembed::{EmbeddingModel, InitOptions, TextEmbedding}; -use futures::{TryStreamExt, future::join_all, pin_mut}; +use futures::{TryStreamExt, future::join_all}; use grc20_core::{ entity::{ self, Entity, EntityFilter, EntityNode, EntityRelationFilter, utils::TraverseRelation, }, mapping::{ - Query, QueryStream, RelationEdge, Triple, prop_filter, - query_utils::RelationDirection, - triple::{self, SemanticSearchResult}, + Query, QueryStream, RelationEdge, prop_filter, query_utils::RelationDirection, triple, }, - neo4rs, - relation::{self, RelationFilter}, - system_ids, + neo4rs, relation, system_ids, }; use grc20_sdk::models::BaseEntity; use mcp_server::input_types::{self, SearchTraversalInputFilter}; @@ -24,7 +20,7 @@ use rmcp::{ transport::sse_server::{SseServer, SseServerConfig}, }; use serde_json::{Value, json}; -use std::{collections::HashSet, sync::Arc}; +use std::{collections::HashSet, sync::Arc, time::Instant, vec}; use tracing_subscriber::{ layer::SubscriberExt, util::SubscriberInitExt, @@ -113,87 +109,6 @@ impl KnowledgeGraph { RawResource::new(uri, name.to_string()).no_annotation() } - async fn search( - &self, - query: String, - limit: Option, - ) -> Result, McpError> { - let embedding = self - .embedding_model - .embed(vec![&query], None) - 
.expect("Failed to get embedding") - .pop() - .expect("Embedding is empty") - .into_iter() - .map(|v| v as f64) - .collect::>(); - - let limit = limit.unwrap_or(10); - - let semantic_search_triples = triple::search(&self.neo4j, embedding) - .limit(limit) - .send() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - })? - .try_collect::>() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - })?; - Ok(semantic_search_triples) - } - - async fn get_ids_from_search( - &self, - search_triples: Vec, - create_relation_filter: impl Fn(SemanticSearchResult) -> RelationFilter, - ) -> Result, McpError> { - let mut seen_ids: HashSet = HashSet::new(); - let mut result_ids: Vec = Vec::new(); - - for semantic_search_triple in search_triples { - let filtered_for_types = relation::find_many::>(&self.neo4j) - .filter(create_relation_filter(semantic_search_triple)) - .send() - .await; - - //We only need to get the first relation since they would share the same entity id - if let Ok(stream) = filtered_for_types { - pin_mut!(stream); - if let Some(edge) = stream.try_next().await.ok().flatten() { - let id = edge.from.id; - if seen_ids.insert(id.clone()) { - result_ids.push(id); - } - } - } - } - Ok(result_ids) - } - - async fn format_triples_detailled( - &self, - triples: Result, ErrorData>, - ) -> Vec { - if let Ok(triples) = triples { - join_all(triples.into_iter().map(|triple| async move {json!({ - "entity_id": triple.entity, - "attribute_name": self.get_name_of_id(triple.attribute).await.unwrap_or("No attribute name".to_string()), - "attribute_value": String::try_from(triple.value).unwrap_or("No value".to_string()) - })})).await.to_vec() - } else { - Vec::new() - } - } - #[tool(description = include_str!("../resources/search_type_description.md"))] async fn search_types( &self, @@ -201,55 +116,21 @@ impl KnowledgeGraph { 
#[schemars(description = "The query string to search for types")] query: String, ) -> Result { - let semantic_search_triples = self.search(query, Some(10)).await.unwrap_or_default(); - - let create_relation_filter = |search_result: SemanticSearchResult| { - RelationFilter::default() - .from_(EntityFilter::default().id(prop_filter::value(search_result.triple.entity))) - .relation_type( - EntityFilter::default().id(prop_filter::value(system_ids::TYPES_ATTRIBUTE)), - ) - .to_(EntityFilter::default().id(prop_filter::value(system_ids::SCHEMA_TYPE))) - }; + let filter_type = EntityFilter::default().relations( + EntityRelationFilter::default() + .relation_type(prop_filter::value(system_ids::TYPES_ATTRIBUTE)) + .to_id(prop_filter::value(system_ids::SCHEMA_TYPE)), + ); - let result_types = self - .get_ids_from_search(semantic_search_triples, &create_relation_filter) + let search_ids = self + .query_search(query, None, filter_type) .await .unwrap_or_default(); - let entities: Vec, McpError>> = - join_all(result_types.into_iter().map(|id| async { - triple::find_many(&self.neo4j) - .entity_id(prop_filter::value(id)) - .send() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - })? 
- .try_collect::>() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - }) - })) - .await - .to_vec(); - Ok(CallToolResult::success( - join_all( - entities - .into_iter() - .map(|result: Result, _>| async { - Content::json(self.format_triples_detailled(result).await) - .expect("Failed to create JSON content") - }), - ) + join_all(search_ids.into_iter().map(|id| async { + Content::json(self.format_from_id(id).await).expect("Failed to create JSON content") + })) .await .to_vec(), )) @@ -262,197 +143,34 @@ impl KnowledgeGraph { #[schemars(description = "The query string to search for relation types")] query: String, ) -> Result { - let semantic_search_triples = self.search(query, Some(10)).await.unwrap_or_default(); - - let create_relation_filter = |search_result: SemanticSearchResult| { - RelationFilter::default() - .from_(EntityFilter::default().id(prop_filter::value(search_result.triple.entity))) - .relation_type( - EntityFilter::default() - .id(prop_filter::value(system_ids::VALUE_TYPE_ATTRIBUTE)), - ) - .to_( - EntityFilter::default() - .id(prop_filter::value(system_ids::RELATION_SCHEMA_TYPE)), - ) - }; - - let result_types = self - .get_ids_from_search(semantic_search_triples, &create_relation_filter) - .await - .unwrap_or_default(); - - let entities: Vec, McpError>> = - join_all(result_types.into_iter().map(|id| async { - triple::find_many(&self.neo4j) - .entity_id(prop_filter::value(id)) - .send() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - })? 
- .try_collect::>() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - }) - })) - .await - .to_vec(); - - Ok(CallToolResult::success( - join_all( - entities - .into_iter() - .map(|result: Result, _>| async { - Content::json(self.format_triples_detailled(result).await) - .expect("Failed to create JSON content") - }), - ) - .await - .to_vec(), - )) - } - - #[tool(description = include_str!("../resources/search_space_description.md"))] - async fn search_space( - &self, - #[tool(param)] - #[schemars(description = "The query string to search for space")] - query: String, - ) -> Result { - let semantic_search_triples = self.search(query, Some(10)).await.unwrap_or_default(); - - let create_relation_filter = |search_result: SemanticSearchResult| { - RelationFilter::default() - .from_(EntityFilter::default().id(prop_filter::value(search_result.triple.entity))) - .relation_type( - EntityFilter::default().id(prop_filter::value(system_ids::TYPES_ATTRIBUTE)), - ) - .to_(EntityFilter::default().id(prop_filter::value(system_ids::SPACE_TYPE))) - }; + let filter_relation_type = EntityFilter::default().relations( + EntityRelationFilter::default() + .relation_type(prop_filter::value(system_ids::VALUE_TYPE_ATTRIBUTE)) + .to_id(prop_filter::value(system_ids::RELATION_SCHEMA_TYPE)), + ); - let result_types = self - .get_ids_from_search(semantic_search_triples, &create_relation_filter) + let search_ids = self + .query_search(query, None, filter_relation_type) .await .unwrap_or_default(); - let entities: Vec, McpError>> = - join_all(result_types.into_iter().map(|id| async { - triple::find_many(&self.neo4j) - .entity_id(prop_filter::value(id)) - .send() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - })? 
- .try_collect::>() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - }) - })) - .await - .to_vec(); - Ok(CallToolResult::success( - join_all( - entities - .into_iter() - .map(|result: Result, _>| async { - Content::json(self.format_triples_detailled(result).await) - .expect("Failed to create JSON content") - }), - ) - .await - .to_vec(), - )) - } - - #[tool(description = include_str!("../resources/search_properties_description.md"))] - async fn search_properties( - &self, - #[tool(param)] - #[schemars(description = "The query string to search for properties")] - query: String, - ) -> Result { - let semantic_search_triples = self.search(query, Some(10)).await.unwrap_or_default(); - - let create_relation_filter = |search_result: SemanticSearchResult| { - RelationFilter::default() - .from_(EntityFilter::default().id(prop_filter::value(search_result.triple.entity))) - .relation_type( - EntityFilter::default().id(prop_filter::value(system_ids::TYPES_ATTRIBUTE)), - ) - .to_(EntityFilter::default().id(prop_filter::value(system_ids::ATTRIBUTE))) - }; - - let result_types = self - .get_ids_from_search(semantic_search_triples, &create_relation_filter) - .await - .unwrap_or_default(); - - let entities: Vec, McpError>> = - join_all(result_types.into_iter().map(|id| async { - triple::find_many(&self.neo4j) - .entity_id(prop_filter::value(id)) - .send() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - })? 
- .try_collect::>() - .await - .map_err(|e| { - McpError::internal_error( - "search_types_failed", - Some(json!({ "error": e.to_string() })), - ) - }) + join_all(search_ids.into_iter().map(|id| async { + Content::json(self.format_from_id(id).await).expect("Failed to create JSON content") })) .await - .to_vec(); - - Ok(CallToolResult::success( - join_all( - entities - .into_iter() - .map(|result: Result, _>| async { - Content::json(self.format_triples_detailled(result).await) - .expect("Failed to create JSON content") - }), - ) - .await .to_vec(), )) } - #[tool(description = include_str!("../resources/search_entity_description.md"))] - async fn search_entity( + #[tool(description = include_str!("../resources/search_entity_using_ids_description.md"))] + async fn search_entity_using_ids( &self, #[tool(param)] #[schemars(description = "A filter of the relation(s) to traverse from the query")] search_traversal_filter: SearchTraversalInputFilter, ) -> Result { - tracing::info!( - "SearchTraversalFilter query: {}", - search_traversal_filter.query - ); + tracing::info!("SearchTraversalFilter query: {:?}", search_traversal_filter); let embedding = self .embedding_model @@ -464,10 +182,22 @@ impl KnowledgeGraph { .map(|v| v as f64) .collect::>(); - let traversal_filters: Vec<_> = search_traversal_filter - .traversal_filter - .map(|relation_filter| relation_filter.into_iter().collect()) - .unwrap_or_default(); + let traversal_filters: Vec> = + match search_traversal_filter.traversal_filter { + Some(traversal_filter_input) => { + join_all(traversal_filter_input.into_iter().map(|filter| async move { + Ok(TraverseRelation::default() + .direction(match filter.direction { + input_types::RelationDirection::From => RelationDirection::From, + input_types::RelationDirection::To => RelationDirection::To, + }) + .relation_type_id(prop_filter::value(filter.relation_type))) + })) + .await + .to_vec() + } + None => Vec::new(), + }; let results_search = traversal_filters .into_iter() @@ 
-476,17 +206,12 @@ impl KnowledgeGraph { &self.neo4j, embedding.clone(), ), - |query, filter| { - query.filter( - EntityFilter::default().traverse_relation( - TraverseRelation::default() - .relation_type_id(filter.relation_type_id) - .direction(match filter.direction { - input_types::RelationDirection::From => RelationDirection::From, - input_types::RelationDirection::To => RelationDirection::To, - }), - ), - ) + |query, result_traversal_filter: Result<_, McpError>| match result_traversal_filter + { + Ok(traversal_filter) => { + query.filter(EntityFilter::default().traverse_relation(traversal_filter)) + } + Err(_) => query, }, ) .limit(10) @@ -507,27 +232,25 @@ impl KnowledgeGraph { ) })?; - let entities_vec: Vec<_> = results_search - .into_iter() - .map(|result| { - json!({ - "id": result.entity.id(), - "name": result.entity.attributes.name, - "description": result.entity.attributes.description, - }) - }) - .collect::>(); - Ok(CallToolResult::success(vec![ - Content::json(json!({ - "entities": entities_vec, - })) + Content::json( + results_search + .into_iter() + .map(|result| { + json!({ + "id": result.entity.id(), + "name": result.entity.attributes.name, + "description": result.entity.attributes.description, + }) + }) + .collect::>(), + ) .expect("Failed to create JSON content"), ])) } - #[tool(description = include_str!("../resources/name_search_entity_description.md"))] - async fn name_search_entity( + #[tool(description = include_str!("../resources/search_entity_description.md"))] + async fn search_entity( &self, #[tool(param)] #[schemars(description = "A filter of the relation(s) to traverse from the query")] @@ -545,49 +268,23 @@ impl KnowledgeGraph { .map(|v| v as f64) .collect::>(); + let start_filters = Instant::now(); + let traversal_filters: Vec> = match search_traversal_filter.traversal_filter { - Some(traversal_filter) => { - join_all(traversal_filter.into_iter().map(|filter| async move { - let rel_embedding = self - .embedding_model - 
.embed(vec![&filter.relation_type_id], None) - .expect("Failed to get embedding") - .pop() - .expect("Embedding is empty") - .into_iter() - .map(|v| v as f64) - .collect::>(); - - let rel_results = entity::search::(&self.neo4j, rel_embedding) - .filter( - entity::EntityFilter::default().relations( - EntityRelationFilter::default() - .relation_type(system_ids::VALUE_TYPE_ATTRIBUTE) - .to_id(system_ids::RELATION_SCHEMA_TYPE), - ), - ) - .limit(10) - .send() - .await - .map_err(|e| { - McpError::internal_error( - "search_relation_types", - Some(json!({ "error": e.to_string() })), - ) - })? - .try_collect::>() + Some(traversal_filter_input) => { + join_all(traversal_filter_input.into_iter().map(|filter| async move { + let filter_relation_type = EntityFilter::default().relations( + EntityRelationFilter::default() + .relation_type(prop_filter::value(system_ids::VALUE_TYPE_ATTRIBUTE)) + .to_id(prop_filter::value(system_ids::RELATION_SCHEMA_TYPE)), + ); + + let relation_ids = self + .query_search(filter.relation_type, None, filter_relation_type) .await - .map_err(|e| { - McpError::internal_error( - "search_relation_types", - Some(json!({ "error": e.to_string() })), - ) - })?; - let relation_ids: Vec = rel_results - .into_iter() - .map(|sem_search| sem_search.entity.id) - .collect(); + .unwrap_or_default(); + Ok(TraverseRelation::default() .direction(match filter.direction { input_types::RelationDirection::From => RelationDirection::From, @@ -601,6 +298,8 @@ impl KnowledgeGraph { None => Vec::new(), }; + let end_filters = Instant::now(); + let results_search = traversal_filters .into_iter() .fold( @@ -608,8 +307,11 @@ impl KnowledgeGraph { &self.neo4j, embedding.clone(), ), - |query, result_ids: Result<_, McpError>| match result_ids { - Ok(ids) => query.filter(EntityFilter::default().traverse_relation(ids)), + |query, result_traversal_filter: Result<_, McpError>| match result_traversal_filter + { + Ok(traversal_filter) => { + 
query.filter(EntityFilter::default().traverse_relation(traversal_filter)) + } Err(_) => query, }, ) @@ -631,21 +333,26 @@ impl KnowledgeGraph { ) })?; - let entities_vec: Vec<_> = results_search - .into_iter() - .map(|result| { - json!({ - "id": result.entity.id(), - "name": result.entity.attributes.name, - "description": result.entity.attributes.description, - }) - }) - .collect::>(); + let end_search = Instant::now(); + let search_time = end_search - end_filters; + let filters_time = end_filters - start_filters; + + tracing::info!("filters time: {filters_time:?}"); + tracing::info!("search time: {search_time:?}"); Ok(CallToolResult::success(vec![ - Content::json(json!({ - "entities": entities_vec, - })) + Content::json( + results_search + .into_iter() + .map(|result| { + json!({ + "id": result.entity.id(), + "name": result.entity.attributes.name, + "description": result.entity.attributes.description, + }) + }) + .collect::>(), + ) .expect("Failed to create JSON content"), ])) } @@ -726,7 +433,7 @@ impl KnowledgeGraph { .map(|result| async move { json!({ "relation_id": result.id, - "relation_type": self.get_name_of_id(result.relation_type).await.unwrap_or("No relation type".to_string()), + "relation_type": self.get_name_of_id(result.relation_type.clone()).await.unwrap_or(result.relation_type.to_string()), "id": if is_inbound {result.from.id.clone()} else {result.to.id.clone()}, "name": self.get_name_of_id(if is_inbound {result.from.id.clone()} else {result.to.id.clone()}).await.unwrap_or("No name".to_string()), }) @@ -780,8 +487,6 @@ impl KnowledgeGraph { .into_iter() .collect::>(); - tracing::info!("Found {} relations", relations.len()); - Ok(CallToolResult::success( join_all(relations .into_iter() @@ -797,6 +502,81 @@ impl KnowledgeGraph { )) } + async fn query_search( + &self, + query: String, + limit: Option, + filter: EntityFilter, + ) -> Result, McpError> { + let embedding = self + .embedding_model + .embed(vec![&query], None) + .expect("Failed to get 
embedding") + .pop() + .expect("Embedding is empty") + .into_iter() + .map(|v| v as f64) + .collect::>(); + + let limit = limit.unwrap_or(10); + let semantic_search_triples = + entity::prefiltered_search::(&self.neo4j, embedding.clone()) + .filter(filter.clone()) + .limit(limit) + .send() + .await + .map_err(|e| { + tracing::error!("Error: {e:?}"); + McpError::internal_error( + "search_types_failed", + Some(json!({ "error": e.to_string() })), + ) + })? + .try_collect::>() + .await + .map_err(|e| { + tracing::error!("Error changing to vec: {e:?}"); + McpError::internal_error( + "search_types_failed", + Some(json!({ "error": e.to_string() })), + ) + })?; + + let mut ids: Vec = semantic_search_triples + .into_iter() + .map(|semantic_search_result| semantic_search_result.entity.id) + .collect(); + + self.dedup(&mut ids); + + Ok(ids) + } + + fn dedup(&self, v: &mut Vec) { + let mut set = HashSet::new(); + + v.retain(|x| set.insert(x.clone())); + } + + async fn format_from_id(&self, id: String) -> Value { + let mut value = json!({ + "entity_not_found_with_id": id.clone(), + }); + + let result_entity = entity::find_one::>(&self.neo4j, id) + .send() + .await; + + if let Ok(Some(entity_node)) = result_entity { + value = json!({ + "entity_id": entity_node.id(), + "name": entity_node.attributes.name, + "description": entity_node.attributes.description + }); + }; + value + } + async fn get_name_of_id(&self, id: String) -> Result { let entity = entity::find_one::>(&self.neo4j, &id) .send() diff --git a/sink/examples/seed_data.rs b/sink/examples/seed_data.rs index d5640d3..3d7258c 100644 --- a/sink/examples/seed_data.rs +++ b/sink/examples/seed_data.rs @@ -131,27 +131,20 @@ async fn main() -> anyhow::Result<()> { insert_relation( &neo4j, FR_QC_SPACE_ID, - system_ids::TYPES_ATTRIBUTE, - system_ids::SPACE_TYPE, - indexer_ids::INDEXER_SPACE_ID, - ) - .await?; - insert_relation( - &neo4j, + indexer_ids::PARENT_SPACE, FR_SPACE_ID, - system_ids::TYPES_ATTRIBUTE, - 
system_ids::SPACE_TYPE, indexer_ids::INDEXER_SPACE_ID, ) .await?; insert_relation( &neo4j, + FR_SPACE_ID, + indexer_ids::PARENT_SPACE, system_ids::ROOT_SPACE_ID, - system_ids::TYPES_ATTRIBUTE, - system_ids::SPACE_TYPE, indexer_ids::INDEXER_SPACE_ID, ) .await?; + insert_attribute_with_embedding( &neo4j, &embedding_model, @@ -422,7 +415,7 @@ async fn main() -> anyhow::Result<()> { &neo4j, &embedding_model, "Software Engineering", - None, + Some("Description of the software engineering program at Polytl."), [PROGRAM_TYPE], [], [(DIRECTOR_PROP, OLIVIER_GENDREAU_ID)], @@ -441,11 +434,31 @@ async fn main() -> anyhow::Result<()> { ) .await?; + insert_attribute_with_embedding( + &neo4j, + &embedding_model, + SOFTWARE_ENGINEERING_ID, + system_ids::NAME_ATTRIBUTE, + "Génie logiciel", + FR_QC_SPACE_ID, + ) + .await?; + + insert_attribute_with_embedding( + &neo4j, + &embedding_model, + SOFTWARE_ENGINEERING_ID, + system_ids::DESCRIPTION_ATTRIBUTE, + "Description du programme de génie logiciel à Polytechnique Montréal.", + FR_QC_SPACE_ID, + ) + .await?; + create_entity( &neo4j, &embedding_model, "Computer Engineering", - None, + Some("Description of the Computer engineering program at Polymtl."), [PROGRAM_TYPE], [], [], @@ -459,7 +472,7 @@ async fn main() -> anyhow::Result<()> { &embedding_model, SOFTWARE_ENGINEERING_ID, system_ids::NAME_ATTRIBUTE, - "Génie informatique", + "Génie logiciel", FR_SPACE_ID, ) .await?; @@ -468,7 +481,7 @@ async fn main() -> anyhow::Result<()> { &neo4j, &embedding_model, "Civil Engineering", - None, + Some("Description of the program of civil engineering at polymtl"), [PROGRAM_TYPE], [], [], @@ -502,13 +515,47 @@ async fn main() -> anyhow::Result<()> { (PROGRAM_PROP, CIVIL_ENGINEERING_ID), (PROGRAM_PROP, SOFTWARE_ENGINEERING_ID), (PROGRAM_PROP, COMPUTER_ENGINEERING_ID), - (PROGRAM_PROP, MECANICAL_ENGINEERING_ID), ], Some(POLYMTL_ID), None, ) .await?; + insert_attribute_with_embedding( + &neo4j, + &embedding_model, + POLYMTL_ID, + 
system_ids::NAME_ATTRIBUTE, + "École Polytechnique Montréal", + FR_QC_SPACE_ID, + ) + .await?; + + insert_relation( + &neo4j, + POLYMTL_ID, + PROGRAM_PROP, + SOFTWARE_ENGINEERING_ID, + FR_QC_SPACE_ID, + ) + .await?; + insert_relation( + &neo4j, + POLYMTL_ID, + PROGRAM_PROP, + CIVIL_ENGINEERING_ID, + FR_QC_SPACE_ID, + ) + .await?; + insert_relation( + &neo4j, + POLYMTL_ID, + PROGRAM_PROP, + MECANICAL_ENGINEERING_ID, + FR_QC_SPACE_ID, + ) + .await?; + // Create city entities create_entity( &neo4j,