diff --git a/crates/retrochat-cli/src/commands/mod.rs b/crates/retrochat-cli/src/commands/mod.rs index 4fd2458..48efe46 100644 --- a/crates/retrochat-cli/src/commands/mod.rs +++ b/crates/retrochat-cli/src/commands/mod.rs @@ -5,6 +5,7 @@ pub mod import; pub mod init; pub mod query; pub mod setup; +pub mod summarize; pub mod watch; use clap::{Parser, Subcommand}; @@ -104,6 +105,12 @@ pub enum Commands { command: AnalysisCommands, }, + /// Generate hierarchical summaries for sessions + Summarize { + #[command(subcommand)] + command: SummarizeCommands, + }, + /// Export chat history Export { /// Output format: compact (default) or jsonl @@ -203,6 +210,30 @@ pub enum AnalysisCommands { }, } +#[derive(Subcommand)] +pub enum SummarizeCommands { + /// Summarize turns for sessions + Turns { + /// Session ID to summarize (if not provided, use --all) + session_id: Option, + /// Summarize all sessions + #[arg(long)] + all: bool, + }, + + /// Generate session-level summaries from turn summaries + Sessions { + /// Session ID to summarize (if not provided, use --all) + session_id: Option, + /// Summarize all sessions + #[arg(long)] + all: bool, + }, + + /// Show summarization status for all sessions + Status, +} + #[derive(Subcommand)] pub enum ConfigCommands { /// Get a configuration value @@ -302,6 +333,19 @@ pub async fn run_command(command: Commands) -> anyhow::Result<()> { } }, + // ═══════════════════════════════════════════════════ + // Hierarchical Summarization + // ═══════════════════════════════════════════════════ + Commands::Summarize { command } => match command { + SummarizeCommands::Turns { session_id, all } => { + self::summarize::handle_summarize_turns(session_id, all).await + } + SummarizeCommands::Sessions { session_id, all } => { + self::summarize::handle_summarize_sessions(session_id, all).await + } + SummarizeCommands::Status => self::summarize::handle_summarize_status().await, + }, + // ═══════════════════════════════════════════════════ // Export // ═══════════════════════════════════════════════════ diff --git a/crates/retrochat-cli/src/commands/summarize.rs b/crates/retrochat-cli/src/commands/summarize.rs new file mode 100644 index 0000000..afab9c4 --- /dev/null +++ b/crates/retrochat-cli/src/commands/summarize.rs @@ -0,0 +1,267 @@ +use anyhow::{Context, Result}; +use std::sync::Arc; +use uuid::Uuid; + +use retrochat_core::database::DatabaseManager; +use retrochat_core::env::apis as env_vars; +use retrochat_core::services::{ + google_ai::{GoogleAiClient, GoogleAiConfig}, + SessionSummarizer, TurnDetector, TurnSummarizer, +}; + +/// Handle the summarize turns command +pub async fn handle_summarize_turns(session_id: Option, all: bool) -> Result<()> { + let db_path = retrochat_core::database::config::get_default_db_path()?; + let db_manager = Arc::new(DatabaseManager::new(&db_path).await?); + + // Initialize Google AI client + let api_key = std::env::var(env_vars::GOOGLE_AI_API_KEY) + .context("GOOGLE_AI_API_KEY environment variable is required")?; + + let config = GoogleAiConfig::new(api_key); + let ai_client = GoogleAiClient::new(config)?; + + let summarizer = TurnSummarizer::new(&db_manager, ai_client); + + if all { + summarize_all_sessions_turns(&db_manager, &summarizer).await + } else if let Some(session_id) = session_id { + let uuid = Uuid::parse_str(&session_id).context("Invalid session ID format")?; + summarize_session_turns(&summarizer, &uuid).await + } else { + anyhow::bail!("Either provide a session ID or use --all flag") + } +} + +async fn summarize_session_turns(summarizer: &TurnSummarizer, session_id: &Uuid) -> Result<()> { + println!("Summarizing turns for session {}...", session_id); + + let count = summarizer.summarize_session(session_id).await?; + + println!("Successfully summarized {} turns", count); + Ok(()) +} + +async fn summarize_all_sessions_turns( + db_manager: &DatabaseManager, + summarizer: &TurnSummarizer, +) -> Result<()> { + use retrochat_core::database::ChatSessionRepository; + + let session_repo = ChatSessionRepository::new(db_manager); + let sessions = session_repo.get_all().await?; + + if sessions.is_empty() { + println!("No sessions found to summarize"); + return Ok(()); + } + + println!("Found {} sessions to summarize", sessions.len()); + + let mut success_count = 0; + let mut error_count = 0; + + for session in &sessions { + print!("Summarizing session {}... ", session.id); + + match summarizer.summarize_session(&session.id).await { + Ok(count) => { + println!("OK ({} turns)", count); + success_count += 1; + } + Err(e) => { + println!("FAILED: {}", e); + error_count += 1; + } + } + } + + println!( + "\nCompleted: {} success, {} errors", + success_count, error_count + ); + Ok(()) +} + +/// Handle the summarize sessions command +pub async fn handle_summarize_sessions(session_id: Option, all: bool) -> Result<()> { + let db_path = retrochat_core::database::config::get_default_db_path()?; + let db_manager = Arc::new(DatabaseManager::new(&db_path).await?); + + // Initialize Google AI client + let api_key = std::env::var(env_vars::GOOGLE_AI_API_KEY) + .context("GOOGLE_AI_API_KEY environment variable is required")?; + + let config = GoogleAiConfig::new(api_key); + let ai_client = GoogleAiClient::new(config)?; + + let summarizer = SessionSummarizer::new(&db_manager, ai_client); + + if all { + summarize_all_sessions(&db_manager, &summarizer).await + } else if let Some(session_id) = session_id { + let uuid = Uuid::parse_str(&session_id).context("Invalid session ID format")?; + summarize_single_session(&summarizer, &uuid).await + } else { + anyhow::bail!("Either provide a session ID or use --all flag") + } +} + +async fn summarize_single_session(summarizer: &SessionSummarizer, session_id: &Uuid) -> Result<()> { + println!("Generating session summary for {}...", session_id); + + let summary = summarizer.summarize_session(session_id).await?; + + println!("\nSession Summary:"); + println!(" Title: {}", summary.title); + println!(" Summary: {}", summary.summary); + if let Some(goal) = &summary.primary_goal { + println!(" Goal: {}", goal); + } + if let Some(outcome) = &summary.outcome { + println!(" Outcome: {}", outcome); + } + if let Some(tech) = &summary.technologies_used { + println!(" Technologies: {}", tech.join(", ")); + } + + Ok(()) +} + +async fn summarize_all_sessions( + db_manager: &DatabaseManager, + summarizer: &SessionSummarizer, +) -> Result<()> { + use retrochat_core::database::{ChatSessionRepository, TurnSummaryRepository}; + + let session_repo = ChatSessionRepository::new(db_manager); + let turn_summary_repo = TurnSummaryRepository::new(db_manager); + + let sessions = session_repo.get_all().await?; + + if sessions.is_empty() { + println!("No sessions found to summarize"); + return Ok(()); + } + + // Only summarize sessions that have turn summaries + let mut sessions_with_turns = Vec::new(); + for session in &sessions { + let turn_count = turn_summary_repo.count_by_session(&session.id).await?; + if turn_count > 0 { + sessions_with_turns.push(session); + } + } + + if sessions_with_turns.is_empty() { + println!("No sessions with turn summaries found. Run 'summarize turns --all' first."); + return Ok(()); + } + + println!( + "Found {} sessions with turn summaries", + sessions_with_turns.len() + ); + + let mut success_count = 0; + let mut error_count = 0; + + for session in &sessions_with_turns { + print!("Summarizing session {}... ", session.id); + + match summarizer.summarize_session(&session.id).await { + Ok(summary) => { + println!("OK: {}", summary.title); + success_count += 1; + } + Err(e) => { + println!("FAILED: {}", e); + error_count += 1; + } + } + } + + println!( + "\nCompleted: {} success, {} errors", + success_count, error_count + ); + Ok(()) +} + +/// Handle the summarize status command +pub async fn handle_summarize_status() -> Result<()> { + let db_path = retrochat_core::database::config::get_default_db_path()?; + let db_manager = Arc::new(DatabaseManager::new(&db_path).await?); + + use retrochat_core::database::{ + ChatSessionRepository, SessionSummaryRepository, TurnSummaryRepository, + }; + + let session_repo = ChatSessionRepository::new(&db_manager); + let turn_summary_repo = TurnSummaryRepository::new(&db_manager); + let session_summary_repo = SessionSummaryRepository::new(&db_manager); + + let sessions = session_repo.get_all().await?; + + if sessions.is_empty() { + println!("No sessions found"); + return Ok(()); + } + + // Detect turns for each session + let turn_detector = TurnDetector::new(&db_manager); + + println!("Session Summarization Status:"); + println!("{:-<80}", ""); + println!( + "{:<36} {:>8} {:>10} {:>10} {:>10}", + "Session ID", "Messages", "Turns", "Turn Sums", "Sess Sum" + ); + println!("{:-<80}", ""); + + let mut total_sessions = 0; + let mut sessions_with_turn_summaries = 0; + let mut sessions_with_session_summary = 0; + + for session in &sessions { + total_sessions += 1; + + let detected_turns = turn_detector.detect_turns(&session.id).await?; + let turn_summary_count = turn_summary_repo.count_by_session(&session.id).await?; + let has_session_summary = session_summary_repo.exists_for_session(&session.id).await?; + + if turn_summary_count > 0 { + sessions_with_turn_summaries += 1; + } + if has_session_summary { + sessions_with_session_summary += 1; + } + + let session_summary_status = if has_session_summary { "Yes" } else { "No" }; + + println!( + "{:<36} {:>8} {:>10} {:>10} {:>10}", + session.id, + session.message_count, + detected_turns.len(), + turn_summary_count, + session_summary_status + ); + } + + println!("{:-<80}", ""); + println!("\nSummary:"); + println!(" Total sessions: {}", total_sessions); + println!( + " Sessions with turn summaries: {} ({:.1}%)", + sessions_with_turn_summaries, + (sessions_with_turn_summaries as f64 / total_sessions as f64) * 100.0 + ); + println!( + " Sessions with session summary: {} ({:.1}%)", + sessions_with_session_summary, + (sessions_with_session_summary as f64 / total_sessions as f64) * 100.0 + ); + + Ok(()) +} diff --git a/crates/retrochat-core/.sqlx/query-16d83cb7e55768d51a99ae240dac3d305c7696877b7d1126269dadc2166ff712.json b/crates/retrochat-core/.sqlx/query-16d83cb7e55768d51a99ae240dac3d305c7696877b7d1126269dadc2166ff712.json deleted file mode 100644 index 2f69ae9..0000000 --- a/crates/retrochat-core/.sqlx/query-16d83cb7e55768d51a99ae240dac3d305c7696877b7d1126269dadc2166ff712.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT session_id FROM analytics_requests WHERE id = ?", - "describe": { - "columns": [ - { - "name": "session_id", - "ordinal": 0, - "type_info": "Text" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - false - ] - }, - "hash": "16d83cb7e55768d51a99ae240dac3d305c7696877b7d1126269dadc2166ff712" -} diff --git a/crates/retrochat-core/.sqlx/query-1f8ef75ee4ce329f878ad852323147d541ee8f69506fbc36386ea34a5c9f6da8.json b/crates/retrochat-core/.sqlx/query-1f8ef75ee4ce329f878ad852323147d541ee8f69506fbc36386ea34a5c9f6da8.json deleted file mode 100644 index 88d0e31..0000000 --- a/crates/retrochat-core/.sqlx/query-1f8ef75ee4ce329f878ad852323147d541ee8f69506fbc36386ea34a5c9f6da8.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n INSERT INTO analytics (\n id, analytics_request_id, session_id, generated_at,\n qualitative_output_json,\n ai_quantitative_output_json,\n metric_quantitative_output_json,\n model_used, analysis_duration_ms\n ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)\n ", - "describe": { - "columns": [], - "parameters": { - "Right": 9 - }, - "nullable": [] - }, - "hash": "1f8ef75ee4ce329f878ad852323147d541ee8f69506fbc36386ea34a5c9f6da8" -} diff --git a/crates/retrochat-core/.sqlx/query-52ec7d328d2f43ace95296caa380aeac9b75704263ada069cb87c6faa6cb4a7d.json b/crates/retrochat-core/.sqlx/query-52ec7d328d2f43ace95296caa380aeac9b75704263ada069cb87c6faa6cb4a7d.json deleted file mode 100644 index fca33d6..0000000 --- a/crates/retrochat-core/.sqlx/query-52ec7d328d2f43ace95296caa380aeac9b75704263ada069cb87c6faa6cb4a7d.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n INSERT INTO analytics_requests (\n id, session_id, status, started_at, completed_at,\n created_by, error_message, custom_prompt\n ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n ", - "describe": { - "columns": [], - "parameters": { - "Right": 8 - }, - "nullable": [] - }, - "hash": "52ec7d328d2f43ace95296caa380aeac9b75704263ada069cb87c6faa6cb4a7d" -} diff --git a/crates/retrochat-core/.sqlx/query-70b55d0a9faf7bea3ba229c92f5e6f81107d887d43ca950bd85c7a7081cd96fa.json b/crates/retrochat-core/.sqlx/query-70b55d0a9faf7bea3ba229c92f5e6f81107d887d43ca950bd85c7a7081cd96fa.json deleted file mode 100644 index 927d6da..0000000 --- a/crates/retrochat-core/.sqlx/query-70b55d0a9faf7bea3ba229c92f5e6f81107d887d43ca950bd85c7a7081cd96fa.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT COUNT(*) as count FROM analytics_requests WHERE status = ?", - "describe": { - "columns": [ - { - "name": "count", - "ordinal": 0, - "type_info": "Int" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - false - ] - }, - "hash": "70b55d0a9faf7bea3ba229c92f5e6f81107d887d43ca950bd85c7a7081cd96fa" -} diff --git a/crates/retrochat-core/.sqlx/query-76fa1a7b667a1c5a8e5cbb355261cb716bc1f40e8035eec1289702a2287e574a.json b/crates/retrochat-core/.sqlx/query-76fa1a7b667a1c5a8e5cbb355261cb716bc1f40e8035eec1289702a2287e574a.json deleted file mode 100644 index a8ed88b..0000000 --- a/crates/retrochat-core/.sqlx/query-76fa1a7b667a1c5a8e5cbb355261cb716bc1f40e8035eec1289702a2287e574a.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n SELECT\n id, analytics_request_id, session_id, generated_at,\n qualitative_output_json,\n ai_quantitative_output_json,\n metric_quantitative_output_json,\n model_used, analysis_duration_ms\n FROM analytics\n WHERE analytics_request_id = ?\n ORDER BY generated_at DESC\n LIMIT 1\n ", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "analytics_request_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "generated_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "qualitative_output_json", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "ai_quantitative_output_json", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "metric_quantitative_output_json", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "model_used", - "ordinal": 7, - "type_info": "Text" - }, - { - "name": "analysis_duration_ms", - "ordinal": 8, - "type_info": "Int64" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - true, - false, - false, - false, - false, - true, - true - ] - }, - "hash": "76fa1a7b667a1c5a8e5cbb355261cb716bc1f40e8035eec1289702a2287e574a" -} diff --git a/crates/retrochat-core/.sqlx/query-85e79f8bee1545849066899470728dac9d6f71b720b2488ab112584e0ee19a03.json b/crates/retrochat-core/.sqlx/query-85e79f8bee1545849066899470728dac9d6f71b720b2488ab112584e0ee19a03.json deleted file mode 100644 index 26e5990..0000000 --- a/crates/retrochat-core/.sqlx/query-85e79f8bee1545849066899470728dac9d6f71b720b2488ab112584e0ee19a03.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT COUNT(*) as count FROM analytics_requests WHERE status IN ('pending', 'running')", - "describe": { - "columns": [ - { - "name": "count", - "ordinal": 0, - "type_info": "Int" - } - ], - "parameters": { - "Right": 0 - }, - "nullable": [ - null - ] - }, - "hash": "85e79f8bee1545849066899470728dac9d6f71b720b2488ab112584e0ee19a03" -} diff --git a/crates/retrochat-core/.sqlx/query-a3a2bf6d2877a712b961a3dfc1d732b85a27cf0f1453e86ede0e179608f5830c.json b/crates/retrochat-core/.sqlx/query-a3a2bf6d2877a712b961a3dfc1d732b85a27cf0f1453e86ede0e179608f5830c.json deleted file mode 100644 index e838eba..0000000 --- a/crates/retrochat-core/.sqlx/query-a3a2bf6d2877a712b961a3dfc1d732b85a27cf0f1453e86ede0e179608f5830c.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT * FROM analytics_requests WHERE status = ? ORDER BY started_at DESC", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "status", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "started_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "completed_at", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "created_by", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "error_message", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "custom_prompt", - "ordinal": 7, - "type_info": "Text" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - false, - false, - true, - true, - true, - true - ] - }, - "hash": "a3a2bf6d2877a712b961a3dfc1d732b85a27cf0f1453e86ede0e179608f5830c" -} diff --git a/crates/retrochat-core/.sqlx/query-a60c6fbf087d65237d635258999d4357ba7381aafb7b0efc8aefd8727ac7e786.json b/crates/retrochat-core/.sqlx/query-a60c6fbf087d65237d635258999d4357ba7381aafb7b0efc8aefd8727ac7e786.json deleted file mode 100644 index 86e1da5..0000000 --- a/crates/retrochat-core/.sqlx/query-a60c6fbf087d65237d635258999d4357ba7381aafb7b0efc8aefd8727ac7e786.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT * FROM analytics_requests WHERE started_at >= ? ORDER BY started_at DESC", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "status", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "started_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "completed_at", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "created_by", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "error_message", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "custom_prompt", - "ordinal": 7, - "type_info": "Text" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - false, - false, - true, - true, - true, - true - ] - }, - "hash": "a60c6fbf087d65237d635258999d4357ba7381aafb7b0efc8aefd8727ac7e786" -} diff --git a/crates/retrochat-core/.sqlx/query-abf269f1fdf77d92aeb10e953fa8f5278519ddb5f70b25b2a0c48231add2bace.json b/crates/retrochat-core/.sqlx/query-abf269f1fdf77d92aeb10e953fa8f5278519ddb5f70b25b2a0c48231add2bace.json deleted file mode 100644 index d1d6421..0000000 --- a/crates/retrochat-core/.sqlx/query-abf269f1fdf77d92aeb10e953fa8f5278519ddb5f70b25b2a0c48231add2bace.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT * FROM analytics_requests ORDER BY started_at DESC LIMIT ?", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "status", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "started_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "completed_at", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "created_by", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "error_message", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "custom_prompt", - "ordinal": 7, - "type_info": "Text" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - false, - false, - true, - true, - true, - true - ] - }, - "hash": "abf269f1fdf77d92aeb10e953fa8f5278519ddb5f70b25b2a0c48231add2bace" -} diff --git a/crates/retrochat-core/.sqlx/query-b090411c4cb1bd288c5f024dbb7cb87bd624a286ee2e4efdd2932937ea3ce63f.json b/crates/retrochat-core/.sqlx/query-b090411c4cb1bd288c5f024dbb7cb87bd624a286ee2e4efdd2932937ea3ce63f.json deleted file mode 100644 index cbd27ff..0000000 --- a/crates/retrochat-core/.sqlx/query-b090411c4cb1bd288c5f024dbb7cb87bd624a286ee2e4efdd2932937ea3ce63f.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT * FROM analytics_requests WHERE id = ?", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "status", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "started_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "completed_at", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "created_by", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "error_message", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "custom_prompt", - "ordinal": 7, - "type_info": "Text" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - false, - false, - true, - true, - true, - true - ] - }, - "hash": "b090411c4cb1bd288c5f024dbb7cb87bd624a286ee2e4efdd2932937ea3ce63f" -} diff --git a/crates/retrochat-core/.sqlx/query-b9301b4ad8601f33efc34fa29d2da353b93e6c0002f1ea5eaec6937fc93486ad.json b/crates/retrochat-core/.sqlx/query-b9301b4ad8601f33efc34fa29d2da353b93e6c0002f1ea5eaec6937fc93486ad.json deleted file mode 100644 index 139a72e..0000000 --- a/crates/retrochat-core/.sqlx/query-b9301b4ad8601f33efc34fa29d2da353b93e6c0002f1ea5eaec6937fc93486ad.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT * FROM analytics_requests WHERE created_by = ? ORDER BY started_at DESC", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "status", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "started_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "completed_at", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "created_by", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "error_message", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "custom_prompt", - "ordinal": 7, - "type_info": "Text" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - false, - false, - true, - true, - true, - true - ] - }, - "hash": "b9301b4ad8601f33efc34fa29d2da353b93e6c0002f1ea5eaec6937fc93486ad" -} diff --git a/crates/retrochat-core/.sqlx/query-cc2b8d306223e34b8105cf775d9b14c366b42454be105f69933e49287d2728db.json b/crates/retrochat-core/.sqlx/query-cc2b8d306223e34b8105cf775d9b14c366b42454be105f69933e49287d2728db.json deleted file mode 100644 index 43f052d..0000000 --- a/crates/retrochat-core/.sqlx/query-cc2b8d306223e34b8105cf775d9b14c366b42454be105f69933e49287d2728db.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT * FROM analytics_requests WHERE status IN ('pending', 'running') ORDER BY started_at ASC", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "status", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "started_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "completed_at", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "created_by", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "error_message", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "custom_prompt", - "ordinal": 7, - "type_info": "Text" - } - ], - "parameters": { - "Right": 0 - }, - "nullable": [ - true, - false, - false, - false, - true, - true, - true, - true - ] - }, - "hash": "cc2b8d306223e34b8105cf775d9b14c366b42454be105f69933e49287d2728db" -} diff --git a/crates/retrochat-core/.sqlx/query-e09e456b7afa3cf68557e2daa8cfb499f84b7a13b2eb745c468e38a8f35ec58c.json b/crates/retrochat-core/.sqlx/query-e09e456b7afa3cf68557e2daa8cfb499f84b7a13b2eb745c468e38a8f35ec58c.json deleted file mode 100644 index 4b3205c..0000000 --- a/crates/retrochat-core/.sqlx/query-e09e456b7afa3cf68557e2daa8cfb499f84b7a13b2eb745c468e38a8f35ec58c.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "db_name": "SQLite", - "query": "SELECT * FROM analytics_requests WHERE session_id = ? ORDER BY started_at DESC", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "status", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "started_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "completed_at", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "created_by", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "error_message", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "custom_prompt", - "ordinal": 7, - "type_info": "Text" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - false, - false, - true, - true, - true, - true - ] - }, - "hash": "e09e456b7afa3cf68557e2daa8cfb499f84b7a13b2eb745c468e38a8f35ec58c" -} diff --git a/crates/retrochat-core/.sqlx/query-f64b389d270e02d0d2111c50691ebb2209038b90dafd66a4e5d59b5b7c3b4746.json b/crates/retrochat-core/.sqlx/query-f64b389d270e02d0d2111c50691ebb2209038b90dafd66a4e5d59b5b7c3b4746.json deleted file mode 100644 index 263fa05..0000000 --- a/crates/retrochat-core/.sqlx/query-f64b389d270e02d0d2111c50691ebb2209038b90dafd66a4e5d59b5b7c3b4746.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "db_name": "SQLite", - "query": "DELETE FROM analytics_requests WHERE completed_at IS NOT NULL AND completed_at < ?", - "describe": { - "columns": [], - "parameters": { - "Right": 1 - }, - "nullable": [] - }, - "hash": "f64b389d270e02d0d2111c50691ebb2209038b90dafd66a4e5d59b5b7c3b4746" -} diff --git a/crates/retrochat-core/.sqlx/query-fa1149b5d14759d111418649428d7779bf47c7d24a2150b8cdcf025f15482b98.json b/crates/retrochat-core/.sqlx/query-fa1149b5d14759d111418649428d7779bf47c7d24a2150b8cdcf025f15482b98.json deleted file mode 100644 index 106af1f..0000000 --- a/crates/retrochat-core/.sqlx/query-fa1149b5d14759d111418649428d7779bf47c7d24a2150b8cdcf025f15482b98.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "db_name": "SQLite", - "query": "DELETE FROM analytics_requests WHERE id = ?", - "describe": { - "columns": [], - "parameters": { - "Right": 1 - }, - "nullable": [] - }, - "hash": "fa1149b5d14759d111418649428d7779bf47c7d24a2150b8cdcf025f15482b98" -} diff --git a/crates/retrochat-core/.sqlx/query-fc4e5e50bba17e73854cd3e68d50b0815935788644dc0ef54ea7f872cd3d73ce.json b/crates/retrochat-core/.sqlx/query-fc4e5e50bba17e73854cd3e68d50b0815935788644dc0ef54ea7f872cd3d73ce.json deleted file mode 100644 index cdadcdb..0000000 --- a/crates/retrochat-core/.sqlx/query-fc4e5e50bba17e73854cd3e68d50b0815935788644dc0ef54ea7f872cd3d73ce.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n SELECT\n id, analytics_request_id, session_id, generated_at,\n qualitative_output_json,\n ai_quantitative_output_json,\n metric_quantitative_output_json,\n model_used, analysis_duration_ms\n FROM analytics\n WHERE id = ?\n ", - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Text" - }, - { - "name": "analytics_request_id", - "ordinal": 1, - "type_info": "Text" - }, - { - "name": "session_id", - "ordinal": 2, - "type_info": "Text" - }, - { - "name": "generated_at", - "ordinal": 3, - "type_info": "Text" - }, - { - "name": "qualitative_output_json", - "ordinal": 4, - "type_info": "Text" - }, - { - "name": "ai_quantitative_output_json", - "ordinal": 5, - "type_info": "Text" - }, - { - "name": "metric_quantitative_output_json", - "ordinal": 6, - "type_info": "Text" - }, - { - "name": "model_used", - "ordinal": 7, - "type_info": "Text" - }, - { - "name": "analysis_duration_ms", - "ordinal": 8, - "type_info": "Int64" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - true, - false, - true, - false, - false, - false, - false, - true, - true - ] - }, - "hash": "fc4e5e50bba17e73854cd3e68d50b0815935788644dc0ef54ea7f872cd3d73ce" -} diff --git a/crates/retrochat-core/migrations/018_add_hierarchical_storage.sql b/crates/retrochat-core/migrations/018_add_hierarchical_storage.sql new file mode 100644 index 0000000..6676c3e --- /dev/null +++ b/crates/retrochat-core/migrations/018_add_hierarchical_storage.sql @@ -0,0 +1,137 @@ +-- Migration: 018_add_hierarchical_storage.sql +-- Description: Add hierarchical summarization layer with turn and session summaries + +-- ============================================================================= +-- Table: turn_summaries +-- Purpose: LLM-generated summaries with direct references to messages +-- Lifecycle: Created async by background job, can be regenerated +-- ============================================================================= +CREATE TABLE IF NOT EXISTS turn_summaries ( + id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + turn_number INTEGER NOT NULL, -- 0-indexed within session + + -- MESSAGE BOUNDARIES (references to messages table) + start_sequence INTEGER NOT NULL, -- First message sequence in turn + end_sequence INTEGER NOT NULL, -- Last message sequence in turn + + -- LLM-GENERATED CONTENT + user_intent TEXT NOT NULL, -- "User wanted to add JWT authentication" + assistant_action TEXT NOT NULL, -- "Created auth module with JWT support" + summary TEXT NOT NULL, -- Combined summary sentence + + -- Classification + turn_type TEXT, -- 'task', 'question', 'error_fix', 'clarification', 'discussion' + + -- Extracted entities (JSON arrays) + key_topics TEXT, -- ["authentication", "JWT", "middleware"] + decisions_made TEXT, -- ["Used RS256 over HS256", "Added refresh tokens"] + code_concepts TEXT, -- ["error handling", "async/await", "middleware pattern"] + + -- CACHED TIMESTAMPS (derived from messages, cached for convenience) + started_at TEXT NOT NULL, + ended_at TEXT NOT NULL, + + -- GENERATION METADATA + model_used TEXT, + prompt_version INTEGER NOT NULL DEFAULT 1, + generated_at TEXT NOT NULL DEFAULT (datetime('now', 'utc')), + + FOREIGN KEY (session_id) REFERENCES chat_sessions(id) ON DELETE CASCADE, + UNIQUE(session_id, turn_number) +); + +-- ============================================================================= +-- Table: session_summaries +-- Purpose: LLM-generated session-level summaries +-- Lifecycle: Created after turn summaries exist, can be regenerated +-- Input: Aggregated from turn_summaries (NOT raw messages) +-- ============================================================================= +CREATE TABLE IF NOT EXISTS session_summaries ( + id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + + -- LLM-generated content + title TEXT NOT NULL, -- "JWT Authentication Implementation" + summary TEXT NOT NULL, -- 100-200 word overview + primary_goal TEXT, -- Main user objective + outcome TEXT, -- 'completed', 'partial', 'abandoned', 'ongoing' + + -- Extracted entities (JSON arrays) + key_decisions TEXT, -- ["Used JWT over sessions", "RS256 signing"] + technologies_used TEXT, -- ["JWT", "bcrypt", "axum"] + files_affected TEXT, -- ["src/auth.rs", "src/middleware.rs"] + + -- Generation metadata + model_used TEXT, + prompt_version INTEGER NOT NULL DEFAULT 1, + generated_at TEXT NOT NULL DEFAULT (datetime('now', 'utc')), + + FOREIGN KEY (session_id) REFERENCES chat_sessions(id) ON DELETE CASCADE, + UNIQUE(session_id) -- 1:1 relationship +); + +-- Indexes for turn_summaries +CREATE INDEX IF NOT EXISTS idx_turn_summaries_session ON turn_summaries(session_id); +CREATE INDEX IF NOT EXISTS idx_turn_summaries_type ON turn_summaries(turn_type); +CREATE INDEX IF NOT EXISTS idx_turn_summaries_started ON turn_summaries(started_at); + +-- Indexes for session_summaries +CREATE INDEX IF NOT EXISTS idx_session_summaries_session ON session_summaries(session_id); +CREATE INDEX IF NOT EXISTS idx_session_summaries_outcome ON session_summaries(outcome); + +-- Full-Text Search for turn_summaries +CREATE VIRTUAL TABLE IF NOT EXISTS turn_summaries_fts USING fts5( + summary, + user_intent, + assistant_action, + turn_id UNINDEXED, + content='turn_summaries', + content_rowid='rowid' +); + +-- FTS triggers for turn_summaries +CREATE TRIGGER IF NOT EXISTS turn_summaries_fts_insert AFTER INSERT ON turn_summaries BEGIN + INSERT INTO turn_summaries_fts(rowid, summary, user_intent, assistant_action, turn_id) + VALUES (NEW.rowid, NEW.summary, NEW.user_intent, NEW.assistant_action, NEW.id); +END; + +CREATE TRIGGER IF NOT EXISTS turn_summaries_fts_delete AFTER DELETE ON turn_summaries BEGIN + INSERT INTO turn_summaries_fts(turn_summaries_fts, rowid, summary, user_intent, assistant_action, turn_id) + VALUES('delete', OLD.rowid, OLD.summary, OLD.user_intent, OLD.assistant_action, OLD.id); +END; + +CREATE TRIGGER IF NOT EXISTS turn_summaries_fts_update AFTER UPDATE ON turn_summaries BEGIN + INSERT INTO turn_summaries_fts(turn_summaries_fts, rowid, summary, user_intent, assistant_action, turn_id) + VALUES('delete', OLD.rowid, OLD.summary, OLD.user_intent, OLD.assistant_action, OLD.id); + INSERT INTO turn_summaries_fts(rowid, summary, user_intent, assistant_action, turn_id) + VALUES (NEW.rowid, NEW.summary, NEW.user_intent, NEW.assistant_action, NEW.id); +END; + +-- Full-Text Search for session_summaries +CREATE VIRTUAL TABLE IF NOT EXISTS session_summaries_fts USING fts5( + title, + summary, + primary_goal, + session_id UNINDEXED, + content='session_summaries', + content_rowid='rowid' +); + +-- FTS triggers for session_summaries +CREATE TRIGGER IF NOT EXISTS session_summaries_fts_insert AFTER INSERT ON session_summaries BEGIN + INSERT INTO session_summaries_fts(rowid, title, summary, primary_goal, session_id) + VALUES (NEW.rowid, NEW.title, NEW.summary, NEW.primary_goal, NEW.session_id); +END; + +CREATE TRIGGER IF NOT EXISTS session_summaries_fts_delete AFTER DELETE ON session_summaries BEGIN + INSERT INTO session_summaries_fts(session_summaries_fts, rowid, title, summary, primary_goal, session_id) + VALUES('delete', OLD.rowid, OLD.title, OLD.summary, OLD.primary_goal, OLD.session_id); +END; + +CREATE TRIGGER IF NOT EXISTS session_summaries_fts_update AFTER UPDATE ON session_summaries BEGIN + INSERT INTO session_summaries_fts(session_summaries_fts, rowid, title, summary, primary_goal, session_id) + VALUES('delete', OLD.rowid, OLD.title, OLD.summary, OLD.primary_goal, OLD.session_id); + INSERT INTO session_summaries_fts(rowid, title, summary, primary_goal, session_id) + VALUES (NEW.rowid, NEW.title, NEW.summary, NEW.primary_goal, NEW.session_id); +END; diff --git a/crates/retrochat-core/src/database/mod.rs b/crates/retrochat-core/src/database/mod.rs index 1a220a8..f98dc15 100644 --- a/crates/retrochat-core/src/database/mod.rs +++ b/crates/retrochat-core/src/database/mod.rs @@ -7,7 +7,9 @@ pub mod message_repo; pub mod migrations; pub mod project_repo; pub mod schema; +pub mod session_summary_repo; pub mod tool_operation_repo; +pub mod turn_summary_repo; // Main repositories (now using SQLx) pub use analytics_repo::AnalyticsRepository; @@ -18,7 +20,9 @@ pub use message_repo::MessageRepository; pub use migrations::{MigrationManager, MigrationStatus}; pub use project_repo::ProjectRepository; pub use schema::{create_schema, SCHEMA_VERSION}; +pub use session_summary_repo::SessionSummaryRepository; pub use tool_operation_repo::ToolOperationRepository; +pub use turn_summary_repo::TurnSummaryRepository; // Main database structure (now using SQLx by default) pub struct Database { @@ -67,6 +71,14 @@ impl Database { ToolOperationRepository::new(&self.manager) } + pub fn turn_summary_repo(&self) -> TurnSummaryRepository { + TurnSummaryRepository::new(&self.manager) + } + + pub fn session_summary_repo(&self) -> SessionSummaryRepository { + SessionSummaryRepository::new(&self.manager) + } + pub fn migration_manager(&self) -> MigrationManager { MigrationManager::new(self.manager.pool().clone()) } diff --git a/crates/retrochat-core/src/database/session_summary_repo.rs b/crates/retrochat-core/src/database/session_summary_repo.rs new file mode 100644 index 0000000..2c02265 --- /dev/null +++ b/crates/retrochat-core/src/database/session_summary_repo.rs @@ -0,0 +1,495 @@ +use anyhow::{Context, Result as AnyhowResult}; +use chrono::{DateTime, Utc}; +use sqlx::{Pool, Row, Sqlite}; +use uuid::Uuid; + +use super::connection::DatabaseManager; +use crate::models::session_summary::{SessionOutcome, SessionSummary}; + +pub struct SessionSummaryRepository { + pool: Pool, +} + +impl SessionSummaryRepository { + pub fn new(db: &DatabaseManager) -> Self { + Self { + pool: db.pool().clone(), + } + } + + /// Create a new session summary + pub async fn create(&self, summary: &SessionSummary) -> AnyhowResult { + let generated_at = summary.generated_at.to_rfc3339(); + let outcome = summary.outcome.as_ref().map(|o| o.to_string()); + + // Serialize JSON arrays + let key_decisions_json = summary + .key_decisions + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize key_decisions")?; + let technologies_used_json = summary + .technologies_used + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize technologies_used")?; + let files_affected_json = summary + .files_affected + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize files_affected")?; + + sqlx::query( + r#" + INSERT INTO session_summaries ( + id, session_id, + title, summary, primary_goal, outcome, + key_decisions, technologies_used, files_affected, + model_used, prompt_version, generated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + "#, + ) + .bind(&summary.id) + .bind(&summary.session_id) + .bind(&summary.title) + .bind(&summary.summary) + .bind(&summary.primary_goal) + .bind(&outcome) + .bind(&key_decisions_json) + .bind(&technologies_used_json) + .bind(&files_affected_json) + .bind(&summary.model_used) + .bind(summary.prompt_version) + .bind(&generated_at) + .execute(&self.pool) + .await + .context("Failed to insert session summary")?; + + Ok(summary.id.clone()) + } + + /// Update an existing session summary + pub async fn update(&self, summary: &SessionSummary) -> AnyhowResult<()> { + let generated_at = summary.generated_at.to_rfc3339(); + let outcome = summary.outcome.as_ref().map(|o| o.to_string()); + + // Serialize JSON arrays + let key_decisions_json = summary + .key_decisions + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize key_decisions")?; + let technologies_used_json = summary + .technologies_used + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize technologies_used")?; + let files_affected_json = summary + .files_affected + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize files_affected")?; + + sqlx::query( + r#" + UPDATE session_summaries SET + title = ?, summary = ?, primary_goal = ?, outcome = ?, + key_decisions = ?, technologies_used = ?, files_affected = ?, + model_used = ?, prompt_version = ?, generated_at = ? + WHERE id = ? + "#, + ) + .bind(&summary.title) + .bind(&summary.summary) + .bind(&summary.primary_goal) + .bind(&outcome) + .bind(&key_decisions_json) + .bind(&technologies_used_json) + .bind(&files_affected_json) + .bind(&summary.model_used) + .bind(summary.prompt_version) + .bind(&generated_at) + .bind(&summary.id) + .execute(&self.pool) + .await + .context("Failed to update session summary")?; + + Ok(()) + } + + /// Get a session summary by ID + pub async fn get_by_id(&self, id: &str) -> AnyhowResult> { + let row = sqlx::query( + r#" + SELECT + id, session_id, + title, summary, primary_goal, outcome, + key_decisions, technologies_used, files_affected, + model_used, prompt_version, generated_at + FROM session_summaries + WHERE id = ? + "#, + ) + .bind(id) + .fetch_optional(&self.pool) + .await + .context("Failed to fetch session summary")?; + + row.map(|r| Self::row_to_session_summary(&r)).transpose() + } + + /// Get a session summary by session ID + pub async fn get_by_session(&self, session_id: &Uuid) -> AnyhowResult> { + let row = sqlx::query( + r#" + SELECT + id, session_id, + title, summary, primary_goal, outcome, + key_decisions, technologies_used, files_affected, + model_used, prompt_version, generated_at + FROM session_summaries + WHERE session_id = ? + "#, + ) + .bind(session_id.to_string()) + .fetch_optional(&self.pool) + .await + .context("Failed to fetch session summary")?; + + row.map(|r| Self::row_to_session_summary(&r)).transpose() + } + + /// Check if a session has a summary + pub async fn exists_for_session(&self, session_id: &Uuid) -> AnyhowResult { + let row = sqlx::query( + r#" + SELECT COUNT(*) as count FROM session_summaries + WHERE session_id = ? + "#, + ) + .bind(session_id.to_string()) + .fetch_one(&self.pool) + .await + .context("Failed to check session summary existence")?; + + let count: i64 = row.get("count"); + Ok(count > 0) + } + + /// Delete a session summary by session ID + pub async fn delete_by_session(&self, session_id: &Uuid) -> AnyhowResult { + let result = sqlx::query( + r#" + DELETE FROM session_summaries + WHERE session_id = ? + "#, + ) + .bind(session_id.to_string()) + .execute(&self.pool) + .await + .context("Failed to delete session summary")?; + + Ok(result.rows_affected()) + } + + /// Search session summaries using full-text search + pub async fn search(&self, query: &str, limit: i64) -> AnyhowResult> { + let rows = sqlx::query( + r#" + SELECT + ss.id, ss.session_id, + ss.title, ss.summary, ss.primary_goal, ss.outcome, + ss.key_decisions, ss.technologies_used, ss.files_affected, + ss.model_used, ss.prompt_version, ss.generated_at + FROM session_summaries ss + JOIN session_summaries_fts fts ON ss.rowid = fts.rowid + WHERE session_summaries_fts MATCH ? + ORDER BY rank + LIMIT ? + "#, + ) + .bind(query) + .bind(limit) + .fetch_all(&self.pool) + .await + .context("Failed to search session summaries")?; + + rows.iter().map(Self::row_to_session_summary).collect() + } + + /// List all session summaries with optional outcome filter + pub async fn list_all( + &self, + outcome: Option<&SessionOutcome>, + limit: i64, + offset: i64, + ) -> AnyhowResult> { + let rows = if let Some(outcome) = outcome { + let outcome_str = outcome.to_string(); + sqlx::query( + r#" + SELECT + id, session_id, + title, summary, primary_goal, outcome, + key_decisions, technologies_used, files_affected, + model_used, prompt_version, generated_at + FROM session_summaries + WHERE outcome = ? + ORDER BY generated_at DESC + LIMIT ? OFFSET ? + "#, + ) + .bind(&outcome_str) + .bind(limit) + .bind(offset) + .fetch_all(&self.pool) + .await + .context("Failed to list session summaries")? + } else { + sqlx::query( + r#" + SELECT + id, session_id, + title, summary, primary_goal, outcome, + key_decisions, technologies_used, files_affected, + model_used, prompt_version, generated_at + FROM session_summaries + ORDER BY generated_at DESC + LIMIT ? OFFSET ? + "#, + ) + .bind(limit) + .bind(offset) + .fetch_all(&self.pool) + .await + .context("Failed to list session summaries")? + }; + + rows.iter().map(Self::row_to_session_summary).collect() + } + + /// Convert a database row to SessionSummary + fn row_to_session_summary(row: &sqlx::sqlite::SqliteRow) -> AnyhowResult { + let generated_at_str: String = row.get("generated_at"); + let generated_at = DateTime::parse_from_rfc3339(&generated_at_str)?.with_timezone(&Utc); + + let outcome_str: Option = row.get("outcome"); + let outcome = outcome_str.and_then(|o| { + o.parse::() + .map_err(|e| { + tracing::warn!("Failed to parse session outcome '{}': {}", o, e); + e + }) + .ok() + }); + + let key_decisions_json: Option = row.get("key_decisions"); + let key_decisions: Option> = key_decisions_json + .map(|d| serde_json::from_str(&d)) + .transpose() + .context("Failed to deserialize key_decisions")?; + + let technologies_used_json: Option = row.get("technologies_used"); + let technologies_used: Option> = technologies_used_json + .map(|t| serde_json::from_str(&t)) + .transpose() + .context("Failed to deserialize technologies_used")?; + + let files_affected_json: Option = row.get("files_affected"); + let files_affected: Option> = files_affected_json + .map(|f| serde_json::from_str(&f)) + .transpose() + .context("Failed to deserialize files_affected")?; + + Ok(SessionSummary { + id: row.get("id"), + session_id: row.get("session_id"), + title: row.get("title"), + summary: row.get("summary"), + primary_goal: row.get("primary_goal"), + outcome, + key_decisions, + technologies_used, + files_affected, + model_used: row.get("model_used"), + prompt_version: row.get("prompt_version"), + generated_at, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::database::DatabaseManager; + + #[tokio::test] + async fn test_create_and_get_session_summary() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = SessionSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session first + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 5, '/test.jsonl', 'hash1', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let summary = SessionSummary::new( + session_id.to_string(), + "JWT Authentication Implementation".to_string(), + "Implemented JWT auth for the API".to_string(), + ) + .with_outcome(SessionOutcome::Completed) + .with_technologies_used(vec!["JWT".to_string(), "bcrypt".to_string()]); + + let id = repo.create(&summary).await.unwrap(); + assert!(!id.is_empty()); + + let fetched = repo.get_by_id(&id).await.unwrap().unwrap(); + assert_eq!(fetched.session_id, session_id.to_string()); + assert_eq!(fetched.title, "JWT Authentication Implementation"); + assert_eq!(fetched.outcome, Some(SessionOutcome::Completed)); + assert_eq!( + fetched.technologies_used, + Some(vec!["JWT".to_string(), "bcrypt".to_string()]) + ); + } + + #[tokio::test] + async fn test_get_by_session() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = SessionSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 10, '/test2.jsonl', 'hash2', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let summary = SessionSummary::new( + session_id.to_string(), + "Title".to_string(), + "Summary".to_string(), + ); + repo.create(&summary).await.unwrap(); + + let fetched = repo.get_by_session(&session_id).await.unwrap().unwrap(); + assert_eq!(fetched.session_id, session_id.to_string()); + assert_eq!(fetched.title, "Title"); + } + + #[tokio::test] + async fn test_exists_for_session() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = SessionSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 5, '/test3.jsonl', 'hash3', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + assert!(!repo.exists_for_session(&session_id).await.unwrap()); + + let summary = SessionSummary::new( + session_id.to_string(), + "Title".to_string(), + "Summary".to_string(), + ); + repo.create(&summary).await.unwrap(); + + assert!(repo.exists_for_session(&session_id).await.unwrap()); + } + + #[tokio::test] + async fn test_delete_by_session() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = SessionSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 5, '/test4.jsonl', 'hash4', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let summary = SessionSummary::new( + session_id.to_string(), + "Title".to_string(), + "Summary".to_string(), + ); + repo.create(&summary).await.unwrap(); + + let deleted = repo.delete_by_session(&session_id).await.unwrap(); + assert_eq!(deleted, 1); + + assert!(!repo.exists_for_session(&session_id).await.unwrap()); + } + + #[tokio::test] + async fn test_update_session_summary() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = SessionSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 5, '/test5.jsonl', 'hash5', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let mut summary = SessionSummary::new( + session_id.to_string(), + "Original Title".to_string(), + "Original Summary".to_string(), + ); + let id = repo.create(&summary).await.unwrap(); + + summary.title = "Updated Title".to_string(); + summary.outcome = Some(SessionOutcome::Completed); + repo.update(&summary).await.unwrap(); + + let fetched = repo.get_by_id(&id).await.unwrap().unwrap(); + assert_eq!(fetched.title, "Updated Title"); + assert_eq!(fetched.outcome, Some(SessionOutcome::Completed)); + } +} diff --git a/crates/retrochat-core/src/database/turn_summary_repo.rs b/crates/retrochat-core/src/database/turn_summary_repo.rs new file mode 100644 index 0000000..813d835 --- /dev/null +++ b/crates/retrochat-core/src/database/turn_summary_repo.rs @@ -0,0 +1,447 @@ +use anyhow::{Context, Result as AnyhowResult}; +use chrono::{DateTime, Utc}; +use sqlx::{Pool, Row, Sqlite}; +use uuid::Uuid; + +use super::connection::DatabaseManager; +use crate::models::{TurnSummary, TurnType}; + +pub struct TurnSummaryRepository { + pool: Pool, +} + +impl TurnSummaryRepository { + pub fn new(db: &DatabaseManager) -> Self { + Self { + pool: db.pool().clone(), + } + } + + /// Create a new turn summary + pub async fn create(&self, summary: &TurnSummary) -> AnyhowResult { + let started_at = summary.started_at.to_rfc3339(); + let ended_at = summary.ended_at.to_rfc3339(); + let generated_at = summary.generated_at.to_rfc3339(); + let turn_type = summary.turn_type.as_ref().map(|t| t.to_string()); + + // Serialize JSON arrays + let key_topics_json = summary + .key_topics + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize key_topics")?; + let decisions_made_json = summary + .decisions_made + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize decisions_made")?; + let code_concepts_json = summary + .code_concepts + .as_ref() + .map(serde_json::to_string) + .transpose() + .context("Failed to serialize code_concepts")?; + + sqlx::query( + r#" + INSERT INTO turn_summaries ( + id, session_id, turn_number, + start_sequence, end_sequence, + user_intent, assistant_action, summary, + turn_type, key_topics, decisions_made, code_concepts, + started_at, ended_at, + model_used, prompt_version, generated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + "#, + ) + .bind(&summary.id) + .bind(&summary.session_id) + .bind(summary.turn_number) + .bind(summary.start_sequence) + .bind(summary.end_sequence) + .bind(&summary.user_intent) + .bind(&summary.assistant_action) + .bind(&summary.summary) + .bind(&turn_type) + .bind(&key_topics_json) + .bind(&decisions_made_json) + .bind(&code_concepts_json) + .bind(&started_at) + .bind(&ended_at) + .bind(&summary.model_used) + .bind(summary.prompt_version) + .bind(&generated_at) + .execute(&self.pool) + .await + .context("Failed to insert turn summary")?; + + Ok(summary.id.clone()) + } + + /// Get a turn summary by ID + pub async fn get_by_id(&self, id: &str) -> AnyhowResult> { + let row = sqlx::query( + r#" + SELECT + id, session_id, turn_number, + start_sequence, end_sequence, + user_intent, assistant_action, summary, + turn_type, key_topics, decisions_made, code_concepts, + started_at, ended_at, + model_used, prompt_version, generated_at + FROM turn_summaries + WHERE id = ? + "#, + ) + .bind(id) + .fetch_optional(&self.pool) + .await + .context("Failed to fetch turn summary")?; + + row.map(|r| Self::row_to_turn_summary(&r)).transpose() + } + + /// Get all turn summaries for a session, ordered by turn number + pub async fn get_by_session(&self, session_id: &Uuid) -> AnyhowResult> { + let rows = sqlx::query( + r#" + SELECT + id, session_id, turn_number, + start_sequence, end_sequence, + user_intent, assistant_action, summary, + turn_type, key_topics, decisions_made, code_concepts, + started_at, ended_at, + model_used, prompt_version, generated_at + FROM turn_summaries + WHERE session_id = ? + ORDER BY turn_number ASC + "#, + ) + .bind(session_id.to_string()) + .fetch_all(&self.pool) + .await + .context("Failed to fetch turn summaries for session")?; + + rows.iter().map(Self::row_to_turn_summary).collect() + } + + /// Get a specific turn summary by session and turn number + pub async fn get_by_session_and_turn( + &self, + session_id: &Uuid, + turn_number: i32, + ) -> AnyhowResult> { + let row = sqlx::query( + r#" + SELECT + id, session_id, turn_number, + start_sequence, end_sequence, + user_intent, assistant_action, summary, + turn_type, key_topics, decisions_made, code_concepts, + started_at, ended_at, + model_used, prompt_version, generated_at + FROM turn_summaries + WHERE session_id = ? AND turn_number = ? + "#, + ) + .bind(session_id.to_string()) + .bind(turn_number) + .fetch_optional(&self.pool) + .await + .context("Failed to fetch turn summary")?; + + row.map(|r| Self::row_to_turn_summary(&r)).transpose() + } + + /// Count turn summaries for a session + pub async fn count_by_session(&self, session_id: &Uuid) -> AnyhowResult { + let row = sqlx::query( + r#" + SELECT COUNT(*) as count FROM turn_summaries + WHERE session_id = ? + "#, + ) + .bind(session_id.to_string()) + .fetch_one(&self.pool) + .await + .context("Failed to count turn summaries")?; + + Ok(row.get::("count")) + } + + /// Delete all turn summaries for a session + pub async fn delete_by_session(&self, session_id: &Uuid) -> AnyhowResult { + let result = sqlx::query( + r#" + DELETE FROM turn_summaries + WHERE session_id = ? + "#, + ) + .bind(session_id.to_string()) + .execute(&self.pool) + .await + .context("Failed to delete turn summaries")?; + + Ok(result.rows_affected()) + } + + /// Search turn summaries using full-text search + pub async fn search(&self, query: &str, limit: i64) -> AnyhowResult> { + let rows = sqlx::query( + r#" + SELECT + ts.id, ts.session_id, ts.turn_number, + ts.start_sequence, ts.end_sequence, + ts.user_intent, ts.assistant_action, ts.summary, + ts.turn_type, ts.key_topics, ts.decisions_made, ts.code_concepts, + ts.started_at, ts.ended_at, + ts.model_used, ts.prompt_version, ts.generated_at + FROM turn_summaries ts + JOIN turn_summaries_fts fts ON ts.rowid = fts.rowid + WHERE turn_summaries_fts MATCH ? + ORDER BY rank + LIMIT ? + "#, + ) + .bind(query) + .bind(limit) + .fetch_all(&self.pool) + .await + .context("Failed to search turn summaries")?; + + rows.iter().map(Self::row_to_turn_summary).collect() + } + + /// Convert a database row to TurnSummary + fn row_to_turn_summary(row: &sqlx::sqlite::SqliteRow) -> AnyhowResult { + let started_at_str: String = row.get("started_at"); + let ended_at_str: String = row.get("ended_at"); + let generated_at_str: String = row.get("generated_at"); + + let started_at = DateTime::parse_from_rfc3339(&started_at_str)?.with_timezone(&Utc); + let ended_at = DateTime::parse_from_rfc3339(&ended_at_str)?.with_timezone(&Utc); + let generated_at = DateTime::parse_from_rfc3339(&generated_at_str)?.with_timezone(&Utc); + + let turn_type_str: Option = row.get("turn_type"); + let turn_type = turn_type_str.and_then(|t| { + t.parse::() + .map_err(|e| { + tracing::warn!("Failed to parse turn_type '{}': {}", t, e); + e + }) + .ok() + }); + + let key_topics_json: Option = row.get("key_topics"); + let key_topics: Option> = key_topics_json + .map(|t| serde_json::from_str(&t)) + .transpose() + .context("Failed to deserialize key_topics")?; + + let decisions_made_json: Option = row.get("decisions_made"); + let decisions_made: Option> = decisions_made_json + .map(|d| serde_json::from_str(&d)) + .transpose() + .context("Failed to deserialize decisions_made")?; + + let code_concepts_json: Option = row.get("code_concepts"); + let code_concepts: Option> = code_concepts_json + .map(|c| serde_json::from_str(&c)) + .transpose() + .context("Failed to deserialize code_concepts")?; + + Ok(TurnSummary { + id: row.get("id"), + session_id: row.get("session_id"), + turn_number: row.get("turn_number"), + start_sequence: row.get("start_sequence"), + end_sequence: row.get("end_sequence"), + user_intent: row.get("user_intent"), + assistant_action: row.get("assistant_action"), + summary: row.get("summary"), + turn_type, + key_topics, + decisions_made, + code_concepts, + started_at, + ended_at, + model_used: row.get("model_used"), + prompt_version: row.get("prompt_version"), + generated_at, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::database::DatabaseManager; + use crate::models::TurnType; + + #[tokio::test] + async fn test_create_and_get_turn_summary() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = TurnSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session first + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 5, '/test.jsonl', 'hash1', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let now = Utc::now(); + let summary = TurnSummary::new( + session_id.to_string(), + 0, + 1, + 5, + "Add authentication".to_string(), + "Created JWT module".to_string(), + "User wanted auth, Claude created JWT".to_string(), + now, + now, + ) + .with_turn_type(TurnType::Task) + .with_key_topics(vec!["auth".to_string(), "jwt".to_string()]); + + let id = repo.create(&summary).await.unwrap(); + assert!(!id.is_empty()); + + let fetched = repo.get_by_id(&id).await.unwrap().unwrap(); + assert_eq!(fetched.session_id, session_id.to_string()); + assert_eq!(fetched.turn_number, 0); + assert_eq!(fetched.user_intent, "Add authentication"); + assert_eq!(fetched.turn_type, Some(TurnType::Task)); + assert_eq!( + fetched.key_topics, + Some(vec!["auth".to_string(), "jwt".to_string()]) + ); + } + + #[tokio::test] + async fn test_get_by_session() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = TurnSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 10, '/test2.jsonl', 'hash2', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let now = Utc::now(); + + // Create multiple turns + for i in 0..3 { + let summary = TurnSummary::new( + session_id.to_string(), + i, + i * 3 + 1, + (i + 1) * 3, + format!("Intent {i}"), + format!("Action {i}"), + format!("Summary {i}"), + now, + now, + ); + repo.create(&summary).await.unwrap(); + } + + let turns = repo.get_by_session(&session_id).await.unwrap(); + assert_eq!(turns.len(), 3); + assert_eq!(turns[0].turn_number, 0); + assert_eq!(turns[1].turn_number, 1); + assert_eq!(turns[2].turn_number, 2); + } + + #[tokio::test] + async fn test_count_by_session() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = TurnSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 5, '/test3.jsonl', 'hash3', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let now = Utc::now(); + let summary = TurnSummary::new( + session_id.to_string(), + 0, + 1, + 5, + "intent".to_string(), + "action".to_string(), + "summary".to_string(), + now, + now, + ); + repo.create(&summary).await.unwrap(); + + let count = repo.count_by_session(&session_id).await.unwrap(); + assert_eq!(count, 1); + } + + #[tokio::test] + async fn test_delete_by_session() { + let db = DatabaseManager::open_in_memory().await.unwrap(); + let repo = TurnSummaryRepository::new(&db); + let session_id = Uuid::new_v4(); + + // Create test session + sqlx::query( + r#" + INSERT INTO chat_sessions (id, provider, project_name, start_time, end_time, message_count, file_path, file_hash, state) + VALUES (?, 'ClaudeCode', NULL, '2024-01-01T00:00:00Z', '2024-01-01T01:00:00Z', 5, '/test4.jsonl', 'hash4', 'Imported') + "#, + ) + .bind(session_id.to_string()) + .execute(db.pool()) + .await + .unwrap(); + + let now = Utc::now(); + let summary = TurnSummary::new( + session_id.to_string(), + 0, + 1, + 5, + "intent".to_string(), + "action".to_string(), + "summary".to_string(), + now, + now, + ); + repo.create(&summary).await.unwrap(); + + let deleted = repo.delete_by_session(&session_id).await.unwrap(); + assert_eq!(deleted, 1); + + let count = repo.count_by_session(&session_id).await.unwrap(); + assert_eq!(count, 0); + } +} diff --git a/crates/retrochat-core/src/models/mod.rs b/crates/retrochat-core/src/models/mod.rs index cd4dd6a..28c51c0 100644 --- a/crates/retrochat-core/src/models/mod.rs +++ b/crates/retrochat-core/src/models/mod.rs @@ -5,7 +5,9 @@ pub mod chat_session; pub mod message; pub mod project; pub mod provider; +pub mod session_summary; pub mod tool_operation; +pub mod turn_summary; pub use analytics::Analytics; pub use analytics_request::{AnalyticsRequest, OperationStatus}; @@ -14,4 +16,6 @@ pub use chat_session::{ChatSession, SessionState}; pub use message::{Message, MessageRole, ToolCall, ToolResult, ToolUse}; pub use project::Project; pub use provider::{ParserType, Provider, ProviderConfig, ProviderRegistry}; +pub use session_summary::{SessionOutcome, SessionSummary as GeneratedSessionSummary}; pub use tool_operation::ToolOperation; +pub use turn_summary::{DetectedTurn, TurnSummary, TurnType}; diff --git a/crates/retrochat-core/src/models/session_summary.rs b/crates/retrochat-core/src/models/session_summary.rs new file mode 100644 index 0000000..ec3c70e --- /dev/null +++ b/crates/retrochat-core/src/models/session_summary.rs @@ -0,0 +1,196 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Outcome classification for a session +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] +pub enum SessionOutcome { + /// Session completed successfully + Completed, + /// Session partially completed + Partial, + /// Session was abandoned + Abandoned, + /// Session is ongoing (no clear end) + #[default] + Ongoing, +} + +impl std::fmt::Display for SessionOutcome { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SessionOutcome::Completed => write!(f, "completed"), + SessionOutcome::Partial => write!(f, "partial"), + SessionOutcome::Abandoned => write!(f, "abandoned"), + SessionOutcome::Ongoing => write!(f, "ongoing"), + } + } +} + +impl std::str::FromStr for SessionOutcome { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "completed" => Ok(SessionOutcome::Completed), + "partial" => Ok(SessionOutcome::Partial), + "abandoned" => Ok(SessionOutcome::Abandoned), + "ongoing" => Ok(SessionOutcome::Ongoing), + _ => Err(format!("Unknown session outcome: {s}")), + } + } +} + +/// LLM-generated session-level summary +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SessionSummary { + pub id: String, + pub session_id: String, + + // LLM-generated content + pub title: String, + pub summary: String, + pub primary_goal: Option, + pub outcome: Option, + + // Extracted entities (JSON arrays stored as vectors) + pub key_decisions: Option>, + pub technologies_used: Option>, + pub files_affected: Option>, + + // Generation metadata + pub model_used: Option, + pub prompt_version: i32, + pub generated_at: DateTime, +} + +impl SessionSummary { + /// Create a new SessionSummary with required fields + pub fn new(session_id: String, title: String, summary: String) -> Self { + Self { + id: Uuid::new_v4().to_string(), + session_id, + title, + summary, + primary_goal: None, + outcome: None, + key_decisions: None, + technologies_used: None, + files_affected: None, + model_used: None, + prompt_version: 1, + generated_at: Utc::now(), + } + } + + pub fn with_primary_goal(mut self, goal: String) -> Self { + self.primary_goal = Some(goal); + self + } + + pub fn with_outcome(mut self, outcome: SessionOutcome) -> Self { + self.outcome = Some(outcome); + self + } + + pub fn with_key_decisions(mut self, decisions: Vec) -> Self { + self.key_decisions = Some(decisions); + self + } + + pub fn with_technologies_used(mut self, technologies: Vec) -> Self { + self.technologies_used = Some(technologies); + self + } + + pub fn with_files_affected(mut self, files: Vec) -> Self { + self.files_affected = Some(files); + self + } + + pub fn with_model_used(mut self, model: String) -> Self { + self.model_used = Some(model); + self + } + + pub fn with_prompt_version(mut self, version: i32) -> Self { + self.prompt_version = version; + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_session_outcome_display() { + assert_eq!(SessionOutcome::Completed.to_string(), "completed"); + assert_eq!(SessionOutcome::Partial.to_string(), "partial"); + assert_eq!(SessionOutcome::Abandoned.to_string(), "abandoned"); + assert_eq!(SessionOutcome::Ongoing.to_string(), "ongoing"); + } + + #[test] + fn test_session_outcome_from_str() { + assert_eq!( + "completed".parse::().unwrap(), + SessionOutcome::Completed + ); + assert_eq!( + "partial".parse::().unwrap(), + SessionOutcome::Partial + ); + assert!("invalid".parse::().is_err()); + } + + #[test] + fn test_new_session_summary() { + let session_id = Uuid::new_v4().to_string(); + + let summary = SessionSummary::new( + session_id.clone(), + "JWT Authentication Implementation".to_string(), + "This session implemented JWT authentication for the API.".to_string(), + ); + + assert_eq!(summary.session_id, session_id); + assert_eq!(summary.title, "JWT Authentication Implementation"); + assert!(summary.primary_goal.is_none()); + assert!(summary.outcome.is_none()); + } + + #[test] + fn test_session_summary_builder_pattern() { + let summary = SessionSummary::new( + "session-1".to_string(), + "Title".to_string(), + "Summary".to_string(), + ) + .with_primary_goal("Implement authentication".to_string()) + .with_outcome(SessionOutcome::Completed) + .with_key_decisions(vec!["Used JWT".to_string(), "RS256 signing".to_string()]) + .with_technologies_used(vec!["JWT".to_string(), "bcrypt".to_string()]) + .with_files_affected(vec!["src/auth.rs".to_string()]) + .with_model_used("gemini-1.5-flash".to_string()); + + assert_eq!( + summary.primary_goal, + Some("Implement authentication".to_string()) + ); + assert_eq!(summary.outcome, Some(SessionOutcome::Completed)); + assert_eq!( + summary.key_decisions, + Some(vec!["Used JWT".to_string(), "RS256 signing".to_string()]) + ); + assert_eq!( + summary.technologies_used, + Some(vec!["JWT".to_string(), "bcrypt".to_string()]) + ); + assert_eq!( + summary.files_affected, + Some(vec!["src/auth.rs".to_string()]) + ); + assert_eq!(summary.model_used, Some("gemini-1.5-flash".to_string())); + } +} diff --git a/crates/retrochat-core/src/models/turn_summary.rs b/crates/retrochat-core/src/models/turn_summary.rs new file mode 100644 index 0000000..f90c0b2 --- /dev/null +++ b/crates/retrochat-core/src/models/turn_summary.rs @@ -0,0 +1,268 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Classification of turn types based on user intent +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] +pub enum TurnType { + /// User wants to accomplish a specific task + Task, + /// User is asking a question + Question, + /// User is trying to fix an error + ErrorFix, + /// User is clarifying a previous request + Clarification, + /// General discussion or exploration + #[default] + Discussion, +} + +impl std::fmt::Display for TurnType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TurnType::Task => write!(f, "task"), + TurnType::Question => write!(f, "question"), + TurnType::ErrorFix => write!(f, "error_fix"), + TurnType::Clarification => write!(f, "clarification"), + TurnType::Discussion => write!(f, "discussion"), + } + } +} + +impl std::str::FromStr for TurnType { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "task" => Ok(TurnType::Task), + "question" => Ok(TurnType::Question), + "error_fix" => Ok(TurnType::ErrorFix), + "clarification" => Ok(TurnType::Clarification), + "discussion" => Ok(TurnType::Discussion), + _ => Err(format!("Unknown turn type: {s}")), + } + } +} + +/// LLM-generated turn summary with message boundary references +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TurnSummary { + pub id: String, + pub session_id: String, + pub turn_number: i32, + + // Message boundaries (references to messages table via sequence_number) + pub start_sequence: i32, + pub end_sequence: i32, + + // LLM-generated content + pub user_intent: String, + pub assistant_action: String, + pub summary: String, + + // Classification + pub turn_type: Option, + + // Extracted entities (JSON arrays stored as strings) + pub key_topics: Option>, + pub decisions_made: Option>, + pub code_concepts: Option>, + + // Cached timestamps (derived from messages) + pub started_at: DateTime, + pub ended_at: DateTime, + + // Generation metadata + pub model_used: Option, + pub prompt_version: i32, + pub generated_at: DateTime, +} + +impl TurnSummary { + /// Create a new TurnSummary with required fields + #[allow(clippy::too_many_arguments)] + pub fn new( + session_id: String, + turn_number: i32, + start_sequence: i32, + end_sequence: i32, + user_intent: String, + assistant_action: String, + summary: String, + started_at: DateTime, + ended_at: DateTime, + ) -> Self { + Self { + id: Uuid::new_v4().to_string(), + session_id, + turn_number, + start_sequence, + end_sequence, + user_intent, + assistant_action, + summary, + turn_type: None, + key_topics: None, + decisions_made: None, + code_concepts: None, + started_at, + ended_at, + model_used: None, + prompt_version: 1, + generated_at: Utc::now(), + } + } + + pub fn with_turn_type(mut self, turn_type: TurnType) -> Self { + self.turn_type = Some(turn_type); + self + } + + pub fn with_key_topics(mut self, topics: Vec) -> Self { + self.key_topics = Some(topics); + self + } + + pub fn with_decisions_made(mut self, decisions: Vec) -> Self { + self.decisions_made = Some(decisions); + self + } + + pub fn with_code_concepts(mut self, concepts: Vec) -> Self { + self.code_concepts = Some(concepts); + self + } + + pub fn with_model_used(mut self, model: String) -> Self { + self.model_used = Some(model); + self + } + + pub fn with_prompt_version(mut self, version: i32) -> Self { + self.prompt_version = version; + self + } + + /// Get the number of messages in this turn + pub fn message_count(&self) -> i32 { + self.end_sequence - self.start_sequence + 1 + } +} + +/// Detected turn boundaries before summarization +#[derive(Debug, Clone)] +pub struct DetectedTurn { + pub turn_number: i32, + pub start_sequence: i32, + pub end_sequence: i32, + pub started_at: DateTime, + pub ended_at: DateTime, +} + +impl DetectedTurn { + pub fn new( + turn_number: i32, + start_sequence: i32, + end_sequence: i32, + started_at: DateTime, + ended_at: DateTime, + ) -> Self { + Self { + turn_number, + start_sequence, + end_sequence, + started_at, + ended_at, + } + } + + /// Get the number of messages in this turn + pub fn message_count(&self) -> i32 { + self.end_sequence - self.start_sequence + 1 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_turn_type_display() { + assert_eq!(TurnType::Task.to_string(), "task"); + assert_eq!(TurnType::Question.to_string(), "question"); + assert_eq!(TurnType::ErrorFix.to_string(), "error_fix"); + assert_eq!(TurnType::Clarification.to_string(), "clarification"); + assert_eq!(TurnType::Discussion.to_string(), "discussion"); + } + + #[test] + fn test_turn_type_from_str() { + assert_eq!("task".parse::().unwrap(), TurnType::Task); + assert_eq!("question".parse::().unwrap(), TurnType::Question); + assert_eq!("error_fix".parse::().unwrap(), TurnType::ErrorFix); + assert!("invalid".parse::().is_err()); + } + + #[test] + fn test_new_turn_summary() { + let session_id = Uuid::new_v4().to_string(); + let now = Utc::now(); + + let summary = TurnSummary::new( + session_id.clone(), + 0, + 1, + 5, + "Add authentication".to_string(), + "Created JWT auth module".to_string(), + "User wanted auth, Claude created JWT module".to_string(), + now, + now, + ); + + assert_eq!(summary.session_id, session_id); + assert_eq!(summary.turn_number, 0); + assert_eq!(summary.start_sequence, 1); + assert_eq!(summary.end_sequence, 5); + assert_eq!(summary.message_count(), 5); + assert!(summary.turn_type.is_none()); + } + + #[test] + fn test_turn_summary_builder_pattern() { + let now = Utc::now(); + let summary = TurnSummary::new( + "session-1".to_string(), + 0, + 1, + 3, + "intent".to_string(), + "action".to_string(), + "summary".to_string(), + now, + now, + ) + .with_turn_type(TurnType::Task) + .with_key_topics(vec!["auth".to_string(), "jwt".to_string()]) + .with_model_used("gemini-1.5-flash".to_string()); + + assert_eq!(summary.turn_type, Some(TurnType::Task)); + assert_eq!( + summary.key_topics, + Some(vec!["auth".to_string(), "jwt".to_string()]) + ); + assert_eq!(summary.model_used, Some("gemini-1.5-flash".to_string())); + } + + #[test] + fn test_detected_turn() { + let now = Utc::now(); + let turn = DetectedTurn::new(0, 1, 5, now, now); + + assert_eq!(turn.turn_number, 0); + assert_eq!(turn.start_sequence, 1); + assert_eq!(turn.end_sequence, 5); + assert_eq!(turn.message_count(), 5); + } +} diff --git a/crates/retrochat-core/src/services/mod.rs b/crates/retrochat-core/src/services/mod.rs index fe7ea8e..566b9c0 100644 --- a/crates/retrochat-core/src/services/mod.rs +++ b/crates/retrochat-core/src/services/mod.rs @@ -6,6 +6,9 @@ pub mod google_ai; pub mod import_service; pub mod parser_service; pub mod query_service; +pub mod session_summarization; +pub mod turn_detection; +pub mod turn_summarization; pub mod watch_service; pub use analytics::{ @@ -29,4 +32,7 @@ pub use query_service::{ SessionAnalytics, SessionDetailRequest, SessionDetailResponse, SessionFilters, SessionSummary, SessionsQueryRequest, SessionsQueryResponse, }; +pub use session_summarization::SessionSummarizer; +pub use turn_detection::{TurnDetector, TurnMetrics}; +pub use turn_summarization::TurnSummarizer; pub use watch_service::{collect_provider_paths, detect_provider, watch_paths_for_changes}; diff --git a/crates/retrochat-core/src/services/session_summarization.rs b/crates/retrochat-core/src/services/session_summarization.rs new file mode 100644 index 0000000..454aa63 --- /dev/null +++ b/crates/retrochat-core/src/services/session_summarization.rs @@ -0,0 +1,329 @@ +use anyhow::{Context, Result as AnyhowResult}; +use regex::Regex; +use uuid::Uuid; + +use crate::database::{DatabaseManager, SessionSummaryRepository, TurnSummaryRepository}; +use crate::models::session_summary::{SessionOutcome, SessionSummary}; +use crate::models::TurnSummary; +use crate::services::google_ai::GoogleAiClient; + +/// Service for generating LLM-based session summaries from turn summaries +pub struct SessionSummarizer { + turn_summary_repo: TurnSummaryRepository, + session_summary_repo: SessionSummaryRepository, + ai_client: GoogleAiClient, +} + +impl SessionSummarizer { + pub fn new(db: &DatabaseManager, ai_client: GoogleAiClient) -> Self { + Self { + turn_summary_repo: TurnSummaryRepository::new(db), + session_summary_repo: SessionSummaryRepository::new(db), + ai_client, + } + } + + /// Summarize a session from its turn summaries + /// + /// Primary path: Generate from turn summaries (efficient, small input) + /// Fallback path: Not implemented yet (would use raw messages) + pub async fn summarize_session(&self, session_id: &Uuid) -> AnyhowResult { + // Get turn summaries for the session + let turn_summaries = self + .turn_summary_repo + .get_by_session(session_id) + .await + .context("Failed to fetch turn summaries")?; + + if turn_summaries.is_empty() { + anyhow::bail!("No turn summaries found for session. Run turn summarization first."); + } + + // Delete existing session summary if any + self.session_summary_repo + .delete_by_session(session_id) + .await + .context("Failed to delete existing session summary")?; + + // Generate summary from turn summaries + let summary = self + .generate_from_turns(session_id, &turn_summaries) + .await?; + + // Save the summary + self.session_summary_repo + .create(&summary) + .await + .context("Failed to save session summary")?; + + Ok(summary) + } + + /// Generate a session summary from turn summaries + async fn generate_from_turns( + &self, + session_id: &Uuid, + turns: &[TurnSummary], + ) -> AnyhowResult { + let prompt = self.build_session_prompt(turns); + + let analysis_request = crate::services::google_ai::models::AnalysisRequest { + prompt, + max_tokens: Some(1024), + temperature: Some(0.3), + }; + + let response = self.ai_client.analytics(analysis_request).await?; + let parsed = Self::parse_session_response(&response.text)?; + + let summary = SessionSummary::new(session_id.to_string(), parsed.title, parsed.summary) + .with_primary_goal(parsed.primary_goal) + .with_outcome(parsed.outcome) + .with_key_decisions(parsed.key_decisions) + .with_technologies_used(parsed.technologies_used) + .with_files_affected(parsed.files_affected) + .with_model_used("gemini-1.5-flash".to_string()); + + Ok(summary) + } + + /// Build a prompt for session summarization from turn summaries + fn build_session_prompt(&self, turns: &[TurnSummary]) -> String { + let mut turns_text = String::new(); + + for turn in turns { + let turn_type = turn + .turn_type + .as_ref() + .map(|t| t.to_string()) + .unwrap_or_else(|| "unknown".to_string()); + + let topics = turn + .key_topics + .as_ref() + .map(|t| t.join(", ")) + .unwrap_or_default(); + + turns_text.push_str(&format!( + "Turn {num} ({turn_type}): {summary}\n Topics: {topics}\n\n", + num = turn.turn_number + 1, + summary = turn.summary, + )); + } + + format!( + r#"Analyze the following session summary (derived from individual turn summaries) and provide a comprehensive session overview. + +## Session Turns + +{turns_text} + +## Task + +Create a high-level summary of this entire coding session by synthesizing the turn summaries above. + +## Required Output Format + +Your response MUST follow this exact format: + +TITLE: [A concise title for the session, max 60 characters, e.g., "JWT Authentication Implementation"] + +SUMMARY: [A 2-3 sentence overview of what was accomplished in the session] + +PRIMARY_GOAL: [The main objective the user was trying to achieve] + +OUTCOME: [One of: completed, partial, abandoned, ongoing] + +KEY_DECISIONS: [Comma-separated list of important decisions made] + +TECHNOLOGIES_USED: [Comma-separated list of technologies, frameworks, or tools used] + +FILES_AFFECTED: [Comma-separated list of key files that were created or modified] + +Example: + +TITLE: JWT Authentication Implementation + +SUMMARY: Implemented complete JWT-based authentication system with middleware, token validation, and refresh token support. Added comprehensive tests and updated API documentation. + +PRIMARY_GOAL: Add secure authentication to the REST API + +OUTCOME: completed + +KEY_DECISIONS: Used RS256 over HS256 for token signing, Added refresh tokens for better UX + +TECHNOLOGIES_USED: JWT, bcrypt, axum, tokio + +FILES_AFFECTED: src/auth/mod.rs, src/middleware/auth.rs, tests/auth_tests.rs"#, + turns_text = turns_text.trim() + ) + } + + /// Parse the LLM response for session summarization + fn parse_session_response(response: &str) -> AnyhowResult { + let title = Self::extract_field(response, "TITLE") + .unwrap_or_else(|| "Untitled Session".to_string()); + + let summary = Self::extract_field(response, "SUMMARY") + .unwrap_or_else(|| "No summary available".to_string()); + + let primary_goal = Self::extract_field(response, "PRIMARY_GOAL") + .unwrap_or_else(|| "Unknown goal".to_string()); + + let outcome_str = Self::extract_field(response, "OUTCOME").unwrap_or_default(); + let outcome = outcome_str + .to_lowercase() + .parse::() + .unwrap_or(SessionOutcome::Ongoing); + + let key_decisions = + Self::parse_list(&Self::extract_field(response, "KEY_DECISIONS").unwrap_or_default()); + + let technologies_used = Self::parse_list( + &Self::extract_field(response, "TECHNOLOGIES_USED").unwrap_or_default(), + ); + + let files_affected = + Self::parse_list(&Self::extract_field(response, "FILES_AFFECTED").unwrap_or_default()); + + Ok(ParsedSessionResponse { + title, + summary, + primary_goal, + outcome, + key_decisions, + technologies_used, + files_affected, + }) + } + + /// Extract a field from the response + fn extract_field(response: &str, field_name: &str) -> Option { + let pattern = format!(r"(?i){}:\s*(.+)", regex::escape(field_name)); + let re = Regex::new(&pattern).ok()?; + + re.captures(response) + .and_then(|caps| caps.get(1)) + .map(|m| m.as_str().trim().to_string()) + } + + /// Parse a comma-separated list into a Vec + fn parse_list(input: &str) -> Vec { + input + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect() + } + + /// Check if a session has been summarized + pub async fn is_session_summarized(&self, session_id: &Uuid) -> AnyhowResult { + self.session_summary_repo + .exists_for_session(session_id) + .await + } + + /// Get existing session summary + pub async fn get_session_summary( + &self, + session_id: &Uuid, + ) -> AnyhowResult> { + self.session_summary_repo.get_by_session(session_id).await + } +} + +/// Parsed response from session summarization LLM call +struct ParsedSessionResponse { + title: String, + summary: String, + primary_goal: String, + outcome: SessionOutcome, + key_decisions: Vec, + technologies_used: Vec, + files_affected: Vec, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_field() { + let response = r#" +TITLE: JWT Authentication +SUMMARY: Implemented JWT auth +OUTCOME: completed +"#; + + assert_eq!( + SessionSummarizer::extract_field(response, "TITLE"), + Some("JWT Authentication".to_string()) + ); + assert_eq!( + SessionSummarizer::extract_field(response, "OUTCOME"), + Some("completed".to_string()) + ); + } + + #[test] + fn test_parse_list() { + let input = "JWT, bcrypt, axum, tokio"; + let result = SessionSummarizer::parse_list(input); + assert_eq!( + result, + vec![ + "JWT".to_string(), + "bcrypt".to_string(), + "axum".to_string(), + "tokio".to_string() + ] + ); + } + + #[test] + fn test_parse_list_empty() { + let input = ""; + let result = SessionSummarizer::parse_list(input); + assert!(result.is_empty()); + } + + #[test] + fn test_parse_session_response() { + let response = r#" +TITLE: User Authentication System + +SUMMARY: Built a complete authentication system with login, registration, and password reset functionality. + +PRIMARY_GOAL: Implement user authentication + +OUTCOME: completed + +KEY_DECISIONS: Used JWT for sessions, Added email verification + +TECHNOLOGIES_USED: JWT, bcrypt, sendgrid + +FILES_AFFECTED: src/auth.rs, src/routes/auth.rs +"#; + + let parsed = SessionSummarizer::parse_session_response(response).unwrap(); + + assert_eq!(parsed.title, "User Authentication System"); + assert!(parsed.summary.contains("authentication")); + assert_eq!(parsed.primary_goal, "Implement user authentication"); + assert_eq!(parsed.outcome, SessionOutcome::Completed); + assert_eq!(parsed.key_decisions.len(), 2); + assert_eq!(parsed.technologies_used.len(), 3); + assert_eq!(parsed.files_affected.len(), 2); + } + + #[test] + fn test_parse_session_response_missing_fields() { + let response = "Some malformed response"; + + let parsed = SessionSummarizer::parse_session_response(response).unwrap(); + + assert_eq!(parsed.title, "Untitled Session"); + assert_eq!(parsed.summary, "No summary available"); + assert_eq!(parsed.outcome, SessionOutcome::Ongoing); + } +} diff --git a/crates/retrochat-core/src/services/turn_detection.rs b/crates/retrochat-core/src/services/turn_detection.rs new file mode 100644 index 0000000..51780fa --- /dev/null +++ b/crates/retrochat-core/src/services/turn_detection.rs @@ -0,0 +1,390 @@ +use anyhow::{Context, Result as AnyhowResult}; + +use uuid::Uuid; + +use crate::database::{DatabaseManager, MessageRepository}; +use crate::models::message::MessageType; +use crate::models::{DetectedTurn, Message, MessageRole}; + +/// Service for detecting turn boundaries within chat sessions +/// +/// A turn is defined as starting with a User message and includes all following messages +/// until the next User message. +pub struct TurnDetector { + message_repo: MessageRepository, +} + +impl TurnDetector { + pub fn new(db: &DatabaseManager) -> Self { + Self { + message_repo: MessageRepository::new(db), + } + } + + /// Detect all turns in a session + /// + /// Turn boundary rules: + /// - New turn starts with User message (SimpleMessage or SlashCommand) + /// - Merge consecutive User messages into single turn + /// - Handle edge case: session starting with Assistant (turn_number = 0) + pub async fn detect_turns(&self, session_id: &Uuid) -> AnyhowResult> { + let messages = self + .message_repo + .get_by_session(session_id) + .await + .context("Failed to fetch messages for session")?; + + if messages.is_empty() { + return Ok(Vec::new()); + } + + Ok(Self::detect_turns_from_messages(&messages)) + } + + /// Pure function to detect turns from a list of messages + /// This is separated for easier testing + fn detect_turns_from_messages(messages: &[Message]) -> Vec { + if messages.is_empty() { + return Vec::new(); + } + + let mut turns = Vec::new(); + let mut current_turn_start: Option = None; + let mut turn_number = 0; + + // Check if session starts with a non-User message (edge case) + if !Self::is_turn_boundary(&messages[0]) { + // Create turn 0 for system-initiated conversation + current_turn_start = Some(0); + } + + for (i, message) in messages.iter().enumerate() { + if Self::is_turn_boundary(message) { + // Close previous turn if exists + if let Some(start_idx) = current_turn_start { + // Don't create a turn if we're at the same position + // (this handles consecutive User messages) + if start_idx < i { + let turn = Self::create_turn( + turn_number, + &messages[start_idx..i], + messages[start_idx].sequence_number as i32, + messages[i - 1].sequence_number as i32, + ); + turns.push(turn); + turn_number += 1; + } + } + current_turn_start = Some(i); + } + } + + // Close the last turn + if let Some(start_idx) = current_turn_start { + let turn = Self::create_turn( + turn_number, + &messages[start_idx..], + messages[start_idx].sequence_number as i32, + messages.last().unwrap().sequence_number as i32, + ); + turns.push(turn); + } + + turns + } + + /// Check if a message starts a new turn + fn is_turn_boundary(message: &Message) -> bool { + matches!(message.role, MessageRole::User) + && matches!( + message.message_type, + MessageType::SimpleMessage | MessageType::SlashCommand + ) + } + + /// Create a DetectedTurn from a slice of messages + fn create_turn( + turn_number: i32, + messages: &[Message], + start_sequence: i32, + end_sequence: i32, + ) -> DetectedTurn { + let started_at = messages.first().map(|m| m.timestamp).unwrap_or_default(); + let ended_at = messages.last().map(|m| m.timestamp).unwrap_or(started_at); + + DetectedTurn::new( + turn_number, + start_sequence, + end_sequence, + started_at, + ended_at, + ) + } +} + +/// Metrics computed on-demand for a turn +#[derive(Debug, Clone, Default)] +pub struct TurnMetrics { + pub message_count: i32, + pub user_message_count: i32, + pub assistant_message_count: i32, + pub system_message_count: i32, + pub tool_request_count: i32, + pub tool_result_count: i32, + pub thinking_count: i32, + pub total_tokens: i64, +} + +impl TurnMetrics { + /// Compute metrics for a turn from its messages + pub fn from_messages(messages: &[Message]) -> Self { + let mut metrics = Self::default(); + + for message in messages { + metrics.message_count += 1; + + match message.role { + MessageRole::User => metrics.user_message_count += 1, + MessageRole::Assistant => metrics.assistant_message_count += 1, + MessageRole::System => metrics.system_message_count += 1, + } + + match message.message_type { + MessageType::ToolRequest => metrics.tool_request_count += 1, + MessageType::ToolResult => metrics.tool_result_count += 1, + MessageType::Thinking => metrics.thinking_count += 1, + _ => {} + } + + if let Some(tokens) = message.token_count { + metrics.total_tokens += tokens as i64; + } + } + + metrics + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use uuid::Uuid; + + fn create_message( + session_id: Uuid, + sequence: u32, + role: MessageRole, + message_type: MessageType, + ) -> Message { + Message::new( + session_id, + role, + format!("Message {sequence}"), + Utc::now(), + sequence, + ) + .with_message_type(message_type) + } + + #[test] + fn test_empty_messages() { + let turns = TurnDetector::detect_turns_from_messages(&[]); + assert!(turns.is_empty()); + } + + #[test] + fn test_single_user_turn() { + let session_id = Uuid::new_v4(); + let messages = vec![ + create_message(session_id, 1, MessageRole::User, MessageType::SimpleMessage), + create_message( + session_id, + 2, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + create_message( + session_id, + 3, + MessageRole::Assistant, + MessageType::ToolRequest, + ), + create_message(session_id, 4, MessageRole::System, MessageType::ToolResult), + ]; + + let turns = TurnDetector::detect_turns_from_messages(&messages); + assert_eq!(turns.len(), 1); + assert_eq!(turns[0].turn_number, 0); + assert_eq!(turns[0].start_sequence, 1); + assert_eq!(turns[0].end_sequence, 4); + assert_eq!(turns[0].message_count(), 4); + } + + #[test] + fn test_multiple_turns() { + let session_id = Uuid::new_v4(); + let messages = vec![ + // Turn 0 + create_message(session_id, 1, MessageRole::User, MessageType::SimpleMessage), + create_message( + session_id, + 2, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + // Turn 1 + create_message(session_id, 3, MessageRole::User, MessageType::SimpleMessage), + create_message( + session_id, + 4, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + create_message( + session_id, + 5, + MessageRole::Assistant, + MessageType::ToolRequest, + ), + // Turn 2 + create_message(session_id, 6, MessageRole::User, MessageType::SlashCommand), + create_message( + session_id, + 7, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + ]; + + let turns = TurnDetector::detect_turns_from_messages(&messages); + assert_eq!(turns.len(), 3); + + assert_eq!(turns[0].turn_number, 0); + assert_eq!(turns[0].start_sequence, 1); + assert_eq!(turns[0].end_sequence, 2); + + assert_eq!(turns[1].turn_number, 1); + assert_eq!(turns[1].start_sequence, 3); + assert_eq!(turns[1].end_sequence, 5); + + assert_eq!(turns[2].turn_number, 2); + assert_eq!(turns[2].start_sequence, 6); + assert_eq!(turns[2].end_sequence, 7); + } + + #[test] + fn test_session_starts_with_assistant() { + let session_id = Uuid::new_v4(); + let messages = vec![ + // Turn 0 (system-initiated) + create_message( + session_id, + 1, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + create_message( + session_id, + 2, + MessageRole::Assistant, + MessageType::ToolRequest, + ), + // Turn 1 + create_message(session_id, 3, MessageRole::User, MessageType::SimpleMessage), + create_message( + session_id, + 4, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + ]; + + let turns = TurnDetector::detect_turns_from_messages(&messages); + assert_eq!(turns.len(), 2); + + assert_eq!(turns[0].turn_number, 0); + assert_eq!(turns[0].start_sequence, 1); + assert_eq!(turns[0].end_sequence, 2); + + assert_eq!(turns[1].turn_number, 1); + assert_eq!(turns[1].start_sequence, 3); + assert_eq!(turns[1].end_sequence, 4); + } + + #[test] + fn test_slash_command_starts_turn() { + let session_id = Uuid::new_v4(); + let messages = vec![ + create_message(session_id, 1, MessageRole::User, MessageType::SlashCommand), + create_message( + session_id, + 2, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + ]; + + let turns = TurnDetector::detect_turns_from_messages(&messages); + assert_eq!(turns.len(), 1); + assert_eq!(turns[0].start_sequence, 1); + } + + #[test] + fn test_tool_result_from_user_does_not_start_turn() { + let session_id = Uuid::new_v4(); + let messages = vec![ + create_message(session_id, 1, MessageRole::User, MessageType::SimpleMessage), + create_message( + session_id, + 2, + MessageRole::Assistant, + MessageType::ToolRequest, + ), + // User providing tool result should not start new turn + create_message(session_id, 3, MessageRole::User, MessageType::ToolResult), + create_message( + session_id, + 4, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + ]; + + let turns = TurnDetector::detect_turns_from_messages(&messages); + assert_eq!(turns.len(), 1); + assert_eq!(turns[0].message_count(), 4); + } + + #[test] + fn test_turn_metrics() { + let session_id = Uuid::new_v4(); + let messages = vec![ + create_message(session_id, 1, MessageRole::User, MessageType::SimpleMessage), + create_message(session_id, 2, MessageRole::Assistant, MessageType::Thinking), + create_message( + session_id, + 3, + MessageRole::Assistant, + MessageType::ToolRequest, + ), + create_message(session_id, 4, MessageRole::System, MessageType::ToolResult), + create_message( + session_id, + 5, + MessageRole::Assistant, + MessageType::SimpleMessage, + ), + ]; + + let metrics = TurnMetrics::from_messages(&messages); + + assert_eq!(metrics.message_count, 5); + assert_eq!(metrics.user_message_count, 1); + assert_eq!(metrics.assistant_message_count, 3); + assert_eq!(metrics.system_message_count, 1); + assert_eq!(metrics.tool_request_count, 1); + assert_eq!(metrics.tool_result_count, 1); + assert_eq!(metrics.thinking_count, 1); + } +} diff --git a/crates/retrochat-core/src/services/turn_summarization.rs b/crates/retrochat-core/src/services/turn_summarization.rs new file mode 100644 index 0000000..307bf07 --- /dev/null +++ b/crates/retrochat-core/src/services/turn_summarization.rs @@ -0,0 +1,411 @@ +use anyhow::{Context, Result as AnyhowResult}; +use regex::Regex; +use uuid::Uuid; + +use crate::database::{DatabaseManager, MessageRepository, TurnSummaryRepository}; +use crate::models::message::MessageType; +use crate::models::{DetectedTurn, Message, MessageRole, TurnSummary, TurnType}; +use crate::services::google_ai::GoogleAiClient; +use crate::services::turn_detection::TurnDetector; + +/// Service for generating LLM-based turn summaries +pub struct TurnSummarizer { + message_repo: MessageRepository, + turn_summary_repo: TurnSummaryRepository, + turn_detector: TurnDetector, + ai_client: GoogleAiClient, +} + +impl TurnSummarizer { + pub fn new(db: &DatabaseManager, ai_client: GoogleAiClient) -> Self { + Self { + message_repo: MessageRepository::new(db), + turn_summary_repo: TurnSummaryRepository::new(db), + turn_detector: TurnDetector::new(db), + ai_client, + } + } + + /// Summarize all turns for a session + /// + /// Returns the number of turns summarized + pub async fn summarize_session(&self, session_id: &Uuid) -> AnyhowResult { + // Detect turns + let turns = self + .turn_detector + .detect_turns(session_id) + .await + .context("Failed to detect turns")?; + + if turns.is_empty() { + return Ok(0); + } + + // Get all messages for the session + let messages = self + .message_repo + .get_by_session(session_id) + .await + .context("Failed to fetch messages")?; + + // Delete existing summaries for this session + self.turn_summary_repo + .delete_by_session(session_id) + .await + .context("Failed to delete existing turn summaries")?; + + let mut summarized_count = 0; + + // Summarize each turn + for turn in &turns { + let turn_messages: Vec<&Message> = messages + .iter() + .filter(|m| { + m.sequence_number >= turn.start_sequence as u32 + && m.sequence_number <= turn.end_sequence as u32 + }) + .collect(); + + if turn_messages.is_empty() { + continue; + } + + match self.summarize_turn(session_id, turn, &turn_messages).await { + Ok(summary) => { + self.turn_summary_repo + .create(&summary) + .await + .context("Failed to save turn summary")?; + summarized_count += 1; + } + Err(e) => { + tracing::warn!( + "Failed to summarize turn {} for session {}: {}", + turn.turn_number, + session_id, + e + ); + } + } + } + + Ok(summarized_count) + } + + /// Summarize a single turn + async fn summarize_turn( + &self, + session_id: &Uuid, + turn: &DetectedTurn, + messages: &[&Message], + ) -> AnyhowResult { + let prompt = self.build_turn_prompt(messages); + + let analysis_request = crate::services::google_ai::models::AnalysisRequest { + prompt, + max_tokens: Some(1024), + temperature: Some(0.3), // Lower temperature for more consistent output + }; + + let response = self.ai_client.analytics(analysis_request).await?; + let parsed = Self::parse_turn_response(&response.text)?; + + let summary = TurnSummary::new( + session_id.to_string(), + turn.turn_number, + turn.start_sequence, + turn.end_sequence, + parsed.user_intent, + parsed.assistant_action, + parsed.summary, + turn.started_at, + turn.ended_at, + ) + .with_turn_type(parsed.turn_type) + .with_key_topics(parsed.key_topics) + .with_model_used("gemini-1.5-flash".to_string()); + + Ok(summary) + } + + /// Build a prompt for turn summarization + fn build_turn_prompt(&self, messages: &[&Message]) -> String { + let mut transcript = String::new(); + + for msg in messages { + let role = match msg.role { + MessageRole::User => "USER", + MessageRole::Assistant => "ASSISTANT", + MessageRole::System => "SYSTEM", + }; + + let msg_type = match msg.message_type { + MessageType::ToolRequest => " [Tool Request]", + MessageType::ToolResult => " [Tool Result]", + MessageType::Thinking => " [Thinking]", + MessageType::SlashCommand => " [Command]", + MessageType::SimpleMessage => "", + }; + + transcript.push_str(&format!( + "[{role}{msg_type}]: {content}\n\n", + content = Self::truncate_content(&msg.content, 1000) + )); + } + + format!( + r#"Analyze the following turn from a coding assistant conversation and provide a structured summary. + +## Turn Transcript + +{transcript} + +## Task + +Summarize this turn by extracting: +1. What the user wanted to accomplish +2. What the assistant did in response +3. A brief combined summary +4. The type of turn (task, question, error_fix, clarification, or discussion) +5. Key topics discussed + +## Required Output Format + +Your response MUST follow this exact format: + +USER_INTENT: [One sentence describing what the user wanted] + +ASSISTANT_ACTION: [One sentence describing what the assistant did] + +SUMMARY: [One sentence combining the above into a cohesive summary] + +TURN_TYPE: [One of: task, question, error_fix, clarification, discussion] + +KEY_TOPICS: [Comma-separated list of 2-5 key topics/technologies mentioned] + +Example: + +USER_INTENT: User wanted to add JWT authentication to the API endpoints. + +ASSISTANT_ACTION: Created auth middleware and JWT validation logic. + +SUMMARY: Implemented JWT authentication with middleware for protecting API routes. + +TURN_TYPE: task + +KEY_TOPICS: JWT, authentication, middleware, API security"#, + transcript = transcript.trim() + ) + } + + /// Truncate content to a maximum number of characters, preserving word boundaries + /// Uses char_indices() to safely handle multi-byte UTF-8 characters + fn truncate_content(content: &str, max_chars: usize) -> String { + let char_count = content.chars().count(); + if char_count <= max_chars { + return content.to_string(); + } + + // Find the byte index for the max_chars boundary + let end_idx = content + .char_indices() + .nth(max_chars) + .map(|(idx, _)| idx) + .unwrap_or(content.len()); + + let truncated = &content[..end_idx]; + + // Try to break at a word boundary + if let Some(last_space) = truncated.rfind(char::is_whitespace) { + format!("{}...", &truncated[..last_space]) + } else { + format!("{}...", truncated) + } + } + + /// Parse the LLM response for turn summarization + fn parse_turn_response(response: &str) -> AnyhowResult { + let user_intent = Self::extract_field(response, "USER_INTENT") + .unwrap_or_else(|| "Unknown intent".to_string()); + + let assistant_action = Self::extract_field(response, "ASSISTANT_ACTION") + .unwrap_or_else(|| "Unknown action".to_string()); + + let summary = Self::extract_field(response, "SUMMARY") + .unwrap_or_else(|| format!("{} -> {}", user_intent, assistant_action)); + + let turn_type_str = Self::extract_field(response, "TURN_TYPE").unwrap_or_default(); + let turn_type = turn_type_str + .to_lowercase() + .parse::() + .unwrap_or(TurnType::Discussion); + + let key_topics_str = Self::extract_field(response, "KEY_TOPICS").unwrap_or_default(); + let key_topics: Vec = key_topics_str + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + + Ok(ParsedTurnResponse { + user_intent, + assistant_action, + summary, + turn_type, + key_topics, + }) + } + + /// Extract a field from the response + fn extract_field(response: &str, field_name: &str) -> Option { + let pattern = format!(r"(?i){}:\s*(.+)", regex::escape(field_name)); + let re = Regex::new(&pattern).ok()?; + + re.captures(response) + .and_then(|caps| caps.get(1)) + .map(|m| m.as_str().trim().to_string()) + } + + /// Check if a session has been summarized + pub async fn is_session_summarized(&self, session_id: &Uuid) -> AnyhowResult { + let count = self.turn_summary_repo.count_by_session(session_id).await?; + Ok(count > 0) + } + + /// Get existing turn summaries for a session + pub async fn get_session_turns(&self, session_id: &Uuid) -> AnyhowResult> { + self.turn_summary_repo.get_by_session(session_id).await + } +} + +/// Parsed response from turn summarization LLM call +struct ParsedTurnResponse { + user_intent: String, + assistant_action: String, + summary: String, + turn_type: TurnType, + key_topics: Vec, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_truncate_content_short() { + let content = "Short content"; + assert_eq!(TurnSummarizer::truncate_content(content, 100), content); + } + + #[test] + fn test_truncate_content_long() { + let content = "This is a very long piece of content that needs to be truncated"; + let truncated = TurnSummarizer::truncate_content(content, 20); + assert!(truncated.chars().count() <= 23); // 20 chars + "..." + assert!(truncated.ends_with("...")); + } + + #[test] + fn test_truncate_content_unicode_emoji() { + // Each emoji is one character but multiple bytes + let content = "Hello 🎉🎊🎁🎈🎂 World"; + // Truncate at 10 chars: "Hello 🎉🎊🎁🎈" (10 chars) + let truncated = TurnSummarizer::truncate_content(content, 10); + assert!(truncated.ends_with("...")); + // Should not panic and should handle multi-byte chars correctly + assert!(truncated.chars().count() <= 13); // 10 + "..." + } + + #[test] + fn test_truncate_content_unicode_cjk() { + // CJK characters are multi-byte + let content = "안녕하세요 세계입니다"; + // Truncate at 5 chars + let truncated = TurnSummarizer::truncate_content(content, 5); + assert!(truncated.ends_with("...")); + // "안녕하세요" is 5 chars, but there's a space so it may break at "안녕하세요" + assert!(truncated.chars().count() <= 8); // 5 + "..." + } + + #[test] + fn test_truncate_content_unicode_mixed() { + // Mix of ASCII, emoji, and CJK + let content = "Hello世界🌍Test"; + let truncated = TurnSummarizer::truncate_content(content, 8); + assert!(truncated.ends_with("...")); + // Should handle mixed content without panic + } + + #[test] + fn test_extract_field() { + let response = r#" +USER_INTENT: Add authentication +ASSISTANT_ACTION: Created JWT module +SUMMARY: Implemented JWT auth +TURN_TYPE: task +KEY_TOPICS: JWT, auth, middleware +"#; + + assert_eq!( + TurnSummarizer::extract_field(response, "USER_INTENT"), + Some("Add authentication".to_string()) + ); + assert_eq!( + TurnSummarizer::extract_field(response, "TURN_TYPE"), + Some("task".to_string()) + ); + assert_eq!( + TurnSummarizer::extract_field(response, "KEY_TOPICS"), + Some("JWT, auth, middleware".to_string()) + ); + } + + #[test] + fn test_parse_turn_response() { + let response = r#" +USER_INTENT: User wanted to add logging to the application +ASSISTANT_ACTION: Created a logging module with tracing support +SUMMARY: Implemented structured logging using tracing crate +TURN_TYPE: task +KEY_TOPICS: logging, tracing, observability +"#; + + let parsed = TurnSummarizer::parse_turn_response(response).unwrap(); + + assert_eq!( + parsed.user_intent, + "User wanted to add logging to the application" + ); + assert_eq!( + parsed.assistant_action, + "Created a logging module with tracing support" + ); + assert_eq!( + parsed.summary, + "Implemented structured logging using tracing crate" + ); + assert_eq!(parsed.turn_type, TurnType::Task); + assert_eq!( + parsed.key_topics, + vec![ + "logging".to_string(), + "tracing".to_string(), + "observability".to_string() + ] + ); + } + + #[test] + fn test_parse_turn_response_missing_fields() { + let response = r#" +Some malformed response without proper fields +"#; + + let parsed = TurnSummarizer::parse_turn_response(response).unwrap(); + + assert_eq!(parsed.user_intent, "Unknown intent"); + assert_eq!(parsed.assistant_action, "Unknown action"); + assert_eq!(parsed.turn_type, TurnType::Discussion); + assert!(parsed.key_topics.is_empty()); + } +}