From 4e9045fc6eb0f73c90f56096de17a8ad36d7bb9c Mon Sep 17 00:00:00 2001
From: Jacob Heider
Date: Mon, 8 Sep 2025 20:45:22 -0400
Subject: [PATCH 1/2] Improves leaderboard performance with caching

Adds a caching layer to the leaderboard endpoint to reduce database load and
improve response times. The cache stores project data keyed by project UUID
and expires entries after a fixed one-hour time-to-live (TTL). Missing or
expired projects are fetched from the database and re-cached. Bumps the API
version to 1.3.0.
---
 api/Cargo.toml       |  3 +-
 api/src/app_state.rs | 26 ++++++++++++++++
 api/src/handlers.rs  | 73 +++++++++++++++++++++++++++++++++++++++-----
 api/src/main.rs      |  4 +++
 api/src/utils.rs     | 25 ++++++++++++++-
 5 files changed, 122 insertions(+), 9 deletions(-)
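Note for reviewers (not part of the patch): below is a minimal, self-contained
sketch of the read-through caching pattern this change wires into the
leaderboard handler: check the per-UUID cache, serve entries younger than the
TTL, and fetch plus re-insert anything missing or stale. Entry is a simplified
stand-in for ProjectCacheEntry, fetch_project_from_db is a hypothetical
placeholder for the real leaderboard SQL query, and the UUID literal is
arbitrary; only the dashmap, serde_json and uuid crates the API already
depends on are assumed.

    use dashmap::DashMap;
    use serde_json::{json, Value};
    use std::sync::Arc;
    use std::time::{Duration, Instant};
    use uuid::Uuid;

    // Same one-hour TTL as the constant added to app_state.rs.
    const TTL: Duration = Duration::from_secs(3600);

    // Simplified stand-in for ProjectCacheEntry.
    struct Entry {
        data: Arc<Value>,
        created_at: Instant,
    }

    // Read-through lookup: serve an entry while it is younger than TTL,
    // otherwise fetch it again and overwrite the stale copy.
    fn get_or_fetch(cache: &DashMap<Uuid, Entry>, id: Uuid) -> Arc<Value> {
        if let Some(entry) = cache.get(&id) {
            if entry.created_at.elapsed() <= TTL {
                return entry.data.clone();
            }
        }
        // Hypothetical placeholder for the real database query.
        let fresh = Arc::new(fetch_project_from_db(id));
        cache.insert(
            id,
            Entry {
                data: fresh.clone(),
                created_at: Instant::now(),
            },
        );
        fresh
    }

    fn fetch_project_from_db(id: Uuid) -> Value {
        json!({ "projectId": id.to_string(), "teaRank": "42" })
    }

    fn main() {
        let cache = DashMap::new();
        let id = Uuid::parse_str("7c9e6679-7425-40de-944b-e07fc1f90ae7").unwrap();
        let first = get_or_fetch(&cache, id);  // miss: fetched and inserted
        let second = get_or_fetch(&cache, id); // hit: the same Arc comes back
        assert!(Arc::ptr_eq(&first, &second));
    }

Handing out Arc<Value> clones keeps cache hits cheap; as in the patch, values
are only deep-cloned into a Vec<Value> when the final response is serialized.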
diff --git a/api/Cargo.toml b/api/Cargo.toml
index eaa7455f..1687cd65 100644
--- a/api/Cargo.toml
+++ b/api/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "chai-api"
-version = "1.2.1"
+version = "1.3.0"
 edition = "2021"
 authors = ["Jacob Heider"]
 description = "A simple REST API for the CHAI database"
@@ -25,3 +25,4 @@ tokio-postgres = { version = "0.7", features = [
 ] }
 deadpool-postgres = "0.10.0"
 url = "2.5.2"
+dashmap = "6.1.0"
diff --git a/api/src/app_state.rs b/api/src/app_state.rs
index b62ec9c7..3c0693bf 100644
--- a/api/src/app_state.rs
+++ b/api/src/app_state.rs
@@ -1,7 +1,33 @@
+use dashmap::DashMap;
 use deadpool_postgres::Pool;
+use serde_json::Value;
 use std::sync::Arc;
+use std::time::{Duration, Instant};
+use uuid::Uuid;
+
+const TTL: Duration = Duration::from_secs(3600); // 1 hour
+
+#[derive(Clone)]
+pub struct ProjectCacheEntry {
+    pub data: Arc<Value>,
+    pub created_at: Instant,
+}
+
+impl ProjectCacheEntry {
+    pub fn new(data: Value) -> Self {
+        Self {
+            data: Arc::new(data),
+            created_at: Instant::now(),
+        }
+    }
+
+    pub fn is_expired(&self) -> bool {
+        self.created_at.elapsed() > TTL
+    }
+}
 
 pub struct AppState {
     pub pool: Pool,
     pub tables: Arc<Vec<String>>,
+    pub project_cache: Arc<DashMap<Uuid, ProjectCacheEntry>>,
 }
diff --git a/api/src/handlers.rs b/api/src/handlers.rs
index 201347f3..17513be4 100644
--- a/api/src/handlers.rs
+++ b/api/src/handlers.rs
@@ -1,11 +1,12 @@
 use actix_web::{get, post, web, HttpResponse, Responder};
 use serde::{Deserialize, Serialize};
 use serde_json::{json, Value};
+use std::sync::Arc;
 use tokio_postgres::error::SqlState;
 use uuid::Uuid;
 
 use crate::app_state::AppState;
-use crate::utils::{get_column_names, rows_to_json, Pagination};
+use crate::utils::{get_cached_projects, get_column_names, rows_to_json, Pagination};
 
 const RESPONSE_LIMIT: i64 = 1000;
 
@@ -317,7 +318,7 @@ pub async fn list_projects_by_id(
             WHERE cp2.canon_id = c.id
           ) AS "packageManagers"
         FROM canons c
-        JOIN urls u_homepage ON u_homepage.id = c.url_id
+        JOIN urls u_homepage ON u_homepage.id = c.url_id
         JOIN canon_packages cp ON cp.canon_id = c.id
         JOIN package_urls pu ON pu.package_id = cp.package_id
         JOIN urls u_source ON pu.url_id = u_source.id
@@ -415,8 +416,10 @@ pub async fn get_leaderboard(
     req: web::Json,
     data: web::Data<AppState>,
 ) -> impl Responder {
+    let limit = req.limit.clamp(1, RESPONSE_LIMIT);
+
     let Some(project_ids) = req.project_ids.as_deref() else {
-        return get_top_projects(data, req.limit).await;
+        return get_top_projects(data, limit).await;
     };
     if project_ids.len() > RESPONSE_LIMIT as usize {
         return HttpResponse::BadRequest().json(json!({
@@ -425,8 +428,16 @@ pub async fn get_leaderboard(
         }));
     }
 
-    let limit = req.limit.clamp(1, RESPONSE_LIMIT);
+    // Get cached projects and identify missing ones
+    let (cached_projects, missing_ids) =
+        get_cached_projects(data.project_cache.clone(), project_ids);
 
+    // If we have all projects cached, return them sorted
+    if missing_ids.is_empty() {
+        return sort_truncate_and_return(cached_projects, limit);
+    }
+
+    // Query for missing projects
     let query = r#"
         SELECT *
         FROM (
@@ -444,6 +455,7 @@ pub async fn get_leaderboard(
                 JOIN package_managers pm2 ON p2.package_manager_id = pm2.id
                 JOIN sources s ON pm2.source_id = s.id
                 WHERE cp2.canon_id = c.id
+                AND cp2.canon_id = ANY($1::uuid[])
             ) AS "packageManagers"
             FROM canons c
             JOIN urls u_homepage ON c.url_id = u_homepage.id
@@ -462,10 +474,35 @@ pub async fn get_leaderboard(
         LIMIT $2"#;
 
     match data.pool.get().await {
-        Ok(client) => match client.query(query, &[&project_ids, &limit]).await {
+        Ok(client) => match client.query(query, &[&missing_ids, &limit]).await {
             Ok(rows) => {
-                let json = rows_to_json(&rows);
-                HttpResponse::Ok().json(json)
+                let fresh_projects = rows_to_json(&rows);
+
+                // Cache the fresh projects
+                for project in &fresh_projects {
+                    if let Some(project_id) = project.get("projectId").and_then(|v| v.as_str()) {
+                        if let Ok(uuid) = Uuid::parse_str(project_id) {
+                            data.project_cache.insert(
+                                uuid,
+                                crate::app_state::ProjectCacheEntry::new(project.clone()),
+                            );
+                        } else {
+                            log::info!("Failed to parse project ID as UUID: {}", project_id);
+                        }
+                    } else {
+                        log::info!("No projectId found in project: {:?}", project);
+                    }
+                }
+
+                // Combine cached and fresh projects - keep Arc for cached ones
+                let mut all_projects: Vec<Arc<Value>> = cached_projects;
+
+                // Convert fresh projects to Arc to match the type
+                let fresh_arcs: Vec<Arc<Value>> =
+                    fresh_projects.into_iter().map(Arc::new).collect();
+                all_projects.extend(fresh_arcs);
+
+                sort_truncate_and_return(all_projects, limit)
             }
             Err(e) => {
                 log::error!("Database query error: {e}");
@@ -481,6 +518,28 @@ pub async fn get_leaderboard(
     }
 }
 
+// Helper function to sort, truncate, and return the final response
+fn sort_truncate_and_return(projects: Vec<Arc<Value>>, limit: i64) -> actix_web::HttpResponse {
+    let mut projects = projects;
+
+    // Sort projects by teaRank (descending) - Arc derefs to Value
+    projects.sort_by(|a, b| {
+        let rank_a = a.get("teaRank").and_then(|v| v.as_str()).unwrap_or("0");
+        let rank_b = b.get("teaRank").and_then(|v| v.as_str()).unwrap_or("0");
+        rank_b.cmp(rank_a)
+    });
+
+    // Apply limit
+    projects.truncate(limit as usize);
+
+    // Convert to Vec<Value> only for the final response - Arc doesn't implement Serialize
+    let final_projects: Vec<Value> = projects
+        .into_iter()
+        .map(|arc_val| (*arc_val).clone())
+        .collect();
+    actix_web::HttpResponse::Ok().json(final_projects)
+}
+
 async fn get_top_projects(data: web::Data<AppState>, limit: i64) -> HttpResponse {
     // get client
     let Ok(client) = data.pool.get().await else {
diff --git a/api/src/main.rs b/api/src/main.rs
index 0d7e0ac1..506c9964 100644
--- a/api/src/main.rs
+++ b/api/src/main.rs
@@ -5,6 +5,7 @@ mod logging;
 mod utils;
 
 use actix_web::{web, App, HttpServer};
+use dashmap::DashMap;
 use dotenv::dotenv;
 use std::env;
 use std::sync::Arc;
@@ -26,6 +27,8 @@ async fn main() -> std::io::Result<()> {
     let bind_address = format!("{host}:{port}");
 
     let (pool, tables) = db::initialize_db().await;
+    // Cache for project data to reduce database load on leaderboard routes
+    let project_cache = Arc::new(DashMap::new());
 
     log::info!("Available tables: {tables:?}");
     log::info!("Starting server at http://{bind_address}");
@@ -36,6 +39,7 @@ async fn main() -> std::io::Result<()> {
             .app_data(web::Data::new(AppState {
                 pool: pool.clone(),
                 tables: Arc::clone(&tables),
+                project_cache: Arc::clone(&project_cache),
             }))
             // HEALTH
             .service(heartbeat)
diff --git a/api/src/utils.rs b/api/src/utils.rs
index 49b0a324..b9533bde 100644
--- a/api/src/utils.rs
+++ b/api/src/utils.rs
@@ -1,10 +1,12 @@
 use actix_web::web::Query;
 use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
+use dashmap::DashMap;
 use serde_json::{json, Value};
+use std::sync::Arc;
 use tokio_postgres::{types::Type, Row};
 use uuid::Uuid;
 
-use crate::handlers::PaginationParams;
+use crate::{app_state::ProjectCacheEntry, handlers::PaginationParams};
 
 pub fn get_column_names(rows: &[Row]) -> Vec<String> {
     if let Some(row) = rows.first() {
@@ -91,3 +93,24 @@ impl Pagination {
         }
     }
 }
+
+// Helper function to get cached projects and return missing ones
+pub fn get_cached_projects(
+    cache: Arc<DashMap<Uuid, ProjectCacheEntry>>,
+    project_ids: &[Uuid],
+) -> (Vec<Arc<Value>>, Vec<Uuid>) {
+    let mut cached_projects = Vec::new();
+    let mut missing_ids = Vec::new();
+
+    for &project_id in project_ids {
+        if let Some(entry) = cache.get(&project_id) {
+            if !entry.is_expired() {
+                cached_projects.push(entry.data.clone());
+                continue;
+            }
+        }
+        missing_ids.push(project_id);
+    }
+
+    (cached_projects, missing_ids)
+}

From eb20787549ba34912fe8b01cf34033e4ad2a1c59 Mon Sep 17 00:00:00 2001
From: Jacob Heider
Date: Mon, 8 Sep 2025 20:56:32 -0400
Subject: [PATCH 2/2] thank you, copilot.

---
 api/src/handlers.rs | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/api/src/handlers.rs b/api/src/handlers.rs
index 17513be4..955e254f 100644
--- a/api/src/handlers.rs
+++ b/api/src/handlers.rs
@@ -455,7 +455,6 @@ pub async fn get_leaderboard(
                 JOIN package_managers pm2 ON p2.package_manager_id = pm2.id
                 JOIN sources s ON pm2.source_id = s.id
                 WHERE cp2.canon_id = c.id
-                AND cp2.canon_id = ANY($1::uuid[])
             ) AS "packageManagers"
             FROM canons c
             JOIN urls u_homepage ON c.url_id = u_homepage.id
@@ -487,10 +486,10 @@ pub async fn get_leaderboard(
                                 crate::app_state::ProjectCacheEntry::new(project.clone()),
                             );
                         } else {
-                            log::info!("Failed to parse project ID as UUID: {}", project_id);
+                            log::warn!("Failed to parse project ID as UUID: {}", project_id);
                         }
                     } else {
-                        log::info!("No projectId found in project: {:?}", project);
+                        log::warn!("No projectId found in project: {:?}", project);
                     }
                 }
 
@@ -524,9 +523,17 @@ fn sort_truncate_and_return(projects: Vec<Arc<Value>>, limit: i64) -> actix_web:
     let mut projects = projects;
 
     // Sort projects by teaRank (descending) - Arc derefs to Value
     projects.sort_by(|a, b| {
-        let rank_a = a.get("teaRank").and_then(|v| v.as_str()).unwrap_or("0");
-        let rank_b = b.get("teaRank").and_then(|v| v.as_str()).unwrap_or("0");
-        rank_b.cmp(rank_a)
+        let rank_a = a
+            .get("teaRank")
+            .and_then(|v| v.as_str())
+            .and_then(|s| s.parse::<i64>().ok())
+            .unwrap_or(0);
+        let rank_b = b
+            .get("teaRank")
+            .and_then(|v| v.as_str())
+            .and_then(|s| s.parse::<i64>().ok())
+            .unwrap_or(0);
+        rank_b.cmp(&rank_a)
     });
 
     // Apply limit