# app/routers/visualize.py
"""Visualization endpoints that return chart-ready JSON.

Exposes two read-only routes on ``router``:

* ``/author_graph``    -- co-author collaboration graph (nodes + edges).
* ``/paper_structure`` -- per-section chunk counts for one paper.

The payloads are plain JSON so a UI can render them later with Plotly or
D3.js.
"""
# Provenance: reconstructed from the post-image of patch 728cd19e
# ("use the output from autor.py to create a JSON- or Plotly-based
# co-author network", Rishabh Agarwal, 2025-10-04), with review fixes applied.
import logging
from typing import Any, Dict, Iterable, List, Tuple

from fastapi import APIRouter, HTTPException, Query

from app.storage import db

logger = logging.getLogger(__name__)

router = APIRouter()

# One row per distinct author pair. Grouping collapses the duplicate rows a
# pair would otherwise get for every paper they share, and the ORDER BY makes
# the subset kept by LIMIT deterministic (strongest collaborations first).
_COAUTHOR_PAIRS_SQL = """
    SELECT a1.name, a2.name, COUNT(DISTINCT pa1.paper_id) AS shared_papers
    FROM paper_authors pa1
    JOIN paper_authors pa2 ON pa1.paper_id = pa2.paper_id
    JOIN authors a1 ON a1.id = pa1.author_id
    JOIN authors a2 ON a2.id = pa2.author_id
    WHERE a1.id < a2.id
    GROUP BY a1.id, a1.name, a2.id, a2.name
    ORDER BY shared_papers DESC, a1.name, a2.name
    LIMIT %s;
"""

# Chunk count per section of one paper; ordered so the response is stable
# between calls.
_SECTION_COUNTS_SQL = """
    SELECT section, COUNT(*) AS chunks
    FROM paper_chunks
    WHERE paper_id = %s
    GROUP BY section
    ORDER BY section;
"""


def _build_coauthor_graph(pairs: Iterable[Tuple[str, str, int]]) -> Dict[str, Any]:
    """Turn ``(author, co_author, shared_papers)`` rows into a graph payload.

    Nodes are keyed by author name and listed in first-seen order; every row
    becomes exactly one edge.
    """
    # NOTE(review): node ids are author names, so two different authors who
    # share a name collapse into one node. Kept as-is for compatibility with
    # existing consumers of the ``id`` field.
    nodes: Dict[str, Dict[str, str]] = {}
    edges: List[Dict[str, Any]] = []
    for source, target, shared_papers in pairs:
        nodes.setdefault(source, {"id": source})
        nodes.setdefault(target, {"id": target})
        edges.append({"source": source, "target": target, "weight": shared_papers})

    return {
        "type": "coauthor_graph",
        "node_count": len(nodes),
        "edge_count": len(edges),
        "nodes": list(nodes.values()),
        "edges": edges,
    }


def _summarize_sections(paper_id: int, rows: Iterable[Tuple[Any, int]]) -> Dict[str, Any]:
    """Turn ``(section, chunk_count)`` rows into the paper-structure payload."""
    sections = [{"section": name, "chunks": count} for name, count in rows]
    return {
        "type": "paper_structure",
        "paper_id": paper_id,
        "sections": sections,
        "section_count": len(sections),
        "total_chunks": sum(entry["chunks"] for entry in sections),
    }


# ---------------------------
# Endpoint: /visualize/author_graph
# ---------------------------
@router.get("/author_graph")
def visualize_author_graph(
    limit: int = Query(
        25,
        ge=0,
        description="Maximum number of co-author pairs (edges) in graph",
    ),
):
    """
    Generate a co-author collaboration graph as JSON.

    Args:
        limit: Maximum number of distinct co-author pairs to return. Negative
            values are rejected by request validation instead of reaching the
            database.

    Returns:
        A dict with ``type``, ``node_count``, ``edge_count``, ``nodes`` (one
        ``{"id": name}`` per author) and ``edges`` (``source``, ``target`` and
        ``weight``, the number of papers the pair shares).

    Raises:
        HTTPException: 500 if the query fails. The underlying error is logged
            server-side rather than echoed to the client.
    """
    # Keep the try body to the database round-trip only.
    try:
        with db.get_conn() as conn:
            with conn.cursor() as cur:
                cur.execute(_COAUTHOR_PAIRS_SQL, (limit,))
                pairs = cur.fetchall()
    except Exception as exc:
        logger.exception("Failed to load co-author pairs (limit=%s)", limit)
        raise HTTPException(
            status_code=500, detail="Failed to load co-author graph"
        ) from exc

    return _build_coauthor_graph(pairs)


# ---------------------------
# Endpoint: /visualize/paper_structure
# ---------------------------
@router.get("/paper_structure")
def visualize_paper_structure(paper_id: int):
    """
    Visualize the structure of a paper by its sections and chunk counts.

    Args:
        paper_id: Identifier matched against ``paper_chunks.paper_id``.

    Returns:
        A dict with ``type``, ``paper_id``, ``sections`` (a list of
        ``{"section", "chunks"}``), ``section_count`` and ``total_chunks``,
        suitable for a bar or pie chart.

    Raises:
        HTTPException: 404 if the paper has no chunks; 500 if the query fails
            (the underlying error is logged, not returned to the client).
    """
    try:
        with db.get_conn() as conn:
            with conn.cursor() as cur:
                cur.execute(_SECTION_COUNTS_SQL, (paper_id,))
                rows = cur.fetchall()
    except Exception as exc:
        logger.exception("Failed to load section counts (paper_id=%s)", paper_id)
        raise HTTPException(
            status_code=500, detail="Failed to load paper structure"
        ) from exc

    # Raised outside the try block so the 404 is never mistaken for a
    # database failure.
    if not rows:
        raise HTTPException(status_code=404, detail="Paper not found or not chunked yet")

    return _summarize_sections(paper_id, rows)