From 728cd19ef06d39952708cf930d8fa945085ece92 Mon Sep 17 00:00:00 2001
From: Rishabh Agarwal <aggarw266@umn.edu>
Date: Sat, 4 Oct 2025 16:37:51 -0500
Subject: [PATCH] use the output from autor.py to create a JSON- or
 Plotly-based co-author network

---
 app/routers/visualize.py | 99 ++++++++++++++++++++++++++++++----------
 1 file changed, 75 insertions(+), 24 deletions(-)

diff --git a/app/routers/visualize.py b/app/routers/visualize.py
index f3366dbd..7fdfa6b4 100644
--- a/app/routers/visualize.py
+++ b/app/routers/visualize.py
@@ -1,44 +1,95 @@
 # app/routers/visualize.py
 from fastapi import APIRouter, Query, HTTPException
-from collections import Counter
-import re
 from app.storage import db
 
 router = APIRouter()
 
-@router.get("/concept")
-def visualize_section(
-    paper_id: int = Query(..., description="Paper ID to visualize"),
-    section: str = Query("abstract", description="Section name to visualize")
-):
+# ---------------------------
+# Endpoint: /visualize/author_graph
+# ---------------------------
+@router.get("/author_graph")
+def visualize_author_graph(limit: int = Query(25, ge=0, description="Limit number of co-author pairs (edges) in graph")):
     """
-    Very basic visualization: return top keywords from a section of a paper.
-    Later we can plug into Plotly to render diagrams/charts.
+    Return a co-author graph as JSON: unique author-name nodes and deduplicated pair edges.
+    `limit` caps the co-author pairs fetched (not authors); rendering (Plotly/D3.js) is left to the UI.
     """
     try:
-        # Fetch chunks for the section
-        sql = """
-        SELECT content
-        FROM paper_chunks
-        WHERE paper_id = %s AND section ILIKE %s;
-        """
         with db.get_conn() as conn:
             with conn.cursor() as cur:
-                cur.execute(sql, (paper_id, section))
-                texts = [r[0] for r in cur.fetchall()]
+                # Distinct co-author pairs; a1.id < a2.id drops self-pairs and mirrored duplicates
+                cur.execute(
+                    """
+                    SELECT DISTINCT a1.name, a2.name
+                    FROM paper_authors pa1
+                    JOIN paper_authors pa2 ON pa1.paper_id = pa2.paper_id
+                    JOIN authors a1 ON a1.id = pa1.author_id
+                    JOIN authors a2 ON a2.id = pa2.author_id
+                    WHERE a1.id < a2.id
+                    ORDER BY a1.name, a2.name LIMIT %s;
+                    """,
+                    (limit,),
+                )
+                pairs = cur.fetchall()
 
-        if not texts:
-            return {"paper_id": paper_id, "section": section, "keywords": []}
+        # Build graph: nodes keyed by author name (same-named authors collapse into one node)
+        nodes = {}
+        edges = []
+        for a1, a2 in pairs:
+            if a1 not in nodes:
+                nodes[a1] = {"id": a1}
+            if a2 not in nodes:
+                nodes[a2] = {"id": a2}
+            edges.append({"source": a1, "target": a2})
 
-        text = " ".join(texts)
-        words = re.findall(r"\b[a-zA-Z]{4,}\b", text.lower())  # only words ≥4 chars
-        freq = Counter(words).most_common(15)
+        return {
+            "type": "coauthor_graph",
+            "node_count": len(nodes),
+            "edge_count": len(edges),
+            "nodes": list(nodes.values()),
+            "edges": edges,
+        }
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ---------------------------
+# Endpoint: /visualize/paper_structure
+# ---------------------------
+@router.get("/paper_structure")
+def visualize_paper_structure(paper_id: int):
+    """
+    Return the sections of one paper with the number of paper_chunks rows in each, as JSON.
+    Responds 404 when no chunk rows exist for paper_id and 500 on any other failure.
+    """
+    try:
+        with db.get_conn() as conn:
+            with conn.cursor() as cur:
+                cur.execute(
+                    """
+                    SELECT section, COUNT(*) AS chunks
+                    FROM paper_chunks
+                    WHERE paper_id = %s
+                    GROUP BY section;
+                    """,
+                    (paper_id,),
+                )
+                data = cur.fetchall()
+
+        if not data:  # no chunk rows: unknown paper_id or paper not chunked yet
+            raise HTTPException(status_code=404, detail="Paper not found or not chunked yet")
+
+        sections = [{"section": d[0], "chunks": d[1]} for d in data]  # row = (section, chunk count)
 
         return {
+            "type": "paper_structure",
             "paper_id": paper_id,
-            "section": section,
-            "keywords": [{"word": w, "count": c} for w, c in freq],
+            "sections": sections,
+            "section_count": len(sections),
+            "total_chunks": sum(s["chunks"] for s in sections),
         }
 
+    except HTTPException:  # let the 404 above propagate instead of being rewrapped as a 500
+        raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))