diff --git a/.gitignore b/.gitignore index e9eb0a6..ac57f41 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,10 @@ app/scripts/logs app/scripts/pyrouge_root app/scripts/running_logs +app/services/old + + + #etc app/etc/* @@ -58,6 +62,12 @@ share/python-wheels/ *.egg MANIFEST + +app/data/ +app/model +app/tools/ +app/services/papers_service/data + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. diff --git a/app/runtime/api.py b/app/runtime/api.py index 355c838..884fcfa 100644 --- a/app/runtime/api.py +++ b/app/runtime/api.py @@ -32,7 +32,7 @@ class RecResponse(BaseModel): @app.get("/inference", response_model=RecResponse) def recommend( query: str = Query(..., description="검색 쿼리"), - top_k: int = Query(5, gt=1, le=10) # default 5 + top_k: int = Query(10, gt=1, le=10) # default 10 ): # 1) 쿼리 기준 top-k 클러스터 hits = search_clusters(query, top_k) diff --git a/app/runtime/requirements.txt b/app/runtime/requirements.txt index 087a22d..60b8699 100644 --- a/app/runtime/requirements.txt +++ b/app/runtime/requirements.txt @@ -4,4 +4,7 @@ torch sentence-transformers faiss-gpu networkx -tqdm \ No newline at end of file +tqdm +fastapi +uvicorn +pydantic \ No newline at end of file diff --git a/app/runtime/run_uvicorn.sh b/app/runtime/run_uvicorn.sh index bce9f44..015e7b2 100755 --- a/app/runtime/run_uvicorn.sh +++ b/app/runtime/run_uvicorn.sh @@ -1,3 +1,3 @@ #!/bin/bash -uvicorn runtime.api:app --reload --port 8004 \ No newline at end of file +uvicorn runtime.api:app --host 0.0.0.0 --port 8004 --reload diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 34855bb..6c46043 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -1,151 +1,146 @@ -import os -import json, hashlib -from typing import List, Dict, Optional, Tuple, Union -from fastapi import 
FastAPI, HTTPException -from pydantic import BaseModel -import aioredis -from data_util.logging import logger - -from collections import defaultdict -import requests -from fastapi import Query -from tree_mapping import extract_tree_mapping - -# ──────────────────────────────────────────────────────────────── -app = FastAPI(title="Graph Service with AI Inference") - -# Redis 초기화용 글로벌 -REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379") -redis: Optional[aioredis.Redis] = None - -# 요청 모델 -class GraphRequest(BaseModel): - root: str - top1: int = 5 - top2: int = 3 - -# 응답 트리 노드 구조 -class KeywordNode(BaseModel): - id: str - value: float - children: List["KeywordNode"] -KeywordNode.update_forward_refs() - -# 전체 응답 구조 -class GraphResponse(BaseModel): - keyword_tree: KeywordNode - -# Redis 연결 -@app.on_event("startup") -async def startup_event(): - global redis - # modern aioredis uses from_url - try: - redis = await aioredis.from_url( - REDIS_URL, - encoding="utf-8", - decode_responses=True, - max_connections=10 - ) - logger.info(f"✅ Connected to Redis at {REDIS_URL}") - except Exception as e: - logger.warning(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}") - redis = None - -@app.on_event("shutdown") -async def shutdown_event(): - await redis.close() - -# 캐시 키 생성 함수 -def make_cache_key( root: str, top1: int, top2: int) -> str: - # 파라미터 조합으로 고유 키 생성 - key_str = f"{root}|{top1}|{top2}" - return "graph:" + hashlib.sha256(key_str.encode()).hexdigest() - - -# AI 서버 호출 함수 -def fetch_keywords(query: str) -> list[str]: - try: - response = requests.get( - "http://searchforest-ai:8004/inference", - params={"query": query, "top_k": 5} - ) - response.raise_for_status() - data = response.json() - keywords = [child["kw"] for child in data["results"]["children"]] - return keywords - except Exception as e: - print(f"[ERROR] AI 서버 호출 실패: {e}") - return [] - -# AI 서버 호출 + 결과 캐싱 -async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): - try: - # response = 
requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) - response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) - - response.raise_for_status() - data = response.json() - - # 트리 구성 - keyword_tree = { - "id": root, - "value": 1.0, - "children": [] - } - kw2pids = {} - - for cluster in data["results"]["children"]: - cluster_kw = cluster["kw"] - subnodes = cluster.get("children", []) - child_node = { - "id": cluster_kw, - "value": cluster["sim"], - "children": [] - } - for sub in subnodes: - child_node["children"].append({"id": sub["kw"], "value": 0.8, "children": []}) - kw2pids[sub["kw"]] = sub["pids"] - - keyword_tree["children"].append(child_node) - - cache_key = make_cache_key(root, top1, top2) - if redis: - await redis.set(cache_key, json.dumps({"tree": keyword_tree, "kw2pids": kw2pids}), ex=3600) - - return keyword_tree, kw2pids - - except Exception as e: - print(f"[ERROR] AI 호출 실패: {e}") - raise - -# /graph 엔드포인트 -@app.post("/graph", response_model=GraphResponse) -async def build_graph(req: GraphRequest): - - cache_key = make_cache_key(req.root, req.top1, req.top2) - if redis: - cached = await redis.get(cache_key) - if cached: - obj = json.loads(cached) - return {"keyword_tree": obj["tree"], "kw2pids": obj["kw2pids"]} - - tree = await fetch_from_ai_and_cache(req.root, req.top1, req.top2) - - root, mapping = extract_tree_mapping(original_json) - tree = manual_tree_with_full_values(root, mapping) - tree_parsed = manual_tree_with_full_values(tree) - - return {"keyword_tree": tree_parsed, "kw2pids": kw2pids} - - -# /kw2pids 엔드포인트 (핑퐁용) -@app.get("/kw2pids") -async def get_kw2pids(query: str = Query(...), top1: int = 5, top2: int = 3): - cache_key = make_cache_key(query, top1, top2) - if redis: - cached = await redis.get(cache_key) - if cached: - obj = json.loads(cached) - return obj["kw2pids"] - return {"message": "No cached kw2pids available."} +import os +import json, hashlib +from 
typing import List, Dict, Optional, Tuple, Union +from fastapi import FastAPI, HTTPException, Query +from pydantic import BaseModel +import aioredis +import requests +from tree_mapping import extract_tree_mapping + +# ──────────────────────────────────────────────────────────────── +app = FastAPI(title="Graph Service with AI Inference") + +# Redis 초기화용 글로벌 +REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379") +redis: Optional[aioredis.Redis] = None + +# 요청 모델 +class GraphRequest(BaseModel): + root: str + top1: int = 5 + top2: int = 3 + +# 응답 트리 노드 구조 +class KeywordNode(BaseModel): + id: str + value: float + children: List["KeywordNode"] +KeywordNode.update_forward_refs() + +# 전체 응답 구조 +class GraphResponse(BaseModel): + keyword_tree: KeywordNode + +# Redis 연결 +@app.on_event("startup") +async def startup_event(): + global redis + # modern aioredis uses from_url + try: + redis = await aioredis.from_url( + REDIS_URL, + encoding="utf-8", + decode_responses=True, + max_connections=10 + ) + print(f"✅ Connected to Redis at {REDIS_URL}") + except Exception as e: + print(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}") + redis = None + +@app.on_event("shutdown") +async def shutdown_event(): + await redis.close() + +# 캐시 키 생성 함수 +def make_cache_key( root: str, top1: int, top2: int) -> str: + # 파라미터 조합으로 고유 키 생성 + key_str = f"{root}|{top1}|{top2}" + return "graph:" + hashlib.sha256(key_str.encode()).hexdigest() + + +# AI 서버 호출 함수 +def fetch_keywords(query: str) -> list[str]: + try: + response = requests.get( + "https://2f7a-165-194-104-91.ngrok-free.app/inference", + params={"query": query, "top_k": 5} + ) + response.raise_for_status() + data = response.json() + keywords = [child["kw"] for child in data["results"]["children"]] + return keywords + except Exception as e: + print(f"[ERROR] AI 서버 호출 실패: {e}") + return [] + +# AI 서버 호출 + 결과 캐싱 +async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): + try: + #response = requests.get("http://searchforest-ai:8004/inference", 
params={"query": root, "top_k": top1}) + response = requests.get("https://2f7a-165-194-104-91.ngrok-free.app/inference", params={"query": root, "top_k": top1}) + + # response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) + + response.raise_for_status() + data = response.json() + + tree_data = data["results"]["children"] + + # 👉 트리 포맷 맞춰 변환 + mapping = {} + for node in tree_data: + lvl1_kw = node["id"] + mapping[lvl1_kw] = { + "value": node.get("sim", 0.8), + "children": node.get("children", []) + } + + keyword_tree = manual_tree_with_full_values(root, mapping) + + # pids 추출 + kw2pids = {} + for node in tree_data: + for child in node["children"]: + kw2pids[child["id"]] = child["pids"] + + cache_key = make_cache_key(root, top1, top2) + if redis: + await redis.set(cache_key, json.dumps({"tree": keyword_tree, "kw2pids": kw2pids}), ex=3600) + + return keyword_tree, kw2pids + + except Exception as e: + print(f"[ERROR] AI 호출 실패: {e}") + raise + +# /graph 엔드포인트 +@app.post("/graph", response_model=GraphResponse) +async def build_graph(req: GraphRequest): + + cache_key = make_cache_key(req.root, req.top1, req.top2) + if redis: + cached = await redis.get(cache_key) + if cached: + obj = json.loads(cached) + return {"keyword_tree": obj["tree"], "kw2pids": obj["kw2pids"]} + + tree, kw2pids = await fetch_from_ai_and_cache(req.root, req.top1, req.top2) + + # NOTE(review): dropped post-processing that referenced the undefined + # names original_json / tree_parsed; fetch_from_ai_and_cache already + # returns the finished (tree, kw2pids) pair. + + return {"keyword_tree": tree, "kw2pids": kw2pids} + + +# /kw2pids 엔드포인트 (핑퐁용) +@app.get("/kw2pids") +async def get_kw2pids(query: str = Query(...), top1: int = 5, top2: int = 3): + cache_key = make_cache_key(query, top1, top2) + if redis: + cached = await redis.get(cache_key) + if cached: + obj = json.loads(cached) + return obj["kw2pids"] + return {"message": "No cached kw2pids available."} diff --git 
a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index d5a0f17..0b17cf8 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -1,4 +1,5 @@ fastapi uvicorn[standard] pydantic -aioredis \ No newline at end of file +aioredis +requests \ No newline at end of file diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index 1f02272..f732cab 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -1,146 +1,205 @@ -# papers_service/main.py -import random - -from fastapi import FastAPI, HTTPException, Query -from pydantic import BaseModel -from typing import List, Optional -import json -import os - -app = FastAPI(title="Papers Service") - - -class Author(BaseModel): - name: str - - -class Citation(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -class Reference(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -# --- 논문 객체 정의 --- -class Paper(BaseModel): - paper_id: str - abstract: Optional[str] - title: Optional[str] - - url: Optional[str] - venue: Optional[str] - year: Optional[int] - - reference_count: Optional[int] - citation_count: Optional[int] - influentialCitationCount: Optional[int] - - fieldsOfStudy: Optional[List[str]] - tldr: Optional[str] - authors: List[Author] - - sim_score: float - - -# --- 응답 모델 --- -class PapersResponse(BaseModel): - total_results: int - max_display: int - page: int - page_size: int - papers: List[Paper] - - - -def make_cache_key(root, top1, top2): - key_str = f"{root}|{top1}|{top2}" - return "keyword_tree:graph:" + hashlib.sha256(key_str.encode()).hexdigest() - - -def fetch_keyword_tree_from_graph_service(query: str) -> dict: - response = requests.get("http://graph-service:8002/keyword_tree", params={"query": query}) - return response.json() - -@app.get("/papers", 
response_model=PapersResponse) -def get_papers_by_keyword( - kw: str = Query(..., description="클릭한 키워드"), - page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100) -): - - response = requests.post("http://graph-service:8002/graph", json={ - "root": kw, - "top1": 10, - "top2": 3 - }) - - - keyword_tree = response.json().get("keyword_tree") - - response = requests.get("http://graph-service:8002/keyword_tree", params={"query": "AI"}) - kw2pids = response.json() - - - - keyword_tree_json = redis.get(f"keyword_tree:{cache_key}") - keyword_tree = json.loads(keyword_tree_json) - - key = make_cache_key("pid123", "AI", 5, 3) - cached = redis.get(key) - if cached: - kw2pids = json.loads(cached) - - - if kw not in kw2pids: - raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.") - else: - all_pids = kw2pids[kw] - - # 페이징 - total = len(all_pids) - start = (page - 1) * page_size - end = min(start + page_size, total) - sliced = all_pids[start:end] - - papers = [] - for pid in sliced: - if pid not in paper_db: - continue - entry = paper_db[pid] - papers.append(Paper( - paper_id=pid, - title=entry.get("title"), - abstract=entry.get("abstract"), - url=entry.get("url"), - venue=entry.get("venue"), - year=entry.get("year"), - reference_count=entry.get("referenceCount"), - citation_count=entry.get("citationCount"), - influentialCitationCount=entry.get("influentialCitationCount"), - fieldsOfStudy=entry.get("fieldsOfStudy"), - tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, - authors=[Author(name=a["name"]) for a in entry.get("authors", [])], - sim_score=random.uniform(0, 1) # Stub score - )) - - return PapersResponse( - total_results=total, - max_display=len(sliced), - page=page, - page_size=page_size, - papers=papers - ) - -@app.get("/keyword_tree") -async def get_keyword_tree(query: str = Query(...)): - cache_key = f"kw2pids:{query}" - if redis: - cached = await redis.get(cache_key) - if cached: - return json.loads(cached) - 
return {"message": "No cached keyword->pids mapping found."} +# papers_service/main.py +import random + +from fastapi import FastAPI, HTTPException, Query +from pydantic import BaseModel +from typing import List, Optional +import json +import os + +app = FastAPI(title="Papers Service (Stub)") + + +class Author(BaseModel): + name: str + + +class Citation(BaseModel): + paperId: str + title: Optional[str] + year: Optional[int] + + +class Reference(BaseModel): + paperId: str + title: Optional[str] + year: Optional[int] + + +# --- 논문 객체 정의 --- +class Paper(BaseModel): + paper_id: str + abstract: Optional[str] + title: Optional[str] + + url: Optional[str] + venue: Optional[str] + year: Optional[int] + + reference_count: Optional[int] + citation_count: Optional[int] + influentialCitationCount: Optional[int] + + fieldsOfStudy: Optional[List[str]] + tldr: Optional[str] + authors: List[Author] + + sim_score: float + + +# --- 응답 모델 --- +class PapersResponse(BaseModel): + total_results: int + max_display: int + page: int + page_size: int + papers: List[Paper] + +# 1) 전역 로딩 +BASE_DIR = os.path.join(os.path.dirname(__file__), "data") +PAPER_DATA_PATH = os.path.join(BASE_DIR, "inductive_test_checkpoint_collected.json") +GRAPH_KEYWORD_PATH = os.path.join(BASE_DIR, "kw2pids.json") +with open(PAPER_DATA_PATH, "r", encoding="utf-8") as f: + paper_db = json.load(f) + +# 예시: 이미 생성한 키워드 → 논문 ID 매핑 +with open(GRAPH_KEYWORD_PATH, "r", encoding="utf-8") as f: + kw2pids = json.load(f) + + +@app.get("/papers", response_model=PapersResponse) +def get_random_papers( + kw: str = Query(..., description="검색할 키워드"), + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100) +): + all_pids = list(paper_db.keys()) + sample_size = random.randint(20, 40) + sampled_pids = random.sample(all_pids, min(sample_size, len(all_pids))) + + total = len(sampled_pids) + start = (page - 1) * page_size + end = min(start + page_size, total) + sliced = sampled_pids[start:end] + + papers = [] + for pid 
in sliced: + entry = paper_db[pid] + papers.append(Paper( + paper_id=pid, + title=entry.get("title"), + abstract=entry.get("abstract"), + url=entry.get("url"), + venue=entry.get("venue"), + year=entry.get("year"), + reference_count=entry.get("referenceCount"), + citation_count=entry.get("citationCount"), + influentialCitationCount=entry.get("influentialCitationCount"), + fieldsOfStudy=entry.get("fieldsOfStudy"), + tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, + authors=[Author(name=a["name"]) for a in entry.get("authors", [])], + sim_score=random.uniform(0, 1) + )) + + return PapersResponse( + total_results=total, + max_display=len(sliced), + page=page, + page_size=page_size, + papers=papers + ) + + +def get_papers_by_keyword( + kw: str = Query(..., description="검색할 키워드"), + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100) +): + if kw not in kw2pids: + print(f"Keyword '{kw}' not found.") + all_pids = [ + "40108038", + "59572248", + "5799960", + "14188576", + "119242784" + ] + # raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.") + else: + deduplicated_ids = [ + "13074624", + "14188576", + "14516333", + "14909482", + "15302646", + "162168808", + "198147940", + "28639198", + "40108038", + "41418788", + "51183683", + "52232173", + "53641451", + "55836730", + "56099032", + "5734610", + "5799960", + "59408549", + "59572248", + "786330", + "85459157", + "10682321", + "11501607", + "115113968", + "11534505", + "117899249", + "118489086", + "118587315", + "118751294", + "118816857", + "118849608", + "119111722", + "119144587", + "119209851", + "119241784", + "119341051", + "119471991", + "119472164" + ] + # all_pids = kw2pids[kw] + all_pids = random.sample(deduplicated_ids, 20) + + # 페이징 + total = len(all_pids) + start = (page - 1) * page_size + end = min(start + page_size, total) + sliced = all_pids[start:end] + + papers = [] + for pid in sliced: + if pid not in paper_db: + continue + entry = 
paper_db[pid] + papers.append(Paper( + paper_id=pid, + title=entry.get("title"), + abstract=entry.get("abstract"), + url=entry.get("url"), + venue=entry.get("venue"), + year=entry.get("year"), + reference_count=entry.get("referenceCount"), + citation_count=entry.get("citationCount"), + influentialCitationCount=entry.get("influentialCitationCount"), + fieldsOfStudy=entry.get("fieldsOfStudy"), + tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, + authors=[Author(name=a["name"]) for a in entry.get("authors", [])], + sim_score=random.uniform(0, 1) # Stub score + )) + + return PapersResponse( + total_results=total, + max_display=len(sliced), + page=page, + page_size=page_size, + papers=papers + ) diff --git a/app/services/requirements.txt b/app/services/requirements.txt index c9b6004..81a4930 100644 --- a/app/services/requirements.txt +++ b/app/services/requirements.txt @@ -1,3 +1,3 @@ fastapi uvicorn -pydantic +pydantic \ No newline at end of file