diff --git a/.gitignore b/.gitignore index e9eb0a6..f12775e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ app/model/model_save/* #data app/services/graph_service/data/indices/paper_ivf.index app/services/graph_service/data/indices +app/services/papers_service/data app/indices app/routers/previous/output/summaries_dir @@ -25,11 +26,17 @@ app/data app/routers/output/summaries_dir/* + + #logs app/scripts/logs app/scripts/pyrouge_root app/scripts/running_logs +app/services/old + + + #etc app/etc/* @@ -58,6 +65,11 @@ share/python-wheels/ *.egg MANIFEST + +app/data/ +app/model +app/tools/ + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. @@ -206,3 +218,4 @@ cython_debug/ .pypirc app/data/inductive/train.jsonl app/routers/output/summaries_dir/ +app/services/papers_service/data/paper_db.json diff --git a/app/services/papers_service/data/__init__.py b/app/NoneType similarity index 100% rename from app/services/papers_service/data/__init__.py rename to app/NoneType diff --git a/app/runtime/Dockerfile b/app/runtime/Dockerfile index f1c9f86..da9a191 100644 --- a/app/runtime/Dockerfile +++ b/app/runtime/Dockerfile @@ -5,21 +5,23 @@ ENV DEBIAN_FRONTEND=noninteractive \ PYTHONUNBUFFERED=1 \ NVIDIA_VISIBLE_DEVICES=all +WORKDIR /app + # 필수 패키지 RUN apt-get update && apt-get install -y \ python3-pip python3-dev git && \ rm -rf /var/lib/apt/lists/* -# 프로젝트 소스 복사 -WORKDIR /workspace -COPY .. /workspace +# 1) requirements만 먼저 복사 → 레이어 캐시 +COPY runtime/requirements.txt ./requirements.txt +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt -# 의존성(인퍼런스 전용) -RUN pip3 install --upgrade pip \ - && pip3 install -r app/runtime/requirements.txt +# 2) 실제 런타임 코드 복사 +COPY runtime/ ./runtime/ # 지표·인덱스(volume 으로 붙여도 OK) # COPY indices/ /workspace/indices EXPOSE 8004 -CMD ["uvicorn", "app.runtime.api:app", "--host", "0.0.0.0", "--port", "8004"] \ No newline at end of file +CMD ["uvicorn", "runtime.api:app", "--host", "0.0.0.0", "--port", "8004"] diff --git a/app/services/papers_service/data/ids.json b/app/runtime/__init__.py similarity index 100% rename from app/services/papers_service/data/ids.json rename to app/runtime/__init__.py diff --git a/app/runtime/api.py b/app/runtime/api.py index 355c838..47b7cd1 100644 --- a/app/runtime/api.py +++ b/app/runtime/api.py @@ -6,6 +6,7 @@ from runtime.cluster_searcher import search_clusters, cluster2pids, meta from runtime.graph_builder import build_tree + app = FastAPI(title="SearchForest-AI Recommend API") @@ -32,7 +33,7 @@ class RecResponse(BaseModel): @app.get("/inference", response_model=RecResponse) def recommend( query: str = Query(..., description="검색 쿼리"), - top_k: int = Query(5, gt=1, le=10) # default 5 + top_k: int = Query(10, gt=1, le=10) # default 10 ): # 1) 쿼리 기준 top-k 클러스터 hits = search_clusters(query, top_k) @@ -46,11 +47,6 @@ def recommend( root["children"].append(cluster_node) return {"results": root } - - - - - # 로컬 실행용 if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8004) diff --git a/app/runtime/graph_builder.py b/app/runtime/graph_builder.py index d6bc7df..ec7e99e 100644 --- a/app/runtime/graph_builder.py +++ b/app/runtime/graph_builder.py @@ -9,7 +9,7 @@ import torch from runtime.cluster_searcher import meta, cluster2pids # ← meta 와 함께 추가로 import - +kw2pids: dict[str, list[str]] = {} # keyword → [paper_ids] # ── 전역 설정 ─────────────────────────────────────────── @@ -121,6 +121,19 @@ def contains_kw(abs_txt: str, kw: str) -> bool: max_features=40_000, ) +# from sklearn.feature_extraction.text import TfidfVectorizer + +# # 전처리한 phrase 문서를 그대로 feeding +# vectorizer = TfidfVectorizer( +# tokenizer=lambda s: s, preprocessor=lambda s: s, lowercase=False, +# ngram_range=(1,3), min_df=5, max_df=0.8 +# ) +# tfidf_mat = vectorizer.fit_transform(docs) # shape (N_docs, N_terms) +# idf = vectorizer.idf_ +# vocab = vectorizer.vocabulary_ # dict{phrase: idx} + + + def top_keywords(pids, n=8): docs = [_as_text(G.nodes[p].get("abstract", "")).lower() for p in pids if G.has_node(p)] @@ -156,7 +169,17 @@ def build_tree(root_kw: str, cid: int, depth: int = 1): )) pids_lvl0 = cluster2pids[cid] - tree = {"id": root_kw, "value": 1.0, "children": []} + + # ① root 에도 pids 부여 + tree = { + "id": root_kw, + "value": 1.0, + "pids": pids_lvl0, # ★ 추가 + "children": [] + } + + # kw2pids 전역 캐시에도 root 등록 + kw2pids[root_kw] = pids_lvl0 # ── depth-1 (최대 3개) ────────────────────── for kw1, sc1 in select_kw_scored(root_kw, cand, tfidf_dict, k=3): @@ -172,16 +195,36 @@ def build_tree(root_kw: str, cid: int, depth: int = 1): "id": kw1, "value": round(sc1, 4), "pids": hop1, # 필요 없으면 제거 + "children": [] } + kw2pids[kw1] = hop1 + # ── depth-2 : parent=kw1, 최대 3개 ─────── if depth > 1: for kw2, sc2 in select_kw_scored(kw1, cand, tfidf_dict, k=3): + # node1["children"].append({ + # "id": kw2, + # "value": round(sc2, 4), + # }) + # hop-2 pids (root → kw1 → kw2) + kw2_emb = model.encode([kw2], + normalize_embeddings=True)[0] + hop2 = [ + p for p in hop1 + if (emb := get_abs_emb(p)) is not None + and util.cos_sim(kw2_emb, emb).item() > COS_TH2 + ] + node1["children"].append({ "id": kw2, "value": round(sc2, 4), + "pids": hop2, }) + # kw2pids에 2-depth 저장 + kw2pids[kw2] = hop2 + tree["children"].append(node1) return tree diff --git a/app/runtime/requirements.txt b/app/runtime/requirements.txt index 087a22d..4be3dea 100644 --- a/app/runtime/requirements.txt +++ b/app/runtime/requirements.txt @@ -4,4 +4,8 @@ torch sentence-transformers faiss-gpu networkx -tqdm \ No newline at end of file +tqdm +fastapi +uvicorn +pydantic +spacy \ No newline at end of file diff --git a/app/runtime/run_uvicorn.sh b/app/runtime/run_uvicorn.sh index bce9f44..015e7b2 100755 --- a/app/runtime/run_uvicorn.sh +++ b/app/runtime/run_uvicorn.sh @@ -1,3 +1,3 @@ #!/bin/bash -uvicorn runtime.api:app --reload --port 8004 \ No newline at end of file +uvicorn runtime.api:app --host 0.0.0.0 --port 8004 --reload diff --git a/app/services/get_meta.py b/app/services/get_meta.py new file mode 100644 index 0000000..5c8f2e1 --- /dev/null +++ b/app/services/get_meta.py @@ -0,0 +1,17 @@ +import aiohttp, asyncio + +S2_URL = "https://api.semanticscholar.org/graph/v1/paper/{}" +FIELDS = "title,abstract,year,venue,referenceCount,citationCount," \ + "influentialCitationCount,fieldsOfStudy,authors,tldr" + +async def fetch_paper(session, pid): + url = S2_URL.format(pid) + params = {"fields": FIELDS} + async with session.get(url, params=params, timeout=10) as r: + r.raise_for_status() + return await r.json() + +async def fetch_many(pids): + async with aiohttp.ClientSession() as session: + tasks = [fetch_paper(session, pid) for pid in pids] + return await asyncio.gather(*tasks) \ No newline at end of file diff --git a/app/services/graph_service/Dockerfile b/app/services/graph_service/Dockerfile index 6cf9703..4ebe574 100644 --- a/app/services/graph_service/Dockerfile +++ b/app/services/graph_service/Dockerfile @@ -2,10 +2,9 @@ FROM python:3.9-slim WORKDIR /app -COPY requirements.txt . +COPY . . RUN pip install --no-cache-dir -r requirements.txt -COPY . . # FastAPI Uvicorn 실행 diff --git a/app/services/graph_service/dummy_data.py b/app/services/graph_service/dummy_data.py deleted file mode 100644 index 478db51..0000000 --- a/app/services/graph_service/dummy_data.py +++ /dev/null @@ -1,115 +0,0 @@ -def get_dummy_tree(root: str, top1: int = 5, top2: int = 3): - """ - root 키워드 + top1, top2 파라미터를 반영한 더미 radial-tree 반환 - depth1: top1 개수, depth2: top2 개수로 구성됩니다. - """ - children = [] - - # depth1: top1개의 자식 노드 생성 - for i in range(top1): - # 라벨은 A, B, C, ... 또는 번호로 생성 - label = chr(ord('A') + i) if i < 26 else str(i+1) - # base_sim: 0.9에서 일정 간격으로 감소 - base_sim = round(1.0 - (i + 1) * (0.1), 4) - - # depth2: 각 depth1 노드에 top2 개수만큼 자식 생성 - grandchildren = [] - for j in range(top2): - child_label = f"{root}-{label}-{j+1}" - # sim value: base_sim에서 0.05씩 감소 - child_sim = round(base_sim - (j + 1) * 0.05, 4) - grandchildren.append({ - "id": child_label, - "value": child_sim, - "children": [] - }) - - children.append({ - "id": f"{root}-{label}", - "value": base_sim, - "children": grandchildren - }) - - # 루트 노드 반환 - return { - "id": root, - "value": 1.0, - "children": children - } - -# --- 기본 더미 트리 생성기 (context 포함) --- -def get_dummy_tree_with_context(root: str, top1: int = 5, top2: int = 3): - children = [] - for i in range(top1): - label = chr(ord('A') + i) if i < 26 else str(i+1) - base_sim = round(1.0 - (i + 1) * 0.1, 4) - grandchildren = [] - for j in range(top2): - child_label = f"{label}-{j+1}" - full_context = f"{root}-{label}-{j+1}" - child_sim = round(base_sim - (j + 1) * 0.05, 4) - grandchildren.append({ - "id": child_label, - "context": full_context, - "value": child_sim, - "children": [] - }) - children.append({ - "id": label, - "context": f"{root}-{label}", - "value": base_sim, - "children": grandchildren - }) - return { - "id": root, - "context": root, - "value": 1.0, - "children": children - } - -def get_dummy_tree_with_context_and_example(root: str, top1: int = 5, top2: int = 3): - """ - root 키워드를 중심으로 top1개의 1-depth와 각 1-depth마다 top2개의 2-depth를 생성한 더미 트리를 반환. - 각 노드에는 context, value, example 필드가 포함됨. - """ - children = [] - - for i in range(top1): - label = chr(ord('A') + i) if i < 26 else str(i + 1) - base_sim = round(1.0 - (i + 1) * 0.1, 4) - lvl1_id = label - lvl1_context = f"{root}-{label}" - lvl1_example = f"{root} 분야의 하위 주제 {label}에 대한 간단한 설명입니다." - - grandchildren = [] - for j in range(top2): - lvl2_id = f"{label}-{j + 1}" - lvl2_context = f"{root}-{label}-{j + 1}" - lvl2_value = round(base_sim - (j + 1) * 0.05, 4) - lvl2_example = f"{label} 세부 주제 {j + 1}에 대한 예시 설명입니다." - - grandchildren.append({ - "id": lvl2_id, - "context": lvl2_context, - "value": lvl2_value, - "example": lvl2_example, - "children": [] - }) - - children.append({ - "id": lvl1_id, - "context": lvl1_context, - "value": base_sim, - "example": lvl1_example, - "children": grandchildren - }) - - return { - "id": root, - "context": root, - "value": 1.0, - "example": f"{root}라는 주제를 중심으로 확장된 키워드 구조입니다.", - "children": children - } - - diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 0f19ede..63eb2b4 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -1,152 +1,113 @@ -import os -import json, hashlib -from typing import List, Dict, Optional, Tuple, Union -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -import aioredis -from data_util.logging import logger - -from data_util.config import Config -from collections import defaultdict -import requests -from fastapi import Query -from tree_mapping import extract_tree_mapping - -# ──────────────────────────────────────────────────────────────── -app = FastAPI(title="Graph Service with AI Inference") - -# Redis 초기화용 글로벌 -REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379") -redis: Optional[aioredis.Redis] = None - -# 요청 모델 -class GraphRequest(BaseModel): - root: str - top1: int = 5 - top2: int = 3 - -# 응답 트리 노드 구조 -class KeywordNode(BaseModel): - id: str - value: float - children: List["KeywordNode"] -KeywordNode.update_forward_refs() - -# 전체 응답 구조 -class GraphResponse(BaseModel): - keyword_tree: KeywordNode - -# Redis 연결 -@app.on_event("startup") -async def startup_event(): - global redis - # modern aioredis uses from_url - try: - redis = await aioredis.from_url( - REDIS_URL, - encoding="utf-8", - decode_responses=True, - max_connections=10 - ) - logger.info(f"✅ Connected to Redis at {REDIS_URL}") - except Exception as e: - logger.warning(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}") - redis = None - -@app.on_event("shutdown") -async def shutdown_event(): - await redis.close() - -# 캐시 키 생성 함수 -def make_cache_key( root: str, top1: int, top2: int) -> str: - # 파라미터 조합으로 고유 키 생성 - key_str = f"{root}|{top1}|{top2}" - return "graph:" + hashlib.sha256(key_str.encode()).hexdigest() - - -# AI 서버 호출 함수 -def fetch_keywords(query: str) -> list[str]: - try: - response = requests.get( - "http://searchforest-ai:8004/inference", - params={"query": query, "top_k": 5} - ) - response.raise_for_status() - data = response.json() - keywords = [child["kw"] for child in data["results"]["children"]] - return keywords - except Exception as e: - print(f"[ERROR] AI 서버 호출 실패: {e}") - return [] - -# AI 서버 호출 + 결과 캐싱 -async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): - try: - # response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) - response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) - - response.raise_for_status() - data = response.json() - - # 트리 구성 - keyword_tree = { - "id": root, - "value": 1.0, - "children": [] - } - kw2pids = {} - - for cluster in data["results"]["children"]: - cluster_kw = cluster["kw"] - subnodes = cluster.get("children", []) - child_node = { - "id": cluster_kw, - "value": cluster["sim"], - "children": [] - } - for sub in subnodes: - child_node["children"].append({"id": sub["kw"], "value": 0.8, "children": []}) - kw2pids[sub["kw"]] = sub["pids"] - - keyword_tree["children"].append(child_node) - - cache_key = make_cache_key(root, top1, top2) - if redis: - await redis.set(cache_key, json.dumps({"tree": keyword_tree, "kw2pids": kw2pids}), ex=3600) - - return keyword_tree, kw2pids - - except Exception as e: - print(f"[ERROR] AI 호출 실패: {e}") - raise - -# /graph 엔드포인트 -@app.post("/graph", response_model=GraphResponse) -async def build_graph(req: GraphRequest): - - cache_key = make_cache_key(req.root, req.top1, req.top2) - if redis: - cached = await redis.get(cache_key) - if cached: - obj = json.loads(cached) - return {"keyword_tree": obj["tree"], "kw2pids": obj["kw2pids"]} - - tree = await fetch_from_ai_and_cache(req.root, req.top1, req.top2) - - root, mapping = extract_tree_mapping(original_json) - tree = manual_tree_with_full_values(root, mapping) - tree_parsed = manual_tree_with_full_values(tree) - - return {"keyword_tree": tree_parsed, "kw2pids": kw2pids} - - -# /kw2pids 엔드포인트 (핑퐁용) -@app.get("/kw2pids") -async def get_kw2pids(query: str = Query(...), top1: int = 5, top2: int = 3): - cache_key = make_cache_key(query, top1, top2) - if redis: - cached = await redis.get(cache_key) - if cached: - obj = json.loads(cached) - return obj["kw2pids"] - return {"message": "No cached kw2pids available."} +import os, json, hashlib +from typing import List, Dict, Optional +from fastapi import FastAPI, Query +from pydantic import BaseModel +import aioredis, requests +import httpx + +from json_to_tree_and_kw2pid import manual_tree_with_full_values + +# ───────────────────────────── +app = FastAPI(title="Graph Service with AI Inference") +REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379") +redis: Optional[aioredis.Redis] = None + +# ───────── Models ──────────── +class GraphRequest(BaseModel): + root: str + top1: int = 5 + top2: int = 3 + +class KeywordNode(BaseModel): + id: str + value: float + children: List["KeywordNode"] +KeywordNode.update_forward_refs() + +class GraphResponse(BaseModel): # ✨ + keyword_tree: KeywordNode + kw2pids: Dict[str, List[str]] + +# ───────── Redis Events ────── +@app.on_event("startup") +async def startup_event(): + global redis + try: + redis = await aioredis.from_url( + REDIS_URL, encoding="utf-8", decode_responses=True, max_connections=10 + ) + print(f"✅ Connected to Redis at {REDIS_URL}") + except Exception as e: + print(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}") + redis = None + +@app.on_event("shutdown") +async def shutdown_event(): + if redis: # ✨ + await redis.close() + +# ───────── Utils ──────────── +def make_cache_key(root: str, top1: int, top2: int) -> str: + return "graph:" + hashlib.sha256(f"{root}|{top1}|{top2}".encode()).hexdigest() + +async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): + url = "https://58b9-165-194-104-91.ngrok-free.app/inference" + params = {"query": root, "top_k": top1, "top2": top2} + + async with httpx.AsyncClient() as client: + resp = await client.get(url, params=params, timeout=15) + resp.raise_for_status() + data = resp.json() + + # 2-1) keyword_tree + mapping = { n["id"]:{"value": n.get("sim",0.8),"children": n.get("children",[])} + for n in data["results"]["children"][:top1] } + keyword_tree = manual_tree_with_full_values(root, mapping) + + # 2-2) kw2pids ☑ root + 1-depth + 2-depth + kw2pids = {} + + # root → 모든 1-depth pids 합집합 + root_pids = [] + for n in data["results"]["children"][:top1]: + root_pids.extend(n.get("pids", [])) + kw2pids[root] = root_pids + + # 1-depth + for n in data["results"]["children"][:top1]: + if "pids" in n: + kw2pids[n["id"]] = n["pids"] + + # 2-depth + for child in n.get("children", []): + if "pids" in child: + kw2pids[child["id"]] = child["pids"] + + if redis: + await redis.set( + make_cache_key(root, top1, top2), + json.dumps({"keyword_tree": keyword_tree, "kw2pids": kw2pids}), + ex=3600, + ) + return keyword_tree, kw2pids + +# ───────── API ──────────── +@app.post("/graph", response_model=GraphResponse) +async def build_graph(req: GraphRequest): + cache_key = make_cache_key(req.root, req.top1, req.top2) + if redis and (cached := await redis.get(cache_key)): + obj = json.loads(cached) + return obj # FastAPI가 모델로 자동 직렬화 + + keyword_tree, kw2pids = await fetch_from_ai_and_cache( + req.root, req.top1, req.top2 + ) + + + return {"keyword_tree": keyword_tree, "kw2pids": kw2pids} # ✨ + +@app.get("/kw2pids") +async def get_kw2pids(query: str = Query(...), top1: int = 5, top2: int = 3): + if redis and (cached := await redis.get(make_cache_key(query, top1, top2))): + return json.loads(cached)["kw2pids"] + return {"message": "No cached kw2pids available."} \ No newline at end of file diff --git a/app/services/graph_service/json_to_tree_and_kw2pid.py b/app/services/graph_service/json_to_tree_and_kw2pid.py new file mode 100644 index 0000000..5c22db8 --- /dev/null +++ b/app/services/graph_service/json_to_tree_and_kw2pid.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +json_to_tree_and_kw2pid.py + +– 입력: ① your_raw_json : 처음 보내주신 {"results": …} 구조 +– 출력: ② tree_dict : manual_tree_with_full_values() 로 만든 2-depth 그래프 + ③ kw2pid_dict : {키워드(str): [pid, …]} 형태 +""" +import json +from typing import Dict, List, Union, Tuple + +# ---------- 타입 & util ---------- # +TreeMapping = Dict[str, Dict[str, Union[float, List[Dict[str, Union[str, float]]]]]] + +def manual_tree_with_full_values(root: str, mapping: TreeMapping): + children = [] + for lvl1_label, data in mapping.items(): + lvl1_value = data["value"] + lvl2_items = data["children"] + + grandchildren = [] + for lvl2 in lvl2_items: + lvl2_label = lvl2["id"] + lvl2_value = lvl2["value"] + + grandchildren.append({ + "id": lvl2_label, + "context": f"{root}-{lvl1_label}-{lvl2_label}", + "value": lvl2_value, + "children": [] + }) + + children.append({ + "id": lvl1_label, + "context": f"{root}-{lvl1_label}", + "value": lvl1_value, + "children": grandchildren + }) + + return { + "id": root, + "context": root, + "value": 1.0, + "children": children + } + +# ---------- 파싱 핵심 ---------- # +def split_json( + raw_json: str +) -> Tuple[dict, dict]: + """ + 1) 2-depth 트리(dict) + 2) kw2pid 매핑(dict) 반환 + """ + data = json.loads(raw_json) + res = data["results"] + + root = res["root"] + lvl1_nodes = res["children"] + + # 1-depth → 2-depth 변환용 임시 mapping + mapping: TreeMapping = {} + # kw2pid 누적 + kw2pid: Dict[str, List[str]] = {} + + for lvl1 in lvl1_nodes: + lvl1_label = lvl1["id"] + lvl1_value = lvl1["value"] + mapping[lvl1_label] = {"value": lvl1_value, "children": []} + + # 이 예시 JSON에선 pids 가 2-depth 키워드마다 달려있음 + for lvl2 in lvl1["children"]: + lvl2_label = lvl2["id"] + lvl2_value = lvl2["value"] + mapping[lvl1_label]["children"].append( + {"id": lvl2_label, "value": lvl2_value} + ) + + # kw2pid 추가 + if "pids" in lvl2 and lvl2["pids"]: + kw2pid[lvl2_label] = lvl2["pids"] + + # (선택) 1-depth 키워드에도 동일 pids 부여하고 싶다면 주석 해제 + # if mapping[lvl1_label]["children"]: + # kw2pid[lvl1_label] = mapping[lvl1_label]["children"][0]["pids"] + + tree_dict = manual_tree_with_full_values(root, mapping) + return tree_dict, kw2pid + +# ---------- 사용 예시 ---------- # +if __name__ == "__main__": + with open("first_result.json", "r", encoding="utf-8") as f: + raw = f.read() + + tree, kw2pid = split_json(raw) + + # 필요에 따라 파일로 저장 + with open("tree_2depth.json", "w", encoding="utf-8") as f: + json.dump(tree, f, ensure_ascii=False, indent=2) + + with open("kw2pid.json", "w", encoding="utf-8") as f: + json.dump(kw2pid, f, ensure_ascii=False, indent=2) + + # 콘솔 확인 + print("=== 2-depth tree ===") + print(json.dumps(tree, ensure_ascii=False, indent=2)[:800], "...\n") + print("=== kw2pid ===") + print(json.dumps(kw2pid, ensure_ascii=False, indent=2)[:800], "...") \ No newline at end of file diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index 087a22d..2708847 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -1,7 +1,6 @@ -numpy -scikit-learn -torch -sentence-transformers -faiss-gpu -networkx -tqdm \ No newline at end of file +fastapi +uvicorn[standard] +pydantic +aioredis +requests +httpx>=0.26 \ No newline at end of file diff --git a/app/services/graph_service/test.json b/app/services/graph_service/test.json new file mode 100644 index 0000000..3197678 --- /dev/null +++ b/app/services/graph_service/test.json @@ -0,0 +1 @@ +{"results":{"root":"test","children":[{"id":"dark matter density","value":1.0,"children":[{"id":"dark matter density","value":0.8,"pids":["59396448","119215569","119234393","118493497","118683283","41660913","118557040","56387919","17578958","118365709","116981343","118607320","73631352","118865190","119393943","14073020","119414536","59065822","15089339","202740","13418816","119111724","119298498","15832449","119241489","14563896","119187336","66122070","30687560","119244797","53009603","119377295","195750878","119442219","118884237","52076653","55808663","119269304"]},{"id":"such dark matter","value":0.1861,"pids":["59396448","119215569","119234393","118493497","118683283","41660913","118557040","56387919","17578958","118365709","116981343","118607320","73631352","118865190","119393943","14073020","119414536","59065822","15089339","202740","13418816","119111724","119298498","15832449","119241489","14563896","119187336","66122070","30687560","119244797","53009603","119377295","195750878","119442219","118884237","52076653","55808663","119269304"]},{"id":"dark matter equation","value":-0.3604,"pids":["59396448","119215569","119234393","118493497","118683283","41660913","118557040","56387919","17578958","118365709","116981343","118607320","73631352","118865190","119393943","14073020","119414536","59065822","15089339","202740","13418816","119111724","119298498","15832449","119241489","14563896","119187336","66122070","30687560","119244797","53009603","119377295","195750878","119442219","118884237","52076653","55808663","119269304"]}],"sim":0.517},{"id":"functions and","value":1.0,"children":[{"id":"functions and","value":0.8,"pids":["119139045","118454356","3137686","119334046","36196479","119303391","17620745"]},{"id":"functions","value":0.193,"pids":["119139045","118454356","3137686","119334046","36196479","119303391","17620745"]},{"id":"riemann hypothesis","value":-0.2895,"pids":["119139045","118454356","3137686","119334046","36196479","119303391","17620745"]}],"sim":0.5162},{"id":"superstring amplitudes in","value":1.0,"children":[{"id":"superstring amplitudes in","value":0.8,"pids":["119190767","15809595","119637242","115157387","118502290","11003463","15484108","15078341","14488445","54747167","115176345","15123220","15728255","119317525","119217245","119620386","119156821","116737595"]},{"id":"superstring amplitudes","value":0.198,"pids":["119190767","15809595","119637242","115157387","118502290","11003463","15484108","15078341","14488445","54747167","115176345","15123220","15728255","119317525","119217245","119620386","119156821","116737595"]},{"id":"the superstring theory","value":-0.3401,"pids":["119190767","15809595","119637242","115157387","118502290","11003463","15484108","15078341","14488445","54747167","115176345","15123220","15728255","119317525","119217245","119620386","119156821","116737595"]}],"sim":0.5088},{"id":"physical quark masses","value":1.0,"children":[{"id":"physical quark masses","value":0.8,"pids":["119199799","118499851","118594540","118616937","17681501","119253849","118573385","53408656","119299206","15021084","118614504","951081","118512027","117744851","118383911","119206765","118682442","17767351","119455153","6999036","118435926","16923103","118999246","16376338","15058137","14934507","119098413","119186598","13227870","119230894","118931380","118907218","6745272","59929994","118724416","96459351","18833137","119350545","15321595","119177691","8204356","119265888","9951650","73582376","118309408","14998696","119490124","119252039","54615791","11051018","118334003","119110313","119274896","385703","119234296","46899660","7247089","119153192","15935578","119296171","12958775","119288809","119019127","15816221","14827452","90262695","14682292","117844101","117033489","14294720","5911300","2735565","18150622","14546422","18475783","119019112","118841610","59441559","119230198","118529794","85556034","119268625","118729327","15539886","119238442","2199424","119298353","700993","119478564","118672038","119112605","12973610"]},{"id":"with physical quark","value":0.1852,"pids":["119199799","118499851","118594540","118616937","17681501","119253849","118573385","53408656","119299206","15021084","118614504","951081","118512027","117744851","118383911","119206765","118682442","17767351","119455153","6999036","118435926","16923103","118999246","16376338","15058137","14934507","119098413","119186598","13227870","119230894","118931380","118907218","6745272","59929994","118724416","96459351","18833137","119350545","15321595","119177691","8204356","119265888","9951650","73582376","118309408","14998696","119490124","119252039","54615791","11051018","118334003","119110313","119274896","385703","119234296","46899660","7247089","119153192","15935578","119296171","12958775","119288809","119019127","15816221","14827452","90262695","14682292","117844101","117033489","14294720","5911300","2735565","18150622","14546422","18475783","119019112","118841610","59441559","119230198","118529794","85556034","119268625","118729327","15539886","119238442","2199424","119298353","700993","119478564","118672038","119112605","12973610"]},{"id":"qcd calculations","value":-0.3092,"pids":["119199799","118499851","118594540","118616937","17681501","119253849","118573385","53408656","119299206","15021084","118614504","951081","118512027","117744851","118383911","119206765","118682442","17767351","119455153","6999036","118435926","16923103","118999246","16376338","15058137","14934507","119098413","119186598","13227870","119230894","118931380","118907218","6745272","59929994","118724416","96459351","18833137","119350545","15321595","119177691","8204356","119265888","9951650","73582376","118309408","14998696","119490124","119252039","54615791","11051018","118334003","119110313","119274896","385703","119234296","46899660","7247089","119153192","15935578","119296171","12958775","119288809","119019127","15816221","14827452","90262695","14682292","117844101","117033489","14294720","5911300","2735565","18150622","14546422","18475783","119019112","118841610","59441559","119230198","118529794","85556034","119268625","118729327","15539886","119238442","2199424","119298353","700993","119478564","118672038","119112605","12973610"]}],"sim":0.5084},{"id":"offline handwriting recognition","value":1.0,"children":[{"id":"offline handwriting recognition","value":0.8,"pids":["3842393","6549978","126180494","202660770","6069782","16509326","16661426","263092","6060495","51955928","6708387","25051392","121292373","2308618","34701244","140262773","198968240","67856041","10552590","199405577","13745107","318257","4565931","56895553","7685498","4707079","53428248","3825772","53219846","145056436","52889887","54447114","4762792","49414646","9731718","10663135","140210708","54462895","90262232","17735501","67856708","315789","201665955","4605057","51871912"]},{"id":"handwriting recognition","value":0.1943,"pids":["3842393","6549978","126180494","202660770","6069782","16509326","16661426","263092","6060495","51955928","6708387","25051392","121292373","2308618","34701244","140262773","198968240","67856041","10552590","199405577","13745107","318257","4565931","56895553","7685498","4707079","53428248","3825772","53219846","145056436","52889887","54447114","4762792","49414646","9731718","10663135","140210708","54462895","90262232","17735501","67856708","315789","201665955","4605057","51871912"]},{"id":"using deep neural","value":-0.3086,"pids":["3842393","6549978","126180494","202660770","6069782","16509326","16661426","263092","6060495","51955928","6708387","25051392","121292373","2308618","34701244","140262773","198968240","67856041","10552590","199405577","13745107","318257","4565931","56895553","7685498","4707079","53428248","3825772","53219846","145056436","52889887","54447114","4762792","49414646","9731718","10663135","140210708","54462895","90262232","17735501","67856708","315789","201665955","4605057","51871912"]}],"sim":0.5077},{"id":"cosmic microwave background","value":1.0,"children":[{"id":"cosmic microwave background","value":0.8,"pids":["119278530","1887453","119495132","119472241","14218199","18120479","119243126","54173237","119212870","51745830","119114124","5398329","16479802","15652812","119180175","44129566","118565857","119073969","119223268","119202493","119177664","52065651","118525940","119190546","119408894","15329615","4594646","119511960","16019463","31073237","118672217","118381670","118615359","118396636","29627522","6037952","119415834","14371538","119163396","119450190","3061987","21929004","988092","119330744","119206660","118733060","10052235","15054396","517403","84846361","118588389","118959658","119481246"]},{"id":"recent cosmic microwave","value":0.1895,"pids":["119278530","1887453","119495132","119472241","14218199","18120479","119243126","54173237","119212870","51745830","119114124","5398329","16479802","15652812","119180175","44129566","118565857","119073969","119223268","119202493","119177664","52065651","118525940","119190546","119408894","15329615","4594646","119511960","16019463","31073237","118672217","118381670","118615359","118396636","29627522","6037952","119415834","14371538","119163396","119450190","3061987","21929004","988092","119330744","119206660","118733060","10052235","15054396","517403","84846361","118588389","118959658","119481246"]},{"id":"the cosmological parameters","value":-0.3145,"pids":["119278530","1887453","119495132","119472241","14218199","18120479","119243126","54173237","119212870","51745830","119114124","5398329","16479802","15652812","119180175","44129566","118565857","119073969","119223268","119202493","119177664","52065651","118525940","119190546","119408894","15329615","4594646","119511960","16019463","31073237","118672217","118381670","118615359","118396636","29627522","6037952","119415834","14371538","119163396","119450190","3061987","21929004","988092","119330744","119206660","118733060","10052235","15054396","517403","84846361","118588389","118959658","119481246"]}],"sim":0.5051},{"id":"mechanical lattice","value":1.0,"children":[{"id":"mechanical lattice","value":0.8,"pids":["118471994","4950906","119285651","5397433","119180039"]},{"id":"the mechanical lattice","value":0.1946,"pids":["118471994","4950906","119285651","5397433","119180039"]},{"id":"mechanical response","value":-0.35,"pids":["118471994","4950906","119285651","5397433","119180039"]}],"sim":0.5051},{"id":"stochastic optimization","value":1.0,"children":[{"id":"stochastic optimization","value":0.8,"pids":["182953141","6884742","47017143","3637630","58981788","52180472","1548646","119173094","16613403","4958405","52079347","86422815"]},{"id":"optimization","value":0.1852,"pids":["182953141","6884742","47017143","3637630","58981788","52180472","1548646","119173094","16613403","4958405","52079347","86422815"]},{"id":"the optimal solution","value":-0.3728,"pids":["182953141","6884742","47017143","3637630","58981788","52180472","1548646","119173094","16613403","4958405","52079347","86422815"]}],"sim":0.495},{"id":"adversarially robust models","value":1.0,"children":[{"id":"adversarially robust models","value":0.8,"pids":["182952436","59336190","197431150","195218789","883252","49862308","59413762","159041363","173188378","3272089","59222747","173990256","128358825","51925625","195767324","135464475","195345281","145049777","53729258","67855552","53039886","52920928","202578080","53292287","85543329","29160618","67788180","2541531","53668092","53735542","53047456","85498673","56657912","195584368","199064656","52298300","119186282","166228688","49901528","53737378"]},{"id":"adversarial robustness and","value":0.1847,"pids":["182952436","59336190","197431150","195218789","883252","49862308","59413762","159041363","173188378","3272089","59222747","173990256","128358825","51925625","195767324","135464475","195345281","145049777","53729258","67855552","53039886","52920928","202578080","53292287","85543329","29160618","67788180","2541531","53668092","53735542","53047456","85498673","56657912","195584368","199064656","52298300","119186282","166228688","49901528","53737378"]},{"id":"adversarial learning problem","value":-0.3608,"pids":["182952436","59336190","197431150","195218789","883252","49862308","59413762","159041363","173188378","3272089","59222747","173990256","128358825","51925625","195767324","135464475","195345281","145049777","53729258","67855552","53039886","52920928","202578080","53292287","85543329","29160618","67788180","2541531","53668092","53735542","53047456","85498673","56657912","195584368","199064656","52298300","119186282","166228688","49901528","53737378"]}],"sim":0.4934},{"id":"the chemotaxis sensitivity","value":1.0,"children":[{"id":"the chemotaxis sensitivity","value":0.8,"pids":["119696972","119313443","119156166","119175757","56390808","119301760","59445935","119116911","119315730","119317122","119167731","85546779","55106939","119294250","119171973","119589133","119145111","160010311","20958772","119148805","119665385","59488170","119641359","119660499","119172102","119601309","152282979"]},{"id":"chemotaxis sensitivity","value":0.1958,"pids":["119696972","119313443","119156166","119175757","56390808","119301760","59445935","119116911","119315730","119317122","119167731","85546779","55106939","119294250","119171973","119589133","119145111","160010311","20958772","119148805","119665385","59488170","119641359","119660499","119172102","119601309","152282979"]},{"id":"generalized solution this","value":-0.3051,"pids":["119696972","119313443","119156166","119175757","56390808","119301760","59445935","119116911","119315730","119317122","119167731","85546779","55106939","119294250","119171973","119589133","119145111","160010311","20958772","119148805","119665385","59488170","119641359","119660499","119172102","119601309","152282979"]}],"sim":0.4918}]}} \ No newline at end of file diff --git a/app/services/main.py b/app/services/main.py index fe07af2..e3edcd7 100644 --- a/app/services/main.py +++ b/app/services/main.py @@ -75,4 +75,23 @@ def api_search( """ TODO: api_graph + api_papers 조합해서 한번에 반환 """ - raise NotImplementedError \ No newline at end of file + raise NotImplementedError + + + import aiohttp, asyncio + + S2_URL = "https://api.semanticscholar.org/graph/v1/paper/{}" + FIELDS = "title,abstract,year,venue,referenceCount,citationCount," \ + "influentialCitationCount,fieldsOfStudy,authors,tldr" + + async def fetch_paper(session, pid): + url = S2_URL.format(pid) + params = {"fields": FIELDS} + async with session.get(url, params=params, timeout=10) as r: + r.raise_for_status() + return await r.json() + + async def fetch_many(pids): + async with aiohttp.ClientSession() as session: + tasks = [fetch_paper(session, pid) for pid in pids] + return await asyncio.gather(*tasks) \ No newline at end of file diff --git a/app/services/papers_service/Dockerfile b/app/services/papers_service/Dockerfile index 0bdfd6d..03d2255 100644 --- a/app/services/papers_service/Dockerfile +++ b/app/services/papers_service/Dockerfile @@ -1,27 +1,25 @@ FROM python:3.9-slim +# 1. 필수 패키지 + gdown 설치 +RUN apt-get update && \ + apt-get install -y --no-install-recommends python3-pip git && \ + pip3 install --no-cache-dir gdown && \ + rm -rf /var/lib/apt/lists/* +# 2. 프로젝트 코드 복사 +# (컨텍스트 최상단에서 COPY . . 라면 /app 구조도 함께 포함됩니다) WORKDIR /app - -# 1. 의존성 설치 -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -RUN apt-get update && apt-get install -y tar - -# 2. gdown 설치 -RUN pip install gdown beautifulsoup4 - -# 3. Google Drive에서 tar.gz 파일 다운로드 → data/ 안에 저장 -RUN mkdir -p data && \ - gdown --id 1tFYFjScIyu9RvAHWWWf-SGHwTs7F5866 -O data/inductive_test_checkpoint_collected.tar.gz - -# 2. data/ 폴더 안에서 압축 해제 + 압축 파일 삭제 -RUN tar -xzf data/inductive_test_checkpoint_collected.tar.gz -C data/ && \ - rm data/inductive_test_checkpoint_collected.tar.gz - -# 4. 코드 복사 (덮어쓰지 않도록 이후에) -COPY data/kw2pids.json data/ -COPY papers_service.py ./ +COPY . . + +# 3. Google Drive에서 tar.gz 다운로드 → data/ 안에 저장·압축 해제 +RUN mkdir -p app/data && \ + gdown --id 13KL8qntiJlyxc3ObqjB7t8NEXoRyYyXX \ + -O app/data/paper_db.tar.gz && \ + tar -xzf app/data/paper_db.tar.gz \ + -C app/data && \ + rm app/data/paper_db.tar.gz + +# 4. 파이썬 의존성 설치 +RUN pip3 install --no-cache-dir -r requirements.txt # 경로는 필요에 맞게 # 5. 앱 실행 CMD ["uvicorn", "papers_service:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/app/services/papers_service/data/dummy_data.py b/app/services/papers_service/data/dummy_data.py deleted file mode 100644 index 56c8b38..0000000 --- a/app/services/papers_service/data/dummy_data.py +++ /dev/null @@ -1,128 +0,0 @@ -# papers_service/dummy_data.py - -def get_dummy_papers(): - """ - 더미 논문 리스트 반환 - """ - return [ - { - "paper_id": "p1", - "title": "Dummy Paper A", - "abstract": "This is the abstract of dummy paper A.", - "authors": ["Alice"], - "year": 2023, - "citation_count": 5, - "sim_score": 0.90, - "summary": "이 논문은 A에 대해 간략히 설명합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p2", - "title": "Dummy Paper B", - "abstract": "This is the abstract of dummy paper B.", - "authors": ["Bob", "Carol"], - "year": 2020, - "citation_count": 3, - "sim_score": 0.85, - "summary": "이 논문은 B의 주요 기여를 요약합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p3", - "title": "Dummy Paper C", - "abstract": "This is the abstract of dummy paper C.", - "authors": ["Dave"], - "year": 2022, - "citation_count": 7, - "sim_score": 0.80, - "summary": "이 논문에서는 C를 제안하고 실험 결과를 제공합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p4", - "title": "Dummy Paper D", - "abstract": "This is the abstract of dummy paper D.", - "authors": ["Eve", "Frank"], - "year": 2021, - "citation_count": 10, - "sim_score": 0.75, - "summary": "이 논문은 D 기법의 유효성을 평가합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p5", - "title": "Dummy Paper E", - "abstract": "This is the abstract of dummy paper E.", - "authors": ["Grace"], - "year": 2019, - "citation_count": 12, - "sim_score": 0.70, - "summary": "이 논문에서는 E 알고리즘을 제안합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p6", - "title": "Dummy Paper F", - "abstract": "This is the abstract of dummy paper F.", - "authors": ["Heidi"], - "year": 2020, - "citation_count": 8, - "sim_score": 0.65, - "summary": "이 논문은 F 시스템의 성능을 분석합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p7", - "title": "Dummy Paper G", - "abstract": "This is the abstract of dummy paper G.", - "authors": ["Ivan", "Judy"], - "year": 2022, - "citation_count": 6, - "sim_score": 0.60, - "summary": "이 논문에서는 G 모델을 제안하고 평가합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p8", - "title": "Dummy Paper H", - "abstract": "This is the abstract of dummy paper H.", - "authors": ["Kevin"], - "year": 2021, - "citation_count": 9, - "sim_score": 0.55, - "summary": "이 논문은 H 프로토콜의 보안성을 검증합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic", - }, - { - "paper_id": "p9", - "title": "Dummy Paper I", - "abstract": "This is the abstract of dummy paper I.", - "authors": ["Laura"], - "year": 2023, - "citation_count": 4, - "sim_score": 0.50, - "summary": "이 논문에서는 I 프레임워크를 소개합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p10", - "title": "Dummy Paper J", - "abstract": "This is the abstract of dummy paper J.", - "authors": ["Mallory", "Niaj"], - "year": 2018, - "citation_count": 15, - "sim_score": 0.45, - "summary": "이 논문은 J 방법론의 활용 사례를 제시합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - } - ] \ No newline at end of file diff --git a/app/services/papers_service/data/kw2pids.json b/app/services/papers_service/data/kw2pids.json deleted file mode 100644 index 368bd49..0000000 --- a/app/services/papers_service/data/kw2pids.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "machine learning": [ - "40108038", - "59572248", - "5799960", - "14188576", - "119242784" - ], - "neural network": [ - "41418788", - "119472164", - "11501607", - "119111722", - "14909482" - ], - "graph representation": [ - "55836730", - "118751294", - "51183683", - "118849608", - "118816857", - "5734610" - ], - "text summarization": [ - "59572248", - "5799960", - "14188576", - "51183683", - "85459157", - "56099032" - ], - "natural language processing": [ - "11501607", - "59572248", - "14909482" - ], - "deep learning": [ - "11534505", - "59572248", - "13074624", - "53641451" - ], - "transformer": [ - "41418788", - "5799960", - "11501607" - ], - "embedding": [ - "59408549", - "14516333", - "119144587" - ] -} \ No newline at end of file diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index 1f02272..16b7d91 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -1,146 +1,142 @@ -# papers_service/main.py -import random - -from fastapi import FastAPI, HTTPException, Query -from pydantic import BaseModel -from typing import List, Optional -import json -import os - -app = FastAPI(title="Papers Service") - - -class Author(BaseModel): - name: str - - -class Citation(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -class Reference(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -# --- 논문 객체 정의 --- -class Paper(BaseModel): - paper_id: str - abstract: Optional[str] - title: Optional[str] - - url: Optional[str] - venue: Optional[str] - year: Optional[int] - - reference_count: Optional[int] - citation_count: Optional[int] - influentialCitationCount: Optional[int] - - fieldsOfStudy: Optional[List[str]] - tldr: Optional[str] - authors: List[Author] - - sim_score: float - - -# --- 응답 모델 --- -class PapersResponse(BaseModel): - total_results: int - max_display: int - page: int - page_size: int - papers: List[Paper] - - - -def make_cache_key(root, top1, top2): - key_str = f"{root}|{top1}|{top2}" - return "keyword_tree:graph:" + hashlib.sha256(key_str.encode()).hexdigest() - - -def fetch_keyword_tree_from_graph_service(query: str) -> dict: - response = requests.get("http://graph-service:8002/keyword_tree", params={"query": query}) - return response.json() - -@app.get("/papers", response_model=PapersResponse) -def get_papers_by_keyword( - kw: str = Query(..., description="클릭한 키워드"), - page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100) -): - - response = requests.post("http://graph-service:8002/graph", json={ - "root": kw, - "top1": 10, - "top2": 3 - }) - - - keyword_tree = response.json().get("keyword_tree") - - response = requests.get("http://graph-service:8002/keyword_tree", params={"query": "AI"}) - kw2pids = response.json() - - - - keyword_tree_json = redis.get(f"keyword_tree:{cache_key}") - keyword_tree = json.loads(keyword_tree_json) - - key = make_cache_key("pid123", "AI", 5, 3) - cached = redis.get(key) - if cached: - kw2pids = json.loads(cached) - - - if kw not in kw2pids: - raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.") - else: - all_pids = kw2pids[kw] - - # 페이징 - total = len(all_pids) - start = (page - 1) * page_size - end = min(start + page_size, total) - sliced = all_pids[start:end] - - papers = [] - for pid in sliced: - if pid not in paper_db: - continue - entry = paper_db[pid] - papers.append(Paper( - paper_id=pid, - title=entry.get("title"), - abstract=entry.get("abstract"), - url=entry.get("url"), - venue=entry.get("venue"), - year=entry.get("year"), - reference_count=entry.get("referenceCount"), - citation_count=entry.get("citationCount"), - influentialCitationCount=entry.get("influentialCitationCount"), - fieldsOfStudy=entry.get("fieldsOfStudy"), - tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, - authors=[Author(name=a["name"]) for a in entry.get("authors", [])], - sim_score=random.uniform(0, 1) # Stub score - )) - - return PapersResponse( - total_results=total, - max_display=len(sliced), - page=page, - page_size=page_size, - papers=papers - ) - -@app.get("/keyword_tree") -async def get_keyword_tree(query: str = Query(...)): - cache_key = f"kw2pids:{query}" - if redis: - cached = await redis.get(cache_key) - if cached: - return json.loads(cached) - return {"message": "No cached keyword->pids mapping found."} +# papers_service/main.py + +import os, json, random, asyncio +from typing import List, Dict, Optional + +from fastapi import FastAPI, Query, HTTPException +from pydantic import BaseModel +import httpx + +app = FastAPI(title="Papers Service") + +# ───────── Pydantic Models ───────── +class Author(BaseModel): + name: str + +class Paper(BaseModel): + paper_id: str + title: Optional[str] + abstract: Optional[str] + url: Optional[str] + venue: Optional[str] + year: Optional[int] + reference_count: Optional[int] + citation_count: Optional[int] + influentialCitationCount: Optional[int] + fieldsOfStudy: Optional[List[str]] + tldr: Optional[str] + authors: List[Author] + sim_score: float + +class PapersResponse(BaseModel): + total_results: int + max_display: int + page: int + page_size: int + papers: List[Paper] + +# ───────── Data Load ───────── +BASE_DIR = os.path.join(os.path.dirname(__file__), "data") + +def safe_load(fname: str) -> dict: + path = os.path.join(BASE_DIR, fname) + try: + with open(path, encoding="utf-8") as f: + return json.load(f) + except FileNotFoundError: + print(f"⚠️ {fname} not found → 빈 dict 사용") + return {} + +paper_db: Dict[str, dict] = safe_load("paper_db.json") +kw2pids: Dict[str, List[str]] = safe_load("kw2pids.json") +save_lock = asyncio.Lock() # 파일 캐시 동시 접근 보호 + +# ────────────── Helper ────────────── +def build_paper(pid: str) -> Paper: + e = paper_db[pid] + return Paper( + paper_id = pid, + title = e.get("title"), + abstract = e.get("abstract"), + url = e.get("url"), + venue = e.get("venue"), + year = e.get("year"), + reference_count = e.get("referenceCount"), + citation_count = e.get("citationCount"), + influentialCitationCount = e.get("influentialCitationCount"), + fieldsOfStudy = e.get("fieldsOfStudy"), + tldr = (e.get("tldr") or {}).get("text"), # ← 수정 + authors = [Author(name=a["name"]) + for a in e.get("authors", [])], + sim_score = random.uniform(0, 1), # stub + ) + +def paginate(ids: List[str], page: int, page_size: int): + total = len(ids) + start = (page - 1) * page_size + return ids[start:start + page_size], total + +async def build_papers(ids: List[str]) -> List[Paper]: + papers: List[Paper] = [] + for pid in ids: + if await ensure_paper(pid): + papers.append(build_paper(pid)) + return papers + +# ───────── API ───────── +@app.get("/papers", response_model=PapersResponse) +async def get_papers( + root: str = Query(..., description="검색 루트(처음 입력)"), + kw: str = Query(..., description="사용자가 선택한 키워드"), + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), +): + ids = await ensure_kw2pids(root, kw) + if not ids: + raise HTTPException(404, f"Keyword '{kw}' not found") + + sliced, total = paginate(ids, page, page_size) + + # paper_db 에 존재하는 PID 만 반환 + papers = [build_paper(pid) for pid in sliced if pid in paper_db] + + return PapersResponse( + total_results = total, + max_display = len(papers), + page = page, + page_size = page_size, + papers = papers, + ) + +GRAPH_BASE = os.getenv("GRAPH_URL", "http://graph-service:8002") + +async def ensure_kw2pids(root: str, keyword: str, + top1: int = 5, top2: int = 3) -> List[str]: + """keyword 가 캐시에 없으면 graph-service 를 호출해 kw2pids 갱신""" + if keyword in kw2pids: + return kw2pids[keyword] + + async with httpx.AsyncClient() as client: + resp = await client.post( + f"{GRAPH_BASE}/graph", + json={"root": keyword, "top1": top1, "top2": top2}, + timeout=15 + ) + if resp.status_code != 200: + raise HTTPException(502, "graph_service error") + + data = resp.json() + kw2pids.update(data["kw2pids"]) + + # 🔽 파일 캐시 저장 시도 → 읽기 전용이면 무시 + try: + async with save_lock: + path = os.path.join(BASE_DIR, "kw2pids.json") + with open(path, "w") as f: + json.dump(kw2pids, f, ensure_ascii=False) + except OSError: + # read-only · 컨테이너 환경에선 무시하고 넘어감 + pass + + return kw2pids.get(keyword, []) + \ No newline at end of file diff --git a/app/services/papers_service/requirements.txt b/app/services/papers_service/requirements.txt index 4174eb1..bf2e342 100644 --- a/app/services/papers_service/requirements.txt +++ b/app/services/papers_service/requirements.txt @@ -1,3 +1,4 @@ fastapi uvicorn[standard] -pydantic \ No newline at end of file +pydantic +httpx>=0.26 \ No newline at end of file diff --git a/app/services/requirements.txt b/app/services/requirements.txt index c9b6004..81a4930 100644 --- a/app/services/requirements.txt +++ b/app/services/requirements.txt @@ -1,3 +1,3 @@ fastapi uvicorn -pydantic +pydantic \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 549ed4b..e14d2c1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,14 +8,24 @@ services: - "6379:6379" restart: unless-stopped - papers-service: - build: - context: ./app/services/papers_service - dockerfile: Dockerfile - ports: - - "8000:8000" - depends_on: - - searchforest-ai + # runtime_infer: + # build: + # context: ./app + # dockerfile: runtime/Dockerfile + # container_name: searchforest-infer + # environment: + # - NVIDIA_VISIBLE_DEVICES=all + # volumes: + # - ./app/indices:/app/indices:ro # 인덱스 실시간 사용 + # ports: + # - "8004:8004" + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] graph-service: build: @@ -25,31 +35,26 @@ services: ports: - "8002:8002" environment: - # Redis URL을 환경변수로 주입 - REDIS_URL: "redis://redis:6379" - # (필요하다면) CONFIG_PATH, INDEX_PATH 등도 + - REDIS_URL=redis://redis:6379 + - INFER_URL=http://host.docker.internal:8004 depends_on: - redis - - searchforest-ai restart: unless-stopped - - runtime_infer: + + papers-service: build: - context: ./app/runtime - dockerfile: app/runtime/Dockerfile - container_name: searchforest-infer - runtime: nvidia # GPU 전달 + context: ./app/services/papers_service + dockerfile: Dockerfile + container_name: papers_service + ports: + - "8000:8000" environment: - - NVIDIA_VISIBLE_DEVICES=all + - REDIS_URL=redis://redis:6379 + - GRAPH_URL=http://graph-service:8002 + depends_on: + - redis + - graph-service + restart: unless-stopped volumes: - - ./indices:/workspace/indices # 인덱스 실시간 사용 - ports: - - "8004:8004" - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] + - ./app/data:/app/data:ro