From 12cba7ebf82ce111f7290d75b0a6358ac6686d7f Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 20:53:14 +0000 Subject: [PATCH 01/35] fix: update graph_service files --- app/services/graph_service/Dockerfile | 4 +++- app/services/graph_service/graph_service.py | 1 - app/services/graph_service/requirements.txt | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/app/services/graph_service/Dockerfile b/app/services/graph_service/Dockerfile index 6cf9703..11a6479 100644 --- a/app/services/graph_service/Dockerfile +++ b/app/services/graph_service/Dockerfile @@ -5,7 +5,9 @@ WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY . . +COPY graph_service.py . + +COPY tree_mapping.py . # FastAPI Uvicorn 실행 diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 0f19ede..34855bb 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -6,7 +6,6 @@ import aioredis from data_util.logging import logger -from data_util.config import Config from collections import defaultdict import requests from fastapi import Query diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index 087a22d..60b8699 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -4,4 +4,7 @@ torch sentence-transformers faiss-gpu networkx -tqdm \ No newline at end of file +tqdm +fastapi +uvicorn +pydantic \ No newline at end of file From 4e75978976b9c607220fb49a303362cbad6f17a0 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 20:59:02 +0000 Subject: [PATCH 02/35] fix: update graph_service files --- app/services/graph_service/old/dummy_data.py | 115 +++++++++++++++++++ app/services/graph_service/requirements.txt | 9 +- 2 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 
app/services/graph_service/old/dummy_data.py diff --git a/app/services/graph_service/old/dummy_data.py b/app/services/graph_service/old/dummy_data.py new file mode 100644 index 0000000..478db51 --- /dev/null +++ b/app/services/graph_service/old/dummy_data.py @@ -0,0 +1,115 @@ +def get_dummy_tree(root: str, top1: int = 5, top2: int = 3): + """ + root 키워드 + top1, top2 파라미터를 반영한 더미 radial-tree 반환 + depth1: top1 개수, depth2: top2 개수로 구성됩니다. + """ + children = [] + + # depth1: top1개의 자식 노드 생성 + for i in range(top1): + # 라벨은 A, B, C, ... 또는 번호로 생성 + label = chr(ord('A') + i) if i < 26 else str(i+1) + # base_sim: 0.9에서 일정 간격으로 감소 + base_sim = round(1.0 - (i + 1) * (0.1), 4) + + # depth2: 각 depth1 노드에 top2 개수만큼 자식 생성 + grandchildren = [] + for j in range(top2): + child_label = f"{root}-{label}-{j+1}" + # sim value: base_sim에서 0.05씩 감소 + child_sim = round(base_sim - (j + 1) * 0.05, 4) + grandchildren.append({ + "id": child_label, + "value": child_sim, + "children": [] + }) + + children.append({ + "id": f"{root}-{label}", + "value": base_sim, + "children": grandchildren + }) + + # 루트 노드 반환 + return { + "id": root, + "value": 1.0, + "children": children + } + +# --- 기본 더미 트리 생성기 (context 포함) --- +def get_dummy_tree_with_context(root: str, top1: int = 5, top2: int = 3): + children = [] + for i in range(top1): + label = chr(ord('A') + i) if i < 26 else str(i+1) + base_sim = round(1.0 - (i + 1) * 0.1, 4) + grandchildren = [] + for j in range(top2): + child_label = f"{label}-{j+1}" + full_context = f"{root}-{label}-{j+1}" + child_sim = round(base_sim - (j + 1) * 0.05, 4) + grandchildren.append({ + "id": child_label, + "context": full_context, + "value": child_sim, + "children": [] + }) + children.append({ + "id": label, + "context": f"{root}-{label}", + "value": base_sim, + "children": grandchildren + }) + return { + "id": root, + "context": root, + "value": 1.0, + "children": children + } + +def get_dummy_tree_with_context_and_example(root: str, top1: int = 5, top2: int = 3): + 
""" + root 키워드를 중심으로 top1개의 1-depth와 각 1-depth마다 top2개의 2-depth를 생성한 더미 트리를 반환. + 각 노드에는 context, value, example 필드가 포함됨. + """ + children = [] + + for i in range(top1): + label = chr(ord('A') + i) if i < 26 else str(i + 1) + base_sim = round(1.0 - (i + 1) * 0.1, 4) + lvl1_id = label + lvl1_context = f"{root}-{label}" + lvl1_example = f"{root} 분야의 하위 주제 {label}에 대한 간단한 설명입니다." + + grandchildren = [] + for j in range(top2): + lvl2_id = f"{label}-{j + 1}" + lvl2_context = f"{root}-{label}-{j + 1}" + lvl2_value = round(base_sim - (j + 1) * 0.05, 4) + lvl2_example = f"{label} 세부 주제 {j + 1}에 대한 예시 설명입니다." + + grandchildren.append({ + "id": lvl2_id, + "context": lvl2_context, + "value": lvl2_value, + "example": lvl2_example, + "children": [] + }) + + children.append({ + "id": lvl1_id, + "context": lvl1_context, + "value": base_sim, + "example": lvl1_example, + "children": grandchildren + }) + + return { + "id": root, + "context": root, + "value": 1.0, + "example": f"{root}라는 주제를 중심으로 확장된 키워드 구조입니다.", + "children": children + } + + diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index 60b8699..4174eb1 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -1,10 +1,3 @@ -numpy -scikit-learn -torch -sentence-transformers -faiss-gpu -networkx -tqdm fastapi -uvicorn +uvicorn[standard] pydantic \ No newline at end of file From b5cbe2e65a3aa19fef20894673a5cb6144307bc4 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:02:33 +0000 Subject: [PATCH 03/35] fix: update graph_service files --- app/services/graph_service/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index 4174eb1..d5a0f17 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -1,3 +1,4 @@ fastapi 
uvicorn[standard] -pydantic \ No newline at end of file +pydantic +aioredis \ No newline at end of file From 67c6665d972380412ecba9d179771746aa310d48 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:08:06 +0000 Subject: [PATCH 04/35] s --- app/services/graph_service/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index d5a0f17..f81927f 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -1,4 +1,5 @@ fastapi uvicorn[standard] pydantic -aioredis \ No newline at end of file +aioredis +data_util \ No newline at end of file From 085d52f05de037e6177c2c456ad0aba03d3cee54 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:12:19 +0000 Subject: [PATCH 05/35] s --- app/services/graph_service/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index f81927f..5c57be2 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -2,4 +2,4 @@ fastapi uvicorn[standard] pydantic aioredis -data_util \ No newline at end of file +data-util \ No newline at end of file From 3b79d27018c8ee00d9b8bd0d042bb5dfc8bc3667 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:16:49 +0000 Subject: [PATCH 06/35] fix: update graph_service files --- app/services/graph_service/graph_service.py | 2 -- app/services/graph_service/requirements.txt | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 34855bb..2268e2e 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -5,8 +5,6 @@ from pydantic import BaseModel import aioredis from 
data_util.logging import logger - -from collections import defaultdict import requests from fastapi import Query from tree_mapping import extract_tree_mapping diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index 5c57be2..bdb84a1 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -2,4 +2,5 @@ fastapi uvicorn[standard] pydantic aioredis -data-util \ No newline at end of file +data-util +requests From 78cbb75fbe048fd749997c09b6b5db312b454b13 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:20:19 +0000 Subject: [PATCH 07/35] s --- app/services/graph_service/graph_service.py | 7 +++---- app/services/graph_service/requirements.txt | 2 +- app/services/papers_service/requirements.txt | 3 ++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 2268e2e..7de843c 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -1,12 +1,11 @@ import os import json, hashlib from typing import List, Dict, Optional, Tuple, Union -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, Query from pydantic import BaseModel import aioredis from data_util.logging import logger import requests -from fastapi import Query from tree_mapping import extract_tree_mapping # ──────────────────────────────────────────────────────────────── @@ -79,8 +78,8 @@ def fetch_keywords(query: str) -> list[str]: # AI 서버 호출 + 결과 캐싱 async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): try: - # response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) - response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) + response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, 
"top_k": top1}) + # response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) response.raise_for_status() data = response.json() diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index bdb84a1..e53b211 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -3,4 +3,4 @@ uvicorn[standard] pydantic aioredis data-util -requests +requests \ No newline at end of file diff --git a/app/services/papers_service/requirements.txt b/app/services/papers_service/requirements.txt index 4174eb1..4e88f3f 100644 --- a/app/services/papers_service/requirements.txt +++ b/app/services/papers_service/requirements.txt @@ -1,3 +1,4 @@ fastapi uvicorn[standard] -pydantic \ No newline at end of file +pydantic +random \ No newline at end of file From b0e04635d612627d360498df1a84dd9ab1b64470 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:23:31 +0000 Subject: [PATCH 08/35] hi --- app/runtime/run_uvicorn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/runtime/run_uvicorn.sh b/app/runtime/run_uvicorn.sh index bce9f44..015e7b2 100755 --- a/app/runtime/run_uvicorn.sh +++ b/app/runtime/run_uvicorn.sh @@ -1,3 +1,3 @@ #!/bin/bash -uvicorn runtime.api:app --reload --port 8004 \ No newline at end of file +uvicorn runtime.api:app --host 0.0.0.0 --port 8004 --reload From 4195a90c6f3b382d65a313afa20d8d022a134f13 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:27:00 +0000 Subject: [PATCH 09/35] . 
--- app/services/graph_service/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/app/services/graph_service/requirements.txt b/app/services/graph_service/requirements.txt index e53b211..0b17cf8 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -2,5 +2,4 @@ fastapi uvicorn[standard] pydantic aioredis -data-util requests \ No newline at end of file From b9d14b8480c515847beae2147fc16ae235e99360 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:27:29 +0000 Subject: [PATCH 10/35] s --- app/services/graph_service/graph_service.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 7de843c..b7352f1 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -4,7 +4,6 @@ from fastapi import FastAPI, HTTPException, Query from pydantic import BaseModel import aioredis -from data_util.logging import logger import requests from tree_mapping import extract_tree_mapping @@ -44,9 +43,9 @@ async def startup_event(): decode_responses=True, max_connections=10 ) - logger.info(f"✅ Connected to Redis at {REDIS_URL}") + print(f"✅ Connected to Redis at {REDIS_URL}") except Exception as e: - logger.warning(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}") + print(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}") redis = None @app.on_event("shutdown") From ccd8a3598178456e32ce5200ce4bdca9c5df8520 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:29:25 +0000 Subject: [PATCH 11/35] s --- app/services/papers_service/requirements.txt | 3 +-- app/services/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/app/services/papers_service/requirements.txt b/app/services/papers_service/requirements.txt index 4e88f3f..4174eb1 100644 --- a/app/services/papers_service/requirements.txt +++ 
b/app/services/papers_service/requirements.txt @@ -1,4 +1,3 @@ fastapi uvicorn[standard] -pydantic -random \ No newline at end of file +pydantic \ No newline at end of file diff --git a/app/services/requirements.txt b/app/services/requirements.txt index c9b6004..81a4930 100644 --- a/app/services/requirements.txt +++ b/app/services/requirements.txt @@ -1,3 +1,3 @@ fastapi uvicorn -pydantic +pydantic \ No newline at end of file From f07ba06becbe23c17db1e4bba385581a9b61d09a Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:40:32 +0000 Subject: [PATCH 12/35] s --- app/runtime/api.py | 2 +- app/runtime/requirements.txt | 5 ++++- app/services/graph_service/graph_service.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/app/runtime/api.py b/app/runtime/api.py index 355c838..884fcfa 100644 --- a/app/runtime/api.py +++ b/app/runtime/api.py @@ -32,7 +32,7 @@ class RecResponse(BaseModel): @app.get("/inference", response_model=RecResponse) def recommend( query: str = Query(..., description="검색 쿼리"), - top_k: int = Query(5, gt=1, le=10) # default 5 + top_k: int = Query(10, gt=1, le=10) # default 10 ): # 1) 쿼리 기준 top-k 클러스터 hits = search_clusters(query, top_k) diff --git a/app/runtime/requirements.txt b/app/runtime/requirements.txt index 087a22d..60b8699 100644 --- a/app/runtime/requirements.txt +++ b/app/runtime/requirements.txt @@ -4,4 +4,7 @@ torch sentence-transformers faiss-gpu networkx -tqdm \ No newline at end of file +tqdm +fastapi +uvicorn +pydantic \ No newline at end of file diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index b7352f1..fab156e 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -11,7 +11,7 @@ app = FastAPI(title="Graph Service with AI Inference") # Redis 초기화용 글로벌 -REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379") +REDIS_URL = os.getenv("REDIS_URL", "redis://{redis_host}:6379") redis: 
Optional[aioredis.Redis] = None # 요청 모델 From af766359b73261a4440fbba1592abef02df20afd Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:42:59 +0000 Subject: [PATCH 13/35] s --- app/services/graph_service/graph_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index fab156e..780bd4e 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -11,7 +11,7 @@ app = FastAPI(title="Graph Service with AI Inference") # Redis 초기화용 글로벌 -REDIS_URL = os.getenv("REDIS_URL", "redis://{redis_host}:6379") +REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379") redis: Optional[aioredis.Redis] = None # 요청 모델 From cd801703851db607ab38fe0403aafb55eef798f6 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:48:06 +0000 Subject: [PATCH 14/35] s --- app/services/graph_service/graph_service.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 780bd4e..52a3fe1 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -77,12 +77,16 @@ def fetch_keywords(query: str) -> list[str]: # AI 서버 호출 + 결과 캐싱 async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): try: - response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) + #response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) + response = requests.get("http://sum-service:8004/inference", params={"query": root, "top_k": top1}) + # response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) response.raise_for_status() data = response.json() + tree_data = data["results"]["children"] + # 트리 구성 keyword_tree = { "id": root, From 
47cdcf3d9c061cff7413a4827e2e4a1896e0330a Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:49:05 +0000 Subject: [PATCH 15/35] s --- app/services/graph_service/graph_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 52a3fe1..9c00fca 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -63,7 +63,7 @@ def make_cache_key( root: str, top1: int, top2: int) -> str: def fetch_keywords(query: str) -> list[str]: try: response = requests.get( - "http://searchforest-ai:8004/inference", + "http://sum-service:8004/inference", params={"query": query, "top_k": 5} ) response.raise_for_status() From be1c60426dffc3ba2c73888b3d7f2bc8282cde04 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 21:56:46 +0000 Subject: [PATCH 16/35] S --- app/services/graph_service/graph_service.py | 35 +++++++++------------ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 9c00fca..710c61d 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -78,7 +78,7 @@ def fetch_keywords(query: str) -> list[str]: async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): try: #response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) - response = requests.get("http://sum-service:8004/inference", params={"query": root, "top_k": top1}) + response = requests.get("http://52.78.34.56:8004/inference", params={"query": root, "top_k": top1}) # response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) @@ -87,27 +87,22 @@ async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): tree_data = data["results"]["children"] - # 트리 구성 - keyword_tree = { - 
"id": root, - "value": 1.0, - "children": [] - } - kw2pids = {} - - for cluster in data["results"]["children"]: - cluster_kw = cluster["kw"] - subnodes = cluster.get("children", []) - child_node = { - "id": cluster_kw, - "value": cluster["sim"], - "children": [] + # 👉 트리 포맷 맞춰 변환 + mapping = {} + for node in tree_data: + lvl1_kw = node["id"] + mapping[lvl1_kw] = { + "value": node.get("sim", 0.8), + "children": node.get("children", []) } - for sub in subnodes: - child_node["children"].append({"id": sub["kw"], "value": 0.8, "children": []}) - kw2pids[sub["kw"]] = sub["pids"] - keyword_tree["children"].append(child_node) + keyword_tree = manual_tree_with_full_values(root, mapping) + + # pids 추출 + kw2pids = {} + for node in tree_data: + for child in node["children"]: + kw2pids[child["id"]] = child["pids"] cache_key = make_cache_key(root, top1, top2) if redis: From 17034e8baba5912e236fc9e0256435d393c590c9 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 22:19:57 +0000 Subject: [PATCH 17/35] s --- app/services/graph_service/graph_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 710c61d..ac4d1f9 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -78,7 +78,7 @@ def fetch_keywords(query: str) -> list[str]: async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): try: #response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) - response = requests.get("http://52.78.34.56:8004/inference", params={"query": root, "top_k": top1}) + #response = requests.get("http://52.78.34.56:8004/inference", params={"query": root, "top_k": top1}) # response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) From b0068981f64f960539dc40453caf7c8470f122b3 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 
12 Jun 2025 22:33:51 +0000 Subject: [PATCH 18/35] s --- app/services/graph_service/graph_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index ac4d1f9..1701297 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -63,7 +63,7 @@ def make_cache_key( root: str, top1: int, top2: int) -> str: def fetch_keywords(query: str) -> list[str]: try: response = requests.get( - "http://sum-service:8004/inference", + # "http://sum-service:8004/inference", params={"query": query, "top_k": 5} ) response.raise_for_status() From daa25ebeb72e95a6047061ee89d97f2b2c4f7eb1 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Thu, 12 Jun 2025 22:47:58 +0000 Subject: [PATCH 19/35] s --- app/services/graph_service/graph_service.py | 28 ++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 1701297..97c7a6c 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -59,20 +59,20 @@ def make_cache_key( root: str, top1: int, top2: int) -> str: return "graph:" + hashlib.sha256(key_str.encode()).hexdigest() -# AI 서버 호출 함수 -def fetch_keywords(query: str) -> list[str]: - try: - response = requests.get( - # "http://sum-service:8004/inference", - params={"query": query, "top_k": 5} - ) - response.raise_for_status() - data = response.json() - keywords = [child["kw"] for child in data["results"]["children"]] - return keywords - except Exception as e: - print(f"[ERROR] AI 서버 호출 실패: {e}") - return [] +# # AI 서버 호출 함수 +# def fetch_keywords(query: str) -> list[str]: +# try: +# response = requests.get( +# # "http://sum-service:8004/inference", +# params={"query": query, "top_k": 5} +# ) +# response.raise_for_status() +# data = response.json() +# keywords = 
[child["kw"] for child in data["results"]["children"]] +# return keywords +# except Exception as e: +# print(f"[ERROR] AI 서버 호출 실패: {e}") +# return [] # AI 서버 호출 + 결과 캐싱 async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): From d212ddea9487d0bcb65f47e04906b7cb37b4ba1a Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Fri, 13 Jun 2025 07:53:57 +0900 Subject: [PATCH 20/35] =?UTF-8?q?=EB=A1=A4=EB=B0=B1=20=EA=B0=80=EB=B3=B4?= =?UTF-8?q?=EC=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 831422b..04531e5 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,12 @@ share/python-wheels/ *.egg MANIFEST + +app/data/ +app/model +app/tools/ +app/services/papers_service/data + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. From 2f5d44b72a96a5cd57f9593c941066c8488ef211 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Fri, 13 Jun 2025 01:40:03 +0000 Subject: [PATCH 21/35] ignore --- app/services/graph_service/graph_service.py | 146 ------------- app/services/graph_service/main.py | 36 +++ app/services/papers_service/main.py | 205 ++++++++++++++++++ app/services/papers_service/papers_service.py | 146 ------------- 4 files changed, 241 insertions(+), 292 deletions(-) delete mode 100644 app/services/graph_service/graph_service.py create mode 100644 app/services/graph_service/main.py create mode 100644 app/services/papers_service/main.py delete mode 100644 app/services/papers_service/papers_service.py diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py deleted file mode 100644 index 97c7a6c..0000000 --- a/app/services/graph_service/graph_service.py +++ /dev/null @@ -1,146 +0,0 @@ -import os -import json, hashlib -from typing import List, Dict, Optional, Tuple, 
Union -from fastapi import FastAPI, HTTPException, Query -from pydantic import BaseModel -import aioredis -import requests -from tree_mapping import extract_tree_mapping - -# ──────────────────────────────────────────────────────────────── -app = FastAPI(title="Graph Service with AI Inference") - -# Redis 초기화용 글로벌 -REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379") -redis: Optional[aioredis.Redis] = None - -# 요청 모델 -class GraphRequest(BaseModel): - root: str - top1: int = 5 - top2: int = 3 - -# 응답 트리 노드 구조 -class KeywordNode(BaseModel): - id: str - value: float - children: List["KeywordNode"] -KeywordNode.update_forward_refs() - -# 전체 응답 구조 -class GraphResponse(BaseModel): - keyword_tree: KeywordNode - -# Redis 연결 -@app.on_event("startup") -async def startup_event(): - global redis - # modern aioredis uses from_url - try: - redis = await aioredis.from_url( - REDIS_URL, - encoding="utf-8", - decode_responses=True, - max_connections=10 - ) - print(f"✅ Connected to Redis at {REDIS_URL}") - except Exception as e: - print(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}") - redis = None - -@app.on_event("shutdown") -async def shutdown_event(): - await redis.close() - -# 캐시 키 생성 함수 -def make_cache_key( root: str, top1: int, top2: int) -> str: - # 파라미터 조합으로 고유 키 생성 - key_str = f"{root}|{top1}|{top2}" - return "graph:" + hashlib.sha256(key_str.encode()).hexdigest() - - -# # AI 서버 호출 함수 -# def fetch_keywords(query: str) -> list[str]: -# try: -# response = requests.get( -# # "http://sum-service:8004/inference", -# params={"query": query, "top_k": 5} -# ) -# response.raise_for_status() -# data = response.json() -# keywords = [child["kw"] for child in data["results"]["children"]] -# return keywords -# except Exception as e: -# print(f"[ERROR] AI 서버 호출 실패: {e}") -# return [] - -# AI 서버 호출 + 결과 캐싱 -async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): - try: - #response = requests.get("http://searchforest-ai:8004/inference", params={"query": root, "top_k": top1}) - #response = 
requests.get("http://52.78.34.56:8004/inference", params={"query": root, "top_k": top1}) - - # response = requests.get("http://localhost:8004/inference", params={"query": root, "top_k": top1}) - - response.raise_for_status() - data = response.json() - - tree_data = data["results"]["children"] - - # 👉 트리 포맷 맞춰 변환 - mapping = {} - for node in tree_data: - lvl1_kw = node["id"] - mapping[lvl1_kw] = { - "value": node.get("sim", 0.8), - "children": node.get("children", []) - } - - keyword_tree = manual_tree_with_full_values(root, mapping) - - # pids 추출 - kw2pids = {} - for node in tree_data: - for child in node["children"]: - kw2pids[child["id"]] = child["pids"] - - cache_key = make_cache_key(root, top1, top2) - if redis: - await redis.set(cache_key, json.dumps({"tree": keyword_tree, "kw2pids": kw2pids}), ex=3600) - - return keyword_tree, kw2pids - - except Exception as e: - print(f"[ERROR] AI 호출 실패: {e}") - raise - -# /graph 엔드포인트 -@app.post("/graph", response_model=GraphResponse) -async def build_graph(req: GraphRequest): - - cache_key = make_cache_key(req.root, req.top1, req.top2) - if redis: - cached = await redis.get(cache_key) - if cached: - obj = json.loads(cached) - return {"keyword_tree": obj["tree"], "kw2pids": obj["kw2pids"]} - - tree = await fetch_from_ai_and_cache(req.root, req.top1, req.top2) - - root, mapping = extract_tree_mapping(original_json) - tree = manual_tree_with_full_values(root, mapping) - tree_parsed = manual_tree_with_full_values(tree) - - return {"keyword_tree": tree_parsed, "kw2pids": kw2pids} - - -# /kw2pids 엔드포인트 (핑퐁용) -@app.get("/kw2pids") -async def get_kw2pids(query: str = Query(...), top1: int = 5, top2: int = 3): - cache_key = make_cache_key(query, top1, top2) - if redis: - cached = await redis.get(cache_key) - if cached: - obj = json.loads(cached) - return obj["kw2pids"] - return {"message": "No cached kw2pids available."} diff --git a/app/services/graph_service/main.py b/app/services/graph_service/main.py new file mode 100644 index 
0000000..b3baa70 --- /dev/null +++ b/app/services/graph_service/main.py @@ -0,0 +1,36 @@ +from fastapi import FastAPI +from pydantic import BaseModel +from typing import List, Optional + +# ① dummy_data 모듈에서 get_dummy_tree 함수 import +from dummy_data import get_dummy_tree_with_context_and_example +from conditional_dummy_tree import IMPORTANT_TREES + +app = FastAPI(title="Graph Service (Stub)") + +# --- 요청 모델 --- +class GraphRequest(BaseModel): + root: str + top1: int = 5 + top2: int = 3 + +# --- 키워드 노드 재귀 정의 --- +class KeywordNode(BaseModel): + id: str + value: float + example: Optional[str] + children: List["KeywordNode"] +KeywordNode.update_forward_refs() + +# --- 응답 모델 --- +class GraphResponse(BaseModel): + keyword_tree: KeywordNode + +# --- 엔드포인트: dummy_data 사용 --- +@app.post("/graph", response_model=GraphResponse) +def build_graph(req: GraphRequest): + if req.root in IMPORTANT_TREES: + tree = IMPORTANT_TREES[req.root] + else: + tree = get_dummy_tree_with_context_and_example(req.root, req.top1, req.top2) + return {"keyword_tree": tree} \ No newline at end of file diff --git a/app/services/papers_service/main.py b/app/services/papers_service/main.py new file mode 100644 index 0000000..f732cab --- /dev/null +++ b/app/services/papers_service/main.py @@ -0,0 +1,205 @@ +# papers_service/main.py +import random + +from fastapi import FastAPI, HTTPException, Query +from pydantic import BaseModel +from typing import List, Optional +import json +import os + +app = FastAPI(title="Papers Service (Stub)") + + +class Author(BaseModel): + name: str + + +class Citation(BaseModel): + paperId: str + title: Optional[str] + year: Optional[int] + + +class Reference(BaseModel): + paperId: str + title: Optional[str] + year: Optional[int] + + +# --- 논문 객체 정의 --- +class Paper(BaseModel): + paper_id: str + abstract: Optional[str] + title: Optional[str] + + url: Optional[str] + venue: Optional[str] + year: Optional[int] + + reference_count: Optional[int] + citation_count: Optional[int] + 
# --- response envelope for the /papers endpoints ---
class PapersResponse(BaseModel):
    total_results: int
    max_display: int
    page: int
    page_size: int
    papers: List[Paper]


# 1) Load the paper DB and the keyword -> paper-id mapping once at import time.
BASE_DIR = os.path.join(os.path.dirname(__file__), "data")
PAPER_DATA_PATH = os.path.join(BASE_DIR, "inductive_test_checkpoint_collected.json")
GRAPH_KEYWORD_PATH = os.path.join(BASE_DIR, "kw2pids.json")

with open(PAPER_DATA_PATH, "r", encoding="utf-8") as f:
    paper_db = json.load(f)

# Pre-built keyword -> paper-id mapping.
with open(GRAPH_KEYWORD_PATH, "r", encoding="utf-8") as f:
    kw2pids = json.load(f)


def _paper_from_entry(pid, entry):
    """Map one raw DB record onto the Paper schema (sim_score is a stub)."""
    tldr_text = entry.get("tldr", {}).get("text") if entry.get("tldr") else None
    return Paper(
        paper_id=pid,
        title=entry.get("title"),
        abstract=entry.get("abstract"),
        url=entry.get("url"),
        venue=entry.get("venue"),
        year=entry.get("year"),
        reference_count=entry.get("referenceCount"),
        citation_count=entry.get("citationCount"),
        influentialCitationCount=entry.get("influentialCitationCount"),
        fieldsOfStudy=entry.get("fieldsOfStudy"),
        tldr=tldr_text,
        authors=[Author(name=a["name"]) for a in entry.get("authors", [])],
        sim_score=random.uniform(0, 1),  # Stub score
    )


@app.get("/papers", response_model=PapersResponse)
def get_random_papers(
    kw: str = Query(..., description="검색할 키워드"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100)
):
    """Return a random, paginated sample of papers (kw is currently unused)."""
    pid_pool = list(paper_db.keys())
    sample_size = random.randint(20, 40)
    sampled_pids = random.sample(pid_pool, min(sample_size, len(pid_pool)))

    # Page the sampled ids.
    total = len(sampled_pids)
    start = (page - 1) * page_size
    end = min(start + page_size, total)
    page_pids = sampled_pids[start:end]

    page_papers = [_paper_from_entry(pid, paper_db[pid]) for pid in page_pids]

    return PapersResponse(
        total_results=total,
        max_display=len(page_pids),
        page=page,
        page_size=page_size,
        papers=page_papers,
    )


def get_papers_by_keyword(
    kw: str = Query(..., description="검색할 키워드"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100)
):
    """Keyword-lookup variant; currently serves a fixed stub id list."""
    if kw not in kw2pids:
        print(f"Keyword '{kw}' not found.")
        all_pids = [
            "40108038",
            "59572248",
            "5799960",
            "14188576",
            "119242784",
        ]
        # raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.")
    else:
        deduplicated_ids = [
            "13074624", "14188576", "14516333", "14909482", "15302646",
            "162168808", "198147940", "28639198", "40108038", "41418788",
            "51183683", "52232173", "53641451", "55836730", "56099032",
            "5734610", "5799960", "59408549", "59572248", "786330",
            "85459157", "10682321", "11501607", "115113968", "11534505",
            "117899249", "118489086", "118587315", "118751294", "118816857",
            "118849608", "119111722", "119144587", "119209851", "119241784",
            "119341051", "119471991", "119472164",
        ]
        # all_pids = kw2pids[kw]
        all_pids = random.sample(deduplicated_ids, 20)

    # Page the id list.
    total = len(all_pids)
    start = (page - 1) * page_size
    end = min(start + page_size, total)
    page_pids = all_pids[start:end]

    # Skip ids that are missing from the local DB.
    page_papers = [
        _paper_from_entry(pid, paper_db[pid]) for pid in page_pids if pid in paper_db
    ]

    return PapersResponse(
        total_results=total,
        max_display=len(page_pids),
        page=page,
        page_size=page_size,
        papers=page_papers,
    )
b/app/services/papers_service/papers_service.py deleted file mode 100644 index 1f02272..0000000 --- a/app/services/papers_service/papers_service.py +++ /dev/null @@ -1,146 +0,0 @@ -# papers_service/main.py -import random - -from fastapi import FastAPI, HTTPException, Query -from pydantic import BaseModel -from typing import List, Optional -import json -import os - -app = FastAPI(title="Papers Service") - - -class Author(BaseModel): - name: str - - -class Citation(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -class Reference(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -# --- 논문 객체 정의 --- -class Paper(BaseModel): - paper_id: str - abstract: Optional[str] - title: Optional[str] - - url: Optional[str] - venue: Optional[str] - year: Optional[int] - - reference_count: Optional[int] - citation_count: Optional[int] - influentialCitationCount: Optional[int] - - fieldsOfStudy: Optional[List[str]] - tldr: Optional[str] - authors: List[Author] - - sim_score: float - - -# --- 응답 모델 --- -class PapersResponse(BaseModel): - total_results: int - max_display: int - page: int - page_size: int - papers: List[Paper] - - - -def make_cache_key(root, top1, top2): - key_str = f"{root}|{top1}|{top2}" - return "keyword_tree:graph:" + hashlib.sha256(key_str.encode()).hexdigest() - - -def fetch_keyword_tree_from_graph_service(query: str) -> dict: - response = requests.get("http://graph-service:8002/keyword_tree", params={"query": query}) - return response.json() - -@app.get("/papers", response_model=PapersResponse) -def get_papers_by_keyword( - kw: str = Query(..., description="클릭한 키워드"), - page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100) -): - - response = requests.post("http://graph-service:8002/graph", json={ - "root": kw, - "top1": 10, - "top2": 3 - }) - - - keyword_tree = response.json().get("keyword_tree") - - response = requests.get("http://graph-service:8002/keyword_tree", params={"query": "AI"}) - 
kw2pids = response.json() - - - - keyword_tree_json = redis.get(f"keyword_tree:{cache_key}") - keyword_tree = json.loads(keyword_tree_json) - - key = make_cache_key("pid123", "AI", 5, 3) - cached = redis.get(key) - if cached: - kw2pids = json.loads(cached) - - - if kw not in kw2pids: - raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.") - else: - all_pids = kw2pids[kw] - - # 페이징 - total = len(all_pids) - start = (page - 1) * page_size - end = min(start + page_size, total) - sliced = all_pids[start:end] - - papers = [] - for pid in sliced: - if pid not in paper_db: - continue - entry = paper_db[pid] - papers.append(Paper( - paper_id=pid, - title=entry.get("title"), - abstract=entry.get("abstract"), - url=entry.get("url"), - venue=entry.get("venue"), - year=entry.get("year"), - reference_count=entry.get("referenceCount"), - citation_count=entry.get("citationCount"), - influentialCitationCount=entry.get("influentialCitationCount"), - fieldsOfStudy=entry.get("fieldsOfStudy"), - tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, - authors=[Author(name=a["name"]) for a in entry.get("authors", [])], - sim_score=random.uniform(0, 1) # Stub score - )) - - return PapersResponse( - total_results=total, - max_display=len(sliced), - page=page, - page_size=page_size, - papers=papers - ) - -@app.get("/keyword_tree") -async def get_keyword_tree(query: str = Query(...)): - cache_key = f"kw2pids:{query}" - if redis: - cached = await redis.get(cache_key) - if cached: - return json.loads(cached) - return {"message": "No cached keyword->pids mapping found."} From 415da91151b03fc2a16093531c2b62855c4cbba2 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Fri, 13 Jun 2025 01:43:22 +0000 Subject: [PATCH 22/35] im back --- .gitignore | 4 + app/services/graph_service/graph_service.py | 146 ++++++++++++++++++++ app/services/graph_service/main.py | 36 ----- 3 files changed, 150 insertions(+), 36 deletions(-) create mode 100644 
# graph_service.py — AI-inference-backed keyword graph service with Redis cache.
import os
import json, hashlib
from typing import List, Dict, Optional, Tuple, Union
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
import aioredis
import requests
from tree_mapping import extract_tree_mapping
# NOTE(review): manual_tree_with_full_values is called below but was never
# imported at this revision; assuming it lives in tree_mapping — confirm.
from tree_mapping import manual_tree_with_full_values

# ────────────────────────────────────────────────────────────────
app = FastAPI(title="Graph Service with AI Inference")

# Redis handle; None means "cache disabled" (startup failed or not yet run).
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379")
redis: Optional[aioredis.Redis] = None

# Single source of truth for the AI endpoint (was duplicated inline).
AI_INFERENCE_URL = "https://2f7a-165-194-104-91.ngrok-free.app/inference"


class GraphRequest(BaseModel):
    """Request: root keyword plus depth-1 (top1) / depth-2 (top2) fan-out."""
    root: str
    top1: int = 5
    top2: int = 3


class KeywordNode(BaseModel):
    """Recursive radial-tree node: keyword id, similarity value, children."""
    id: str
    value: float
    children: List["KeywordNode"]
KeywordNode.update_forward_refs()


class GraphResponse(BaseModel):
    """Response envelope for /graph."""
    keyword_tree: KeywordNode


@app.on_event("startup")
async def startup_event():
    """Open the Redis pool; fall back to running without a cache on failure."""
    global redis
    try:
        redis = await aioredis.from_url(
            REDIS_URL,
            encoding="utf-8",
            decode_responses=True,
            max_connections=10,
        )
        print(f"✅ Connected to Redis at {REDIS_URL}")
    except Exception as e:
        print(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}")
        redis = None


@app.on_event("shutdown")
async def shutdown_event():
    # fixed: guard against redis being None when startup could not connect.
    if redis:
        await redis.close()


def make_cache_key(root: str, top1: int, top2: int) -> str:
    """Build a deterministic cache key from the request parameters."""
    key_str = f"{root}|{top1}|{top2}"
    return "graph:" + hashlib.sha256(key_str.encode()).hexdigest()


def fetch_keywords(query: str) -> list[str]:
    """Return the depth-1 keywords suggested by the AI server (best effort)."""
    try:
        response = requests.get(
            AI_INFERENCE_URL,
            # fixed: a missing comma after the URL made this a SyntaxError
            params={"query": query, "top_k": 5},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
        return [child["kw"] for child in data["results"]["children"]]
    except Exception as e:
        print(f"[ERROR] AI 서버 호출 실패: {e}")
        return []


async def fetch_from_ai_and_cache(root: str, top1: int, top2: int):
    """Call the AI server, convert its payload, cache it, return (tree, kw2pids)."""
    try:
        response = requests.get(
            AI_INFERENCE_URL, params={"query": root, "top_k": top1}, timeout=30
        )
        response.raise_for_status()
        data = response.json()
        tree_data = data["results"]["children"]

        # Convert the AI payload into the manual-tree input format.
        mapping = {
            node["id"]: {
                "value": node.get("sim", 0.8),
                "children": node.get("children", []),
            }
            for node in tree_data
        }
        keyword_tree = manual_tree_with_full_values(root, mapping)

        # keyword -> paper-id mapping comes from the depth-2 nodes.
        kw2pids = {
            child["id"]: child["pids"]
            for node in tree_data
            for child in node["children"]
        }

        if redis:
            await redis.set(
                make_cache_key(root, top1, top2),
                json.dumps({"tree": keyword_tree, "kw2pids": kw2pids}),
                ex=3600,
            )
        return keyword_tree, kw2pids
    except Exception as e:
        print(f"[ERROR] AI 호출 실패: {e}")
        raise


@app.post("/graph", response_model=GraphResponse)
async def build_graph(req: GraphRequest):
    """Serve the tree from cache when possible, else build and cache it."""
    cache_key = make_cache_key(req.root, req.top1, req.top2)
    if redis:
        cached = await redis.get(cache_key)
        if cached:
            obj = json.loads(cached)
            return {"keyword_tree": obj["tree"], "kw2pids": obj["kw2pids"]}

    # fixed: the (tree, kw2pids) tuple was previously discarded, after which
    # undefined names (original_json, kw2pids) were referenced and
    # manual_tree_with_full_values was called with the wrong arguments.
    keyword_tree, kw2pids = await fetch_from_ai_and_cache(req.root, req.top1, req.top2)
    return {"keyword_tree": keyword_tree, "kw2pids": kw2pids}


@app.get("/kw2pids")
async def get_kw2pids(query: str = Query(...), top1: int = 5, top2: int = 3):
    """Return the cached keyword->pids mapping from a previous /graph call."""
    if redis:
        cached = await redis.get(make_cache_key(query, top1, top2))
        if cached:
            return json.loads(cached)["kw2pids"]
    return {"message": "No cached kw2pids available."}
# get_meta.py — async helpers for pulling paper metadata from Semantic Scholar.
import asyncio

import aiohttp

S2_URL = "https://api.semanticscholar.org/graph/v1/paper/{}"
FIELDS = "title,abstract,year,venue,referenceCount,citationCount," \
         "influentialCitationCount,fieldsOfStudy,authors,tldr"


async def fetch_paper(session, pid):
    """GET one paper's metadata record from the Semantic Scholar graph API."""
    async with session.get(
        S2_URL.format(pid), params={"fields": FIELDS}, timeout=10
    ) as resp:
        resp.raise_for_status()
        return await resp.json()


async def fetch_many(pids):
    """Fetch every pid concurrently over a single shared HTTP session."""
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch_paper(session, pid) for pid in pids))
# graph_service.py — final revision: AI-backed keyword graph with Redis cache.
import os, json, hashlib
from typing import List, Dict, Optional
from fastapi import FastAPI, Query
from pydantic import BaseModel
import aioredis, requests

from json_to_tree_and_kw2pid import manual_tree_with_full_values

# ─────────────────────────────
app = FastAPI(title="Graph Service with AI Inference")
REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379")
redis: Optional[aioredis.Redis] = None

# ───────── Models ────────────
class GraphRequest(BaseModel):
    """Request: root keyword plus depth-1 (top1) / depth-2 (top2) fan-out."""
    root: str
    top1: int = 5
    top2: int = 3

class KeywordNode(BaseModel):
    """Recursive radial-tree node."""
    id: str
    value: float
    children: List["KeywordNode"]
KeywordNode.update_forward_refs()

class GraphResponse(BaseModel):
    """Full /graph response: the tree plus keyword -> paper-id mapping."""
    keyword_tree: KeywordNode
    kw2pids: Dict[str, List[str]]

# ───────── Redis Events ──────
@app.on_event("startup")
async def startup_event():
    """Open the Redis pool; run without caching if the connection fails."""
    global redis
    try:
        redis = await aioredis.from_url(
            REDIS_URL, encoding="utf-8", decode_responses=True, max_connections=10
        )
        print(f"✅ Connected to Redis at {REDIS_URL}")
    except Exception as e:
        print(f"⚠️ Redis 연결 실패, 캐시 미사용: {e}")
        redis = None

@app.on_event("shutdown")
async def shutdown_event():
    if redis:
        await redis.close()

# ───────── Utils ────────────
def make_cache_key(root: str, top1: int, top2: int) -> str:
    """Deterministic cache key derived from the request parameters."""
    return "graph:" + hashlib.sha256(f"{root}|{top1}|{top2}".encode()).hexdigest()

async def fetch_from_ai_and_cache(root: str, top1: int, top2: int):
    """Call the AI server, build (tree, kw2pids), and cache the pair for 1 h."""
    url = "https://2f7a-165-194-104-91.ngrok-free.app/inference"
    resp = requests.get(url, params={"query": root, "top_k": top1}, timeout=30)
    # fixed: HTTP errors previously surfaced later as opaque KeyErrors.
    resp.raise_for_status()
    data = resp.json()

    mapping = {
        n["id"]: {"value": n.get("sim", 0.8), "children": n.get("children", [])}
        for n in data["results"]["children"]
    }
    keyword_tree = manual_tree_with_full_values(root, mapping)

    # keyword -> paper-id mapping comes from the depth-2 nodes.
    kw2pids = {
        child["id"]: child["pids"]
        for n in data["results"]["children"]
        for child in n["children"]
    }

    if redis:
        await redis.set(
            make_cache_key(root, top1, top2),
            json.dumps({"tree": keyword_tree, "kw2pids": kw2pids}),
            ex=3600,
        )
    return keyword_tree, kw2pids

# ───────── API ────────────
@app.post("/graph", response_model=GraphResponse)
async def build_graph(req: GraphRequest):
    """Serve from cache when possible, otherwise build via the AI server."""
    cache_key = make_cache_key(req.root, req.top1, req.top2)
    if redis and (cached := await redis.get(cache_key)):
        obj = json.loads(cached)
        # fixed: the cache stores the tree under "tree", but GraphResponse
        # requires "keyword_tree" — returning obj unchanged failed response
        # validation on every cache hit.
        return {"keyword_tree": obj["tree"], "kw2pids": obj["kw2pids"]}

    keyword_tree, kw2pids = await fetch_from_ai_and_cache(
        req.root, req.top1, req.top2
    )
    return {"keyword_tree": keyword_tree, "kw2pids": kw2pids}

@app.get("/kw2pids")
async def get_kw2pids(query: str = Query(...), top1: int = 5, top2: int = 3):
    """Return only the cached keyword->pids mapping for a prior /graph call."""
    if redis and (cached := await redis.get(make_cache_key(query, top1, top2))):
        return json.loads(cached)["kw2pids"]
    return {"message": "No cached kw2pids available."}
#!/usr/bin/env python3
"""
json_to_tree_and_kw2pid.py

Input : raw AI-inference JSON text ({"results": ...}).
Output: (tree_dict, kw2pid_dict) — the 2-depth radial tree built by
        manual_tree_with_full_values(), and a {keyword: [pid, ...]} mapping
        taken from the depth-2 nodes.
"""
import json
from typing import Dict, List, Union, Tuple

# ---------- types & util ---------- #
TreeMapping = Dict[str, Dict[str, Union[float, List[Dict[str, Union[str, float]]]]]]


def manual_tree_with_full_values(root: str, mapping: TreeMapping):
    """Build the 2-depth tree; every node carries id/context/value/children.

    `context` is the root-to-node path joined with '-'.
    """
    children = []
    for lvl1_label, data in mapping.items():
        grandchildren = [
            {
                "id": lvl2["id"],
                "context": f"{root}-{lvl1_label}-{lvl2['id']}",
                "value": lvl2["value"],
                "children": [],
            }
            for lvl2 in data["children"]
        ]
        children.append({
            "id": lvl1_label,
            "context": f"{root}-{lvl1_label}",
            "value": data["value"],
            "children": grandchildren,
        })

    return {
        "id": root,
        "context": root,
        "value": 1.0,
        "children": children,
    }


# ---------- parsing core ---------- #
def split_json(raw_json: str) -> Tuple[dict, dict]:
    """Return (2-depth tree dict, keyword->pids dict) parsed from raw JSON."""
    res = json.loads(raw_json)["results"]
    root = res["root"]

    mapping: TreeMapping = {}          # depth-1 -> {value, depth-2 children}
    kw2pid: Dict[str, List[str]] = {}  # depth-2 keyword -> paper ids

    for lvl1 in res["children"]:
        lvl1_label = lvl1["id"]
        mapping[lvl1_label] = {"value": lvl1["value"], "children": []}

        for lvl2 in lvl1["children"]:
            mapping[lvl1_label]["children"].append(
                {"id": lvl2["id"], "value": lvl2["value"]}
            )
            # fixed: a depth-2 keyword appearing under several depth-1 parents
            # used to overwrite earlier pids; merge order-preserving, deduped.
            if lvl2.get("pids"):
                merged = kw2pid.setdefault(lvl2["id"], [])
                merged.extend(p for p in lvl2["pids"] if p not in merged)

    return manual_tree_with_full_values(root, mapping), kw2pid


# ---------- usage example ---------- #
if __name__ == "__main__":
    with open("first_result.json", "r", encoding="utf-8") as f:
        raw = f.read()

    tree, kw2pid = split_json(raw)

    # Persist both artifacts next to the input file.
    with open("tree_2depth.json", "w", encoding="utf-8") as f:
        json.dump(tree, f, ensure_ascii=False, indent=2)
    with open("kw2pid.json", "w", encoding="utf-8") as f:
        json.dump(kw2pid, f, ensure_ascii=False, indent=2)

    # Console preview (truncated).
    print("=== 2-depth tree ===")
    print(json.dumps(tree, ensure_ascii=False, indent=2)[:800], "...\n")
    print("=== kw2pid ===")
    print(json.dumps(kw2pid, ensure_ascii=False, indent=2)[:800], "...")
"context": root, - "value": 1.0, - "children": children - } - -def get_dummy_tree_with_context_and_example(root: str, top1: int = 5, top2: int = 3): - """ - root 키워드를 중심으로 top1개의 1-depth와 각 1-depth마다 top2개의 2-depth를 생성한 더미 트리를 반환. - 각 노드에는 context, value, example 필드가 포함됨. - """ - children = [] - - for i in range(top1): - label = chr(ord('A') + i) if i < 26 else str(i + 1) - base_sim = round(1.0 - (i + 1) * 0.1, 4) - lvl1_id = label - lvl1_context = f"{root}-{label}" - lvl1_example = f"{root} 분야의 하위 주제 {label}에 대한 간단한 설명입니다." - - grandchildren = [] - for j in range(top2): - lvl2_id = f"{label}-{j + 1}" - lvl2_context = f"{root}-{label}-{j + 1}" - lvl2_value = round(base_sim - (j + 1) * 0.05, 4) - lvl2_example = f"{label} 세부 주제 {j + 1}에 대한 예시 설명입니다." - - grandchildren.append({ - "id": lvl2_id, - "context": lvl2_context, - "value": lvl2_value, - "example": lvl2_example, - "children": [] - }) - - children.append({ - "id": lvl1_id, - "context": lvl1_context, - "value": base_sim, - "example": lvl1_example, - "children": grandchildren - }) - - return { - "id": root, - "context": root, - "value": 1.0, - "example": f"{root}라는 주제를 중심으로 확장된 키워드 구조입니다.", - "children": children - } - - diff --git a/app/services/graph_service/test.json b/app/services/graph_service/test.json new file mode 100644 index 0000000..3197678 --- /dev/null +++ b/app/services/graph_service/test.json @@ -0,0 +1 @@ +{"results":{"root":"test","children":[{"id":"dark matter density","value":1.0,"children":[{"id":"dark matter density","value":0.8,"pids":["59396448","119215569","119234393","118493497","118683283","41660913","118557040","56387919","17578958","118365709","116981343","118607320","73631352","118865190","119393943","14073020","119414536","59065822","15089339","202740","13418816","119111724","119298498","15832449","119241489","14563896","119187336","66122070","30687560","119244797","53009603","119377295","195750878","119442219","118884237","52076653","55808663","119269304"]},{"id":"such dark 
matter","value":0.1861,"pids":["59396448","119215569","119234393","118493497","118683283","41660913","118557040","56387919","17578958","118365709","116981343","118607320","73631352","118865190","119393943","14073020","119414536","59065822","15089339","202740","13418816","119111724","119298498","15832449","119241489","14563896","119187336","66122070","30687560","119244797","53009603","119377295","195750878","119442219","118884237","52076653","55808663","119269304"]},{"id":"dark matter equation","value":-0.3604,"pids":["59396448","119215569","119234393","118493497","118683283","41660913","118557040","56387919","17578958","118365709","116981343","118607320","73631352","118865190","119393943","14073020","119414536","59065822","15089339","202740","13418816","119111724","119298498","15832449","119241489","14563896","119187336","66122070","30687560","119244797","53009603","119377295","195750878","119442219","118884237","52076653","55808663","119269304"]}],"sim":0.517},{"id":"functions and","value":1.0,"children":[{"id":"functions and","value":0.8,"pids":["119139045","118454356","3137686","119334046","36196479","119303391","17620745"]},{"id":"functions","value":0.193,"pids":["119139045","118454356","3137686","119334046","36196479","119303391","17620745"]},{"id":"riemann hypothesis","value":-0.2895,"pids":["119139045","118454356","3137686","119334046","36196479","119303391","17620745"]}],"sim":0.5162},{"id":"superstring amplitudes in","value":1.0,"children":[{"id":"superstring amplitudes in","value":0.8,"pids":["119190767","15809595","119637242","115157387","118502290","11003463","15484108","15078341","14488445","54747167","115176345","15123220","15728255","119317525","119217245","119620386","119156821","116737595"]},{"id":"superstring 
amplitudes","value":0.198,"pids":["119190767","15809595","119637242","115157387","118502290","11003463","15484108","15078341","14488445","54747167","115176345","15123220","15728255","119317525","119217245","119620386","119156821","116737595"]},{"id":"the superstring theory","value":-0.3401,"pids":["119190767","15809595","119637242","115157387","118502290","11003463","15484108","15078341","14488445","54747167","115176345","15123220","15728255","119317525","119217245","119620386","119156821","116737595"]}],"sim":0.5088},{"id":"physical quark masses","value":1.0,"children":[{"id":"physical quark masses","value":0.8,"pids":["119199799","118499851","118594540","118616937","17681501","119253849","118573385","53408656","119299206","15021084","118614504","951081","118512027","117744851","118383911","119206765","118682442","17767351","119455153","6999036","118435926","16923103","118999246","16376338","15058137","14934507","119098413","119186598","13227870","119230894","118931380","118907218","6745272","59929994","118724416","96459351","18833137","119350545","15321595","119177691","8204356","119265888","9951650","73582376","118309408","14998696","119490124","119252039","54615791","11051018","118334003","119110313","119274896","385703","119234296","46899660","7247089","119153192","15935578","119296171","12958775","119288809","119019127","15816221","14827452","90262695","14682292","117844101","117033489","14294720","5911300","2735565","18150622","14546422","18475783","119019112","118841610","59441559","119230198","118529794","85556034","119268625","118729327","15539886","119238442","2199424","119298353","700993","119478564","118672038","119112605","12973610"]},{"id":"with physical 
quark","value":0.1852,"pids":["119199799","118499851","118594540","118616937","17681501","119253849","118573385","53408656","119299206","15021084","118614504","951081","118512027","117744851","118383911","119206765","118682442","17767351","119455153","6999036","118435926","16923103","118999246","16376338","15058137","14934507","119098413","119186598","13227870","119230894","118931380","118907218","6745272","59929994","118724416","96459351","18833137","119350545","15321595","119177691","8204356","119265888","9951650","73582376","118309408","14998696","119490124","119252039","54615791","11051018","118334003","119110313","119274896","385703","119234296","46899660","7247089","119153192","15935578","119296171","12958775","119288809","119019127","15816221","14827452","90262695","14682292","117844101","117033489","14294720","5911300","2735565","18150622","14546422","18475783","119019112","118841610","59441559","119230198","118529794","85556034","119268625","118729327","15539886","119238442","2199424","119298353","700993","119478564","118672038","119112605","12973610"]},{"id":"qcd 
calculations","value":-0.3092,"pids":["119199799","118499851","118594540","118616937","17681501","119253849","118573385","53408656","119299206","15021084","118614504","951081","118512027","117744851","118383911","119206765","118682442","17767351","119455153","6999036","118435926","16923103","118999246","16376338","15058137","14934507","119098413","119186598","13227870","119230894","118931380","118907218","6745272","59929994","118724416","96459351","18833137","119350545","15321595","119177691","8204356","119265888","9951650","73582376","118309408","14998696","119490124","119252039","54615791","11051018","118334003","119110313","119274896","385703","119234296","46899660","7247089","119153192","15935578","119296171","12958775","119288809","119019127","15816221","14827452","90262695","14682292","117844101","117033489","14294720","5911300","2735565","18150622","14546422","18475783","119019112","118841610","59441559","119230198","118529794","85556034","119268625","118729327","15539886","119238442","2199424","119298353","700993","119478564","118672038","119112605","12973610"]}],"sim":0.5084},{"id":"offline handwriting recognition","value":1.0,"children":[{"id":"offline handwriting recognition","value":0.8,"pids":["3842393","6549978","126180494","202660770","6069782","16509326","16661426","263092","6060495","51955928","6708387","25051392","121292373","2308618","34701244","140262773","198968240","67856041","10552590","199405577","13745107","318257","4565931","56895553","7685498","4707079","53428248","3825772","53219846","145056436","52889887","54447114","4762792","49414646","9731718","10663135","140210708","54462895","90262232","17735501","67856708","315789","201665955","4605057","51871912"]},{"id":"handwriting 
recognition","value":0.1943,"pids":["3842393","6549978","126180494","202660770","6069782","16509326","16661426","263092","6060495","51955928","6708387","25051392","121292373","2308618","34701244","140262773","198968240","67856041","10552590","199405577","13745107","318257","4565931","56895553","7685498","4707079","53428248","3825772","53219846","145056436","52889887","54447114","4762792","49414646","9731718","10663135","140210708","54462895","90262232","17735501","67856708","315789","201665955","4605057","51871912"]},{"id":"using deep neural","value":-0.3086,"pids":["3842393","6549978","126180494","202660770","6069782","16509326","16661426","263092","6060495","51955928","6708387","25051392","121292373","2308618","34701244","140262773","198968240","67856041","10552590","199405577","13745107","318257","4565931","56895553","7685498","4707079","53428248","3825772","53219846","145056436","52889887","54447114","4762792","49414646","9731718","10663135","140210708","54462895","90262232","17735501","67856708","315789","201665955","4605057","51871912"]}],"sim":0.5077},{"id":"cosmic microwave background","value":1.0,"children":[{"id":"cosmic microwave background","value":0.8,"pids":["119278530","1887453","119495132","119472241","14218199","18120479","119243126","54173237","119212870","51745830","119114124","5398329","16479802","15652812","119180175","44129566","118565857","119073969","119223268","119202493","119177664","52065651","118525940","119190546","119408894","15329615","4594646","119511960","16019463","31073237","118672217","118381670","118615359","118396636","29627522","6037952","119415834","14371538","119163396","119450190","3061987","21929004","988092","119330744","119206660","118733060","10052235","15054396","517403","84846361","118588389","118959658","119481246"]},{"id":"recent cosmic 
microwave","value":0.1895,"pids":["119278530","1887453","119495132","119472241","14218199","18120479","119243126","54173237","119212870","51745830","119114124","5398329","16479802","15652812","119180175","44129566","118565857","119073969","119223268","119202493","119177664","52065651","118525940","119190546","119408894","15329615","4594646","119511960","16019463","31073237","118672217","118381670","118615359","118396636","29627522","6037952","119415834","14371538","119163396","119450190","3061987","21929004","988092","119330744","119206660","118733060","10052235","15054396","517403","84846361","118588389","118959658","119481246"]},{"id":"the cosmological parameters","value":-0.3145,"pids":["119278530","1887453","119495132","119472241","14218199","18120479","119243126","54173237","119212870","51745830","119114124","5398329","16479802","15652812","119180175","44129566","118565857","119073969","119223268","119202493","119177664","52065651","118525940","119190546","119408894","15329615","4594646","119511960","16019463","31073237","118672217","118381670","118615359","118396636","29627522","6037952","119415834","14371538","119163396","119450190","3061987","21929004","988092","119330744","119206660","118733060","10052235","15054396","517403","84846361","118588389","118959658","119481246"]}],"sim":0.5051},{"id":"mechanical lattice","value":1.0,"children":[{"id":"mechanical lattice","value":0.8,"pids":["118471994","4950906","119285651","5397433","119180039"]},{"id":"the mechanical lattice","value":0.1946,"pids":["118471994","4950906","119285651","5397433","119180039"]},{"id":"mechanical response","value":-0.35,"pids":["118471994","4950906","119285651","5397433","119180039"]}],"sim":0.5051},{"id":"stochastic optimization","value":1.0,"children":[{"id":"stochastic 
optimization","value":0.8,"pids":["182953141","6884742","47017143","3637630","58981788","52180472","1548646","119173094","16613403","4958405","52079347","86422815"]},{"id":"optimization","value":0.1852,"pids":["182953141","6884742","47017143","3637630","58981788","52180472","1548646","119173094","16613403","4958405","52079347","86422815"]},{"id":"the optimal solution","value":-0.3728,"pids":["182953141","6884742","47017143","3637630","58981788","52180472","1548646","119173094","16613403","4958405","52079347","86422815"]}],"sim":0.495},{"id":"adversarially robust models","value":1.0,"children":[{"id":"adversarially robust models","value":0.8,"pids":["182952436","59336190","197431150","195218789","883252","49862308","59413762","159041363","173188378","3272089","59222747","173990256","128358825","51925625","195767324","135464475","195345281","145049777","53729258","67855552","53039886","52920928","202578080","53292287","85543329","29160618","67788180","2541531","53668092","53735542","53047456","85498673","56657912","195584368","199064656","52298300","119186282","166228688","49901528","53737378"]},{"id":"adversarial robustness and","value":0.1847,"pids":["182952436","59336190","197431150","195218789","883252","49862308","59413762","159041363","173188378","3272089","59222747","173990256","128358825","51925625","195767324","135464475","195345281","145049777","53729258","67855552","53039886","52920928","202578080","53292287","85543329","29160618","67788180","2541531","53668092","53735542","53047456","85498673","56657912","195584368","199064656","52298300","119186282","166228688","49901528","53737378"]},{"id":"adversarial learning 
problem","value":-0.3608,"pids":["182952436","59336190","197431150","195218789","883252","49862308","59413762","159041363","173188378","3272089","59222747","173990256","128358825","51925625","195767324","135464475","195345281","145049777","53729258","67855552","53039886","52920928","202578080","53292287","85543329","29160618","67788180","2541531","53668092","53735542","53047456","85498673","56657912","195584368","199064656","52298300","119186282","166228688","49901528","53737378"]}],"sim":0.4934},{"id":"the chemotaxis sensitivity","value":1.0,"children":[{"id":"the chemotaxis sensitivity","value":0.8,"pids":["119696972","119313443","119156166","119175757","56390808","119301760","59445935","119116911","119315730","119317122","119167731","85546779","55106939","119294250","119171973","119589133","119145111","160010311","20958772","119148805","119665385","59488170","119641359","119660499","119172102","119601309","152282979"]},{"id":"chemotaxis sensitivity","value":0.1958,"pids":["119696972","119313443","119156166","119175757","56390808","119301760","59445935","119116911","119315730","119317122","119167731","85546779","55106939","119294250","119171973","119589133","119145111","160010311","20958772","119148805","119665385","59488170","119641359","119660499","119172102","119601309","152282979"]},{"id":"generalized solution this","value":-0.3051,"pids":["119696972","119313443","119156166","119175757","56390808","119301760","59445935","119116911","119315730","119317122","119167731","85546779","55106939","119294250","119171973","119589133","119145111","160010311","20958772","119148805","119665385","59488170","119641359","119660499","119172102","119601309","152282979"]}],"sim":0.4918}]}} \ No newline at end of file diff --git a/app/services/main.py b/app/services/main.py index fe07af2..e3edcd7 100644 --- a/app/services/main.py +++ b/app/services/main.py @@ -75,4 +75,23 @@ def api_search( """ TODO: api_graph + api_papers 조합해서 한번에 반환 """ - raise NotImplementedError \ No newline 
at end of file + raise NotImplementedError + + + import aiohttp, asyncio + + S2_URL = "https://api.semanticscholar.org/graph/v1/paper/{}" + FIELDS = "title,abstract,year,venue,referenceCount,citationCount," \ + "influentialCitationCount,fieldsOfStudy,authors,tldr" + + async def fetch_paper(session, pid): + url = S2_URL.format(pid) + params = {"fields": FIELDS} + async with session.get(url, params=params, timeout=10) as r: + r.raise_for_status() + return await r.json() + + async def fetch_many(pids): + async with aiohttp.ClientSession() as session: + tasks = [fetch_paper(session, pid) for pid in pids] + return await asyncio.gather(*tasks) \ No newline at end of file diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index f732cab..a4814aa 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -1,53 +1,31 @@ # papers_service/main.py -import random +import os, json, random +from typing import List, Optional, Dict -from fastapi import FastAPI, HTTPException, Query +from fastapi import FastAPI, Query, HTTPException from pydantic import BaseModel -from typing import List, Optional -import json -import os app = FastAPI(title="Papers Service (Stub)") - +# ────────────── Pydantic Models ────────────── class Author(BaseModel): name: str - -class Citation(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -class Reference(BaseModel): - paperId: str - title: Optional[str] - year: Optional[int] - - -# --- 논문 객체 정의 --- class Paper(BaseModel): paper_id: str abstract: Optional[str] title: Optional[str] - url: Optional[str] venue: Optional[str] year: Optional[int] - reference_count: Optional[int] citation_count: Optional[int] influentialCitationCount: Optional[int] - fieldsOfStudy: Optional[List[str]] tldr: Optional[str] authors: List[Author] - sim_score: float - -# --- 응답 모델 --- class PapersResponse(BaseModel): total_results: int max_display: 
int @@ -55,151 +33,75 @@ class PapersResponse(BaseModel): page_size: int papers: List[Paper] -# 1) 전역 로딩 +# ────────────── Data Load ────────────── BASE_DIR = os.path.join(os.path.dirname(__file__), "data") -PAPER_DATA_PATH = os.path.join(BASE_DIR, "inductive_test_checkpoint_collected.json") -GRAPH_KEYWORD_PATH = os.path.join(BASE_DIR, "kw2pids.json") -with open(PAPER_DATA_PATH, "r", encoding="utf-8") as f: - paper_db = json.load(f) - -# 예시: 이미 생성한 키워드 → 논문 ID 매핑 -with open(GRAPH_KEYWORD_PATH, "r", encoding="utf-8") as f: - kw2pids = json.load(f) +with open(os.path.join(BASE_DIR, "inductive_test_checkpoint_collected.json"), encoding="utf-8") as f: + paper_db: Dict[str, dict] = json.load(f) + +with open(os.path.join(BASE_DIR, "kw2pids.json"), encoding="utf-8") as f: + kw2pids: Dict[str, List[str]] = json.load(f) + +# ────────────── Helper ────────────── +def build_paper(pid: str) -> Paper: + entry = paper_db[pid] + return Paper( + paper_id=pid, + title=entry.get("title"), + abstract=entry.get("abstract"), + url=entry.get("url"), + venue=entry.get("venue"), + year=entry.get("year"), + reference_count=entry.get("referenceCount"), + citation_count=entry.get("citationCount"), + influentialCitationCount=entry.get("influentialCitationCount"), + fieldsOfStudy=entry.get("fieldsOfStudy"), + tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, + authors=[Author(name=a["name"]) for a in entry.get("authors", [])], + sim_score=random.uniform(0, 1), # stub + ) +def paginate(ids: List[str], page: int, page_size: int): + total = len(ids) + max_page = max((total - 1) // page_size + 1, 1) + if page > max_page: + return [], total + start = (page - 1) * page_size + return ids[start : start + page_size], total +# ────────────── API ────────────── @app.get("/papers", response_model=PapersResponse) def get_random_papers( - kw: str = Query(..., description="검색할 키워드"), page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100) + page_size: int = Query(20, ge=1, 
le=100), ): - all_pids = list(paper_db.keys()) - sample_size = random.randint(20, 40) - sampled_pids = random.sample(all_pids, min(sample_size, len(all_pids))) - - total = len(sampled_pids) - start = (page - 1) * page_size - end = min(start + page_size, total) - sliced = sampled_pids[start:end] - - papers = [] - for pid in sliced: - entry = paper_db[pid] - papers.append(Paper( - paper_id=pid, - title=entry.get("title"), - abstract=entry.get("abstract"), - url=entry.get("url"), - venue=entry.get("venue"), - year=entry.get("year"), - reference_count=entry.get("referenceCount"), - citation_count=entry.get("citationCount"), - influentialCitationCount=entry.get("influentialCitationCount"), - fieldsOfStudy=entry.get("fieldsOfStudy"), - tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, - authors=[Author(name=a["name"]) for a in entry.get("authors", [])], - sim_score=random.uniform(0, 1) - )) - + sample_ids = random.sample(list(paper_db.keys()), k=min(40, len(paper_db))) + sliced, total = paginate(sample_ids, page, page_size) return PapersResponse( total_results=total, max_display=len(sliced), page=page, page_size=page_size, - papers=papers + papers=[build_paper(pid) for pid in sliced], ) - +@app.get("/papers/by_keyword", response_model=PapersResponse) # ✨ def get_papers_by_keyword( - kw: str = Query(..., description="검색할 키워드"), - page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100) + kw: str = Query(..., description="검색 키워드"), + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), ): if kw not in kw2pids: - print(f"Keyword '{kw}' not found.") - all_pids = [ - "40108038", - "59572248", - "5799960", - "14188576", - "119242784" - ] - # raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.") - else: - deduplicated_ids = [ - "13074624", - "14188576", - "14516333", - "14909482", - "15302646", - "162168808", - "198147940", - "28639198", - "40108038", - "41418788", - "51183683", - "52232173", - 
"53641451", - "55836730", - "56099032", - "5734610", - "5799960", - "59408549", - "59572248", - "786330", - "85459157", - "10682321", - "11501607", - "115113968", - "11534505", - "117899249", - "118489086", - "118587315", - "118751294", - "118816857", - "118849608", - "119111722", - "119144587", - "119209851", - "119241784", - "119341051", - "119471991", - "119472164" - ] - # all_pids = kw2pids[kw] - all_pids = random.sample(deduplicated_ids, 20) + raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.") - # 페이징 - total = len(all_pids) - start = (page - 1) * page_size - end = min(start + page_size, total) - sliced = all_pids[start:end] - - papers = [] - for pid in sliced: - if pid not in paper_db: - continue - entry = paper_db[pid] - papers.append(Paper( - paper_id=pid, - title=entry.get("title"), - abstract=entry.get("abstract"), - url=entry.get("url"), - venue=entry.get("venue"), - year=entry.get("year"), - reference_count=entry.get("referenceCount"), - citation_count=entry.get("citationCount"), - influentialCitationCount=entry.get("influentialCitationCount"), - fieldsOfStudy=entry.get("fieldsOfStudy"), - tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, - authors=[Author(name=a["name"]) for a in entry.get("authors", [])], - sim_score=random.uniform(0, 1) # Stub score - )) + ids = kw2pids[kw] + sliced, total = paginate(ids, page, page_size) + papers = [build_paper(pid) for pid in sliced if pid in paper_db] return PapersResponse( total_results=total, - max_display=len(sliced), + max_display=len(papers), page=page, page_size=page_size, - papers=papers + papers=papers, ) + \ No newline at end of file From 81f608d5efc26f82e8656ab5ca1371192b776e7a Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 05:54:42 +0000 Subject: [PATCH 25/35] ? 
--- app/services/papers_service/papers_service.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index a4814aa..3534d18 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -104,4 +104,26 @@ def get_papers_by_keyword( page_size=page_size, papers=papers, ) - \ No newline at end of file + + + +GRAPH_BASE = os.getenv("GRAPH_URL", "http://graph:8000") + +def ensure_kw2pids(keyword: str) -> List[str]: + """kw2pids.json 에 없으면 Graph Service 로부터 새로 가져와 저장""" + if keyword in kw2pids: + return kw2pids[keyword] + + import requests + resp = requests.post( + f"{GRAPH_BASE}/graph", + json={"root": keyword, "top1": 5, "top2": 3}, + timeout=15, + ) + if resp.status_code != 200: + raise HTTPException(502, "Graph Service error") + data = resp.json() + kw2pids.update(data["kw2pids"]) # 메모리 캐시 + with open(os.path.join(BASE_DIR, "kw2pids.json"), "w") as f: + json.dump(kw2pids, f, ensure_ascii=False) + return kw2pids.get(keyword, []) \ No newline at end of file From 888ec98fd96574c8096df3c96494b73aad66bb07 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 08:53:55 +0000 Subject: [PATCH 26/35] =?UTF-8?q?docker:=20graph=5Fservice=20=EB=B0=8F=20c?= =?UTF-8?q?ompose=EC=A0=95=EB=A6=AC=20=EC=99=84=EB=A3=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/runtime/Dockerfile | 16 +++--- app/runtime/__init__.py | 0 app/services/graph_service/Dockerfile | 5 +- app/services/graph_service/graph_service.py | 18 ++++-- app/services/graph_service/requirements.txt | 3 +- docker-compose.yml | 63 +++++++++++---------- 6 files changed, 59 insertions(+), 46 deletions(-) create mode 100644 app/runtime/__init__.py diff --git a/app/runtime/Dockerfile b/app/runtime/Dockerfile index f1c9f86..da9a191 100644 --- a/app/runtime/Dockerfile +++ 
b/app/runtime/Dockerfile @@ -5,21 +5,23 @@ ENV DEBIAN_FRONTEND=noninteractive \ PYTHONUNBUFFERED=1 \ NVIDIA_VISIBLE_DEVICES=all +WORKDIR /app + # 필수 패키지 RUN apt-get update && apt-get install -y \ python3-pip python3-dev git && \ rm -rf /var/lib/apt/lists/* -# 프로젝트 소스 복사 -WORKDIR /workspace -COPY .. /workspace +# 1) requirements만 먼저 복사 → 레이어 캐시 +COPY runtime/requirements.txt ./requirements.txt +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt -# 의존성(인퍼런스 전용) -RUN pip3 install --upgrade pip \ - && pip3 install -r app/runtime/requirements.txt +# 2) 실제 런타임 코드 복사 +COPY runtime/ ./runtime/ # 지표·인덱스(volume 으로 붙여도 OK) # COPY indices/ /workspace/indices EXPOSE 8004 -CMD ["uvicorn", "app.runtime.api:app", "--host", "0.0.0.0", "--port", "8004"] \ No newline at end of file +CMD ["uvicorn", "runtime.api:app", "--host", "0.0.0.0", "--port", "8004"] diff --git a/app/runtime/__init__.py b/app/runtime/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/graph_service/Dockerfile b/app/services/graph_service/Dockerfile index 11a6479..4ebe574 100644 --- a/app/services/graph_service/Dockerfile +++ b/app/services/graph_service/Dockerfile @@ -2,12 +2,9 @@ FROM python:3.9-slim WORKDIR /app -COPY requirements.txt . +COPY . . RUN pip install --no-cache-dir -r requirements.txt -COPY graph_service.py . - -COPY tree_mapping.py . 
# FastAPI Uvicorn 실행 diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 52b2d2e..534a410 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -3,6 +3,7 @@ from fastapi import FastAPI, Query from pydantic import BaseModel import aioredis, requests +import httpx from json_to_tree_and_kw2pid import manual_tree_with_full_values @@ -50,25 +51,31 @@ def make_cache_key(root: str, top1: int, top2: int) -> str: return "graph:" + hashlib.sha256(f"{root}|{top1}|{top2}".encode()).hexdigest() async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): - url = "https://2f7a-165-194-104-91.ngrok-free.app/inference" - data = requests.get(url, params={"query": root, "top_k": top1}).json() + url = "https://58b9-165-194-104-91.ngrok-free.app/inference" + params = {"query": root, "top_k": top1, "top2": top2} + + async with httpx.AsyncClient() as client: + resp = await client.get(url, params=params, timeout=15) + resp.raise_for_status() + data = resp.json() mapping = { n["id"]: {"value": n.get("sim", 0.8), "children": n.get("children", [])} - for n in data["results"]["children"] + for n in data["results"]["children"][:top1] } keyword_tree = manual_tree_with_full_values(root, mapping) kw2pids = { child["id"]: child["pids"] for n in data["results"]["children"] - for child in n["children"] + for child in n.get("children", []) + if "pids" in child } if redis: await redis.set( make_cache_key(root, top1, top2), - json.dumps({"tree": keyword_tree, "kw2pids": kw2pids}), + json.dumps({"keyword_tree": keyword_tree, "kw2pids": kw2pids}), ex=3600, ) return keyword_tree, kw2pids @@ -84,6 +91,7 @@ async def build_graph(req: GraphRequest): keyword_tree, kw2pids = await fetch_from_ai_and_cache( req.root, req.top1, req.top2 ) + return {"keyword_tree": keyword_tree, "kw2pids": kw2pids} # ✨ @app.get("/kw2pids") diff --git a/app/services/graph_service/requirements.txt 
b/app/services/graph_service/requirements.txt index 0b17cf8..2708847 100644 --- a/app/services/graph_service/requirements.txt +++ b/app/services/graph_service/requirements.txt @@ -2,4 +2,5 @@ fastapi uvicorn[standard] pydantic aioredis -requests \ No newline at end of file +requests +httpx>=0.26 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 549ed4b..e14d2c1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,14 +8,24 @@ services: - "6379:6379" restart: unless-stopped - papers-service: - build: - context: ./app/services/papers_service - dockerfile: Dockerfile - ports: - - "8000:8000" - depends_on: - - searchforest-ai + # runtime_infer: + # build: + # context: ./app + # dockerfile: runtime/Dockerfile + # container_name: searchforest-infer + # environment: + # - NVIDIA_VISIBLE_DEVICES=all + # volumes: + # - ./app/indices:/app/indices:ro # 인덱스 실시간 사용 + # ports: + # - "8004:8004" + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] graph-service: build: @@ -25,31 +35,26 @@ services: ports: - "8002:8002" environment: - # Redis URL을 환경변수로 주입 - REDIS_URL: "redis://redis:6379" - # (필요하다면) CONFIG_PATH, INDEX_PATH 등도 + - REDIS_URL=redis://redis:6379 + - INFER_URL=http://host.docker.internal:8004 depends_on: - redis - - searchforest-ai restart: unless-stopped - - runtime_infer: + + papers-service: build: - context: ./app/runtime - dockerfile: app/runtime/Dockerfile - container_name: searchforest-infer - runtime: nvidia # GPU 전달 + context: ./app/services/papers_service + dockerfile: Dockerfile + container_name: papers_service + ports: + - "8000:8000" environment: - - NVIDIA_VISIBLE_DEVICES=all + - REDIS_URL=redis://redis:6379 + - GRAPH_URL=http://graph-service:8002 + depends_on: + - redis + - graph-service + restart: unless-stopped volumes: - - ./indices:/workspace/indices # 인덱스 실시간 사용 - ports: - - "8004:8004" - deploy: - resources: - reservations: - devices: - - 
driver: nvidia - count: 1 - capabilities: [gpu] + - ./app/data:/app/data:ro From c8c6153c9af27a92312cb30aee4eccfcf0ef8671 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 09:05:29 +0000 Subject: [PATCH 27/35] working: papers_service --- app/services/papers_service/papers_service.py | 77 +++++++++++-------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index 3534d18..d534163 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -1,5 +1,5 @@ # papers_service/main.py -import os, json, random +import os, json, random, asyncio from typing import List, Optional, Dict from fastapi import FastAPI, Query, HTTPException @@ -35,11 +35,19 @@ class PapersResponse(BaseModel): # ────────────── Data Load ────────────── BASE_DIR = os.path.join(os.path.dirname(__file__), "data") -with open(os.path.join(BASE_DIR, "inductive_test_checkpoint_collected.json"), encoding="utf-8") as f: - paper_db: Dict[str, dict] = json.load(f) -with open(os.path.join(BASE_DIR, "kw2pids.json"), encoding="utf-8") as f: - kw2pids: Dict[str, List[str]] = json.load(f) +def safe_load(fname, default): + try: + with open(os.path.join(BASE_DIR, fname), encoding="utf-8") as f: + return json.load(f) + except FileNotFoundError: + print(f"⚠️ {fname} not found, using default") + return default + +paper_db: Dict[str, dict] = safe_load("inductive_test_checkpoint_collected.json", {}) +kw2pids: Dict[str, List[str]] = safe_load("kw2pids.json", {}) + +save_lock = asyncio.Lock() # ────────────── Helper ────────────── def build_paper(pid: str) -> Paper: @@ -70,21 +78,6 @@ def paginate(ids: List[str], page: int, page_size: int): # ────────────── API ────────────── @app.get("/papers", response_model=PapersResponse) -def get_random_papers( - page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100), -): - sample_ids = 
random.sample(list(paper_db.keys()), k=min(40, len(paper_db))) - sliced, total = paginate(sample_ids, page, page_size) - return PapersResponse( - total_results=total, - max_display=len(sliced), - page=page, - page_size=page_size, - papers=[build_paper(pid) for pid in sliced], - ) - -@app.get("/papers/by_keyword", response_model=PapersResponse) # ✨ def get_papers_by_keyword( kw: str = Query(..., description="검색 키워드"), page: int = Query(1, ge=1), @@ -104,26 +97,44 @@ def get_papers_by_keyword( page_size=page_size, papers=papers, ) - -GRAPH_BASE = os.getenv("GRAPH_URL", "http://graph:8000") +@app.get("/papers/random", response_model=PapersResponse) # ✨ +def get_random_papers( + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), +): + sample_ids = random.sample(list(paper_db.keys()), k=min(40, len(paper_db))) + sliced, total = paginate(sample_ids, page, page_size) + return PapersResponse( + total_results=total, + max_display=len(sliced), + page=page, + page_size=page_size, + papers=[build_paper(pid) for pid in sliced], + ) + + + +GRAPH_BASE = os.getenv("GRAPH_URL", "http://graph-service:8002") -def ensure_kw2pids(keyword: str) -> List[str]: - """kw2pids.json 에 없으면 Graph Service 로부터 새로 가져와 저장""" +async def ensure_kw2pids(keyword: str) -> List[str]: if keyword in kw2pids: return kw2pids[keyword] - import requests - resp = requests.post( - f"{GRAPH_BASE}/graph", - json={"root": keyword, "top1": 5, "top2": 3}, - timeout=15, - ) + import httpx + async with httpx.AsyncClient() as client: + resp = await client.post(f"{GRAPH_BASE}/graph", + json={"root": keyword, "top1": 5, "top2": 3}, + timeout=15) if resp.status_code != 200: raise HTTPException(502, "Graph Service error") + data = resp.json() - kw2pids.update(data["kw2pids"]) # 메모리 캐시 - with open(os.path.join(BASE_DIR, "kw2pids.json"), "w") as f: - json.dump(kw2pids, f, ensure_ascii=False) + kw2pids.update(data["kw2pids"]) + + async with save_lock: + with open(os.path.join(BASE_DIR, "kw2pids.json"), "w") 
as f: + json.dump(kw2pids, f, ensure_ascii=False) + return kw2pids.get(keyword, []) \ No newline at end of file From 6bee4bd504e1f7429984faec22e68e8a2e8c64e0 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 12:53:39 +0000 Subject: [PATCH 28/35] api:paper_service --- .gitignore | 5 +- .../data/__init__.py => NoneType} | 0 app/runtime/api.py | 5 - app/services/graph_service/graph_service.py | 31 +++-- app/services/papers_service/Dockerfile | 2 +- .../papers_service/data/dummy_data.py | 128 ------------------ app/services/papers_service/data/ids.json | 0 app/services/papers_service/data/kw2pids.json | 53 -------- app/services/papers_service/papers_service.py | 112 ++++++++------- app/services/papers_service/requirements.txt | 3 +- 10 files changed, 91 insertions(+), 248 deletions(-) rename app/{services/papers_service/data/__init__.py => NoneType} (100%) delete mode 100644 app/services/papers_service/data/dummy_data.py delete mode 100644 app/services/papers_service/data/ids.json delete mode 100644 app/services/papers_service/data/kw2pids.json diff --git a/.gitignore b/.gitignore index ac57f41..f12775e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ app/model/model_save/* #data app/services/graph_service/data/indices/paper_ivf.index app/services/graph_service/data/indices +app/services/papers_service/data app/indices app/routers/previous/output/summaries_dir @@ -25,6 +26,8 @@ app/data app/routers/output/summaries_dir/* + + #logs app/scripts/logs app/scripts/pyrouge_root @@ -66,7 +69,6 @@ MANIFEST app/data/ app/model app/tools/ -app/services/papers_service/data # PyInstaller # Usually these files are written by a python script from a template @@ -216,3 +218,4 @@ cython_debug/ .pypirc app/data/inductive/train.jsonl app/routers/output/summaries_dir/ +app/services/papers_service/data/paper_db.json diff --git a/app/services/papers_service/data/__init__.py b/app/NoneType similarity index 100% rename from app/services/papers_service/data/__init__.py 
rename to app/NoneType diff --git a/app/runtime/api.py b/app/runtime/api.py index 884fcfa..a10137d 100644 --- a/app/runtime/api.py +++ b/app/runtime/api.py @@ -46,11 +46,6 @@ def recommend( root["children"].append(cluster_node) return {"results": root } - - - - - # 로컬 실행용 if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8004) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 534a410..054837a 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -59,18 +59,29 @@ async def fetch_from_ai_and_cache(root: str, top1: int, top2: int): resp.raise_for_status() data = resp.json() - mapping = { - n["id"]: {"value": n.get("sim", 0.8), "children": n.get("children", [])} - for n in data["results"]["children"][:top1] - } + # 2-1) keyword_tree + mapping = { n["id"]:{"value": n.get("sim",0.8),"children": n.get("children",[])} + for n in data["results"]["children"][:top1] } keyword_tree = manual_tree_with_full_values(root, mapping) - kw2pids = { - child["id"]: child["pids"] - for n in data["results"]["children"] - for child in n.get("children", []) - if "pids" in child - } + # 2-2) kw2pids ☑ root + 1-depth + 2-depth + kw2pids = {} + + # root → 모든 1-depth pids 합집합 + root_pids = [] + for n in data["results"]["children"][:top1]: + root_pids.extend(n.get("pids", [])) + kw2pids[root] = root_pids + + # 1-depth + for n in data["results"]["children"][:top1]: + if "pids" in n: + kw2pids[n["id"]] = n["pids"] + + # 2-depth + for child in n.get("children", []): + if "pids" in child: + kw2pids[child["id"]] = child["pids"] if redis: await redis.set( diff --git a/app/services/papers_service/Dockerfile b/app/services/papers_service/Dockerfile index 0bdfd6d..c1a9f76 100644 --- a/app/services/papers_service/Dockerfile +++ b/app/services/papers_service/Dockerfile @@ -20,7 +20,7 @@ RUN tar -xzf data/inductive_test_checkpoint_collected.tar.gz -C data/ && \ rm 
data/inductive_test_checkpoint_collected.tar.gz # 4. 코드 복사 (덮어쓰지 않도록 이후에) -COPY data/kw2pids.json data/ +COPY data/paper_db.json ./ COPY papers_service.py ./ # 5. 앱 실행 diff --git a/app/services/papers_service/data/dummy_data.py b/app/services/papers_service/data/dummy_data.py deleted file mode 100644 index 56c8b38..0000000 --- a/app/services/papers_service/data/dummy_data.py +++ /dev/null @@ -1,128 +0,0 @@ -# papers_service/dummy_data.py - -def get_dummy_papers(): - """ - 더미 논문 리스트 반환 - """ - return [ - { - "paper_id": "p1", - "title": "Dummy Paper A", - "abstract": "This is the abstract of dummy paper A.", - "authors": ["Alice"], - "year": 2023, - "citation_count": 5, - "sim_score": 0.90, - "summary": "이 논문은 A에 대해 간략히 설명합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p2", - "title": "Dummy Paper B", - "abstract": "This is the abstract of dummy paper B.", - "authors": ["Bob", "Carol"], - "year": 2020, - "citation_count": 3, - "sim_score": 0.85, - "summary": "이 논문은 B의 주요 기여를 요약합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p3", - "title": "Dummy Paper C", - "abstract": "This is the abstract of dummy paper C.", - "authors": ["Dave"], - "year": 2022, - "citation_count": 7, - "sim_score": 0.80, - "summary": "이 논문에서는 C를 제안하고 실험 결과를 제공합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p4", - "title": "Dummy Paper D", - "abstract": "This is the abstract of dummy paper D.", - "authors": ["Eve", "Frank"], - "year": 2021, - "citation_count": 10, - "sim_score": 0.75, - "summary": "이 논문은 D 기법의 유효성을 평가합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p5", - "title": "Dummy Paper E", - "abstract": "This is the abstract of dummy paper E.", - "authors": ["Grace"], - "year": 2019, - "citation_count": 12, - "sim_score": 0.70, - "summary": "이 논문에서는 E 알고리즘을 제안합니다.", - "url": 
"https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p6", - "title": "Dummy Paper F", - "abstract": "This is the abstract of dummy paper F.", - "authors": ["Heidi"], - "year": 2020, - "citation_count": 8, - "sim_score": 0.65, - "summary": "이 논문은 F 시스템의 성능을 분석합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p7", - "title": "Dummy Paper G", - "abstract": "This is the abstract of dummy paper G.", - "authors": ["Ivan", "Judy"], - "year": 2022, - "citation_count": 6, - "sim_score": 0.60, - "summary": "이 논문에서는 G 모델을 제안하고 평가합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p8", - "title": "Dummy Paper H", - "abstract": "This is the abstract of dummy paper H.", - "authors": ["Kevin"], - "year": 2021, - "citation_count": 9, - "sim_score": 0.55, - "summary": "이 논문은 H 프로토콜의 보안성을 검증합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic", - }, - { - "paper_id": "p9", - "title": "Dummy Paper I", - "abstract": "This is the abstract of dummy paper I.", - "authors": ["Laura"], - "year": 2023, - "citation_count": 4, - "sim_score": 0.50, - "summary": "이 논문에서는 I 프레임워크를 소개합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - }, - { - "paper_id": "p10", - "title": "Dummy Paper J", - "abstract": "This is the abstract of dummy paper J.", - "authors": ["Mallory", "Niaj"], - "year": 2018, - "citation_count": 15, - "sim_score": 0.45, - "summary": "이 논문은 J 방법론의 활용 사례를 제시합니다.", - "url": "https://www.semanticscholar.org", - "domain": "Mathematic" - } - ] \ No newline at end of file diff --git a/app/services/papers_service/data/ids.json b/app/services/papers_service/data/ids.json deleted file mode 100644 index e69de29..0000000 diff --git a/app/services/papers_service/data/kw2pids.json b/app/services/papers_service/data/kw2pids.json deleted file mode 100644 index 368bd49..0000000 --- 
a/app/services/papers_service/data/kw2pids.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "machine learning": [ - "40108038", - "59572248", - "5799960", - "14188576", - "119242784" - ], - "neural network": [ - "41418788", - "119472164", - "11501607", - "119111722", - "14909482" - ], - "graph representation": [ - "55836730", - "118751294", - "51183683", - "118849608", - "118816857", - "5734610" - ], - "text summarization": [ - "59572248", - "5799960", - "14188576", - "51183683", - "85459157", - "56099032" - ], - "natural language processing": [ - "11501607", - "59572248", - "14909482" - ], - "deep learning": [ - "11534505", - "59572248", - "13074624", - "53641451" - ], - "transformer": [ - "41418788", - "5799960", - "11501607" - ], - "embedding": [ - "59408549", - "14516333", - "119144587" - ] -} \ No newline at end of file diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index d534163..c325a39 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -1,13 +1,15 @@ # papers_service/main.py -import os, json, random, asyncio +import os, json, random +import asyncio from typing import List, Optional, Dict from fastapi import FastAPI, Query, HTTPException from pydantic import BaseModel +import httpx -app = FastAPI(title="Papers Service (Stub)") +app = FastAPI(title="Papers Service") -# ────────────── Pydantic Models ────────────── +# ───────── Pydantic Models ───────── class Author(BaseModel): name: str @@ -33,89 +35,98 @@ class PapersResponse(BaseModel): page_size: int papers: List[Paper] -# ────────────── Data Load ────────────── +# ───────── Data Load ───────── BASE_DIR = os.path.join(os.path.dirname(__file__), "data") -def safe_load(fname, default): +def safe_load(fname): + path = os.path.join(BASE_DIR, fname) try: - with open(os.path.join(BASE_DIR, fname), encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: return json.load(f) except 
FileNotFoundError: - print(f"⚠️ {fname} not found, using default") - return default + print(f"⚠️ {fname} not found → 빈 dict 사용") + return {} -paper_db: Dict[str, dict] = safe_load("inductive_test_checkpoint_collected.json", {}) -kw2pids: Dict[str, List[str]] = safe_load("kw2pids.json", {}) - -save_lock = asyncio.Lock() +paper_db: Dict[str, dict] = safe_load("paper_db.json") # ← 최근에 만든 DB +kw2pids: Dict[str, List[str]] = safe_load("kw2pids.json") +save_lock = asyncio.Lock() # ────────────── Helper ────────────── def build_paper(pid: str) -> Paper: - entry = paper_db[pid] + e = paper_db[pid] return Paper( - paper_id=pid, - title=entry.get("title"), - abstract=entry.get("abstract"), - url=entry.get("url"), - venue=entry.get("venue"), - year=entry.get("year"), - reference_count=entry.get("referenceCount"), - citation_count=entry.get("citationCount"), - influentialCitationCount=entry.get("influentialCitationCount"), - fieldsOfStudy=entry.get("fieldsOfStudy"), - tldr=entry.get("tldr", {}).get("text") if entry.get("tldr") else None, - authors=[Author(name=a["name"]) for a in entry.get("authors", [])], - sim_score=random.uniform(0, 1), # stub + paper_id = pid, + title = e.get("title"), + abstract = e.get("abstract"), + url = e.get("url"), + venue = e.get("venue"), + year = e.get("year"), + reference_count = e.get("referenceCount"), + citation_count = e.get("citationCount"), + influentialCitationCount = e.get("influentialCitationCount"), + fieldsOfStudy = e.get("fieldsOfStudy"), + tldr = e.get("tldr", {}).get("text") if entry.get("tldr") else None, + authors = [Author(name=a["name"]) + for a in e.get("authors", [])], + sim_score = random.uniform(0, 1), # stub ) def paginate(ids: List[str], page: int, page_size: int): total = len(ids) - max_page = max((total - 1) // page_size + 1, 1) - if page > max_page: - return [], total start = (page - 1) * page_size - return ids[start : start + page_size], total + return ids[start:start + page_size], total + +async def build_papers(ids: 
List[str]) -> List[Paper]: + papers: List[Paper] = [] + for pid in ids: + if await ensure_paper(pid): + papers.append(build_paper(pid)) + return papers -# ────────────── API ────────────── +# ───────── API ───────── @app.get("/papers", response_model=PapersResponse) -def get_papers_by_keyword( +async def get_papers_by_keyword( kw: str = Query(..., description="검색 키워드"), page: int = Query(1, ge=1), page_size: int = Query(20, ge=1, le=100), ): - if kw not in kw2pids: - raise HTTPException(status_code=404, detail=f"Keyword '{kw}' not found.") + # ① kw2pids 확보 (없으면 Graph Service 호출) + ids = await ensure_kw2pids(kw) + if not ids: + raise HTTPException(404, f"Keyword '{kw}' not found.") - ids = kw2pids[kw] + # ② pagination sliced, total = paginate(ids, page, page_size) + + # ③ paper_db에 있는 것만 반환 papers = [build_paper(pid) for pid in sliced if pid in paper_db] return PapersResponse( - total_results=total, - max_display=len(papers), - page=page, - page_size=page_size, - papers=papers, + total_results = total, + max_display = len(papers), + page = page, + page_size = page_size, + papers = papers, ) -@app.get("/papers/random", response_model=PapersResponse) # ✨ +@app.get("/papers/random", response_model=PapersResponse) def get_random_papers( page: int = Query(1, ge=1), page_size: int = Query(20, ge=1, le=100), ): - sample_ids = random.sample(list(paper_db.keys()), k=min(40, len(paper_db))) + sample_ids = random.sample(list(paper_db.keys()), + k=min(40, len(paper_db))) sliced, total = paginate(sample_ids, page, page_size) return PapersResponse( - total_results=total, - max_display=len(sliced), - page=page, - page_size=page_size, - papers=[build_paper(pid) for pid in sliced], + total_results = total, + max_display = len(sliced), + page = page, + page_size = page_size, + papers = [build_paper(pid) for pid in sliced], ) - GRAPH_BASE = os.getenv("GRAPH_URL", "http://graph-service:8002") async def ensure_kw2pids(keyword: str) -> List[str]: @@ -137,4 +148,7 @@ async def 
ensure_kw2pids(keyword: str) -> List[str]: with open(os.path.join(BASE_DIR, "kw2pids.json"), "w") as f: json.dump(kw2pids, f, ensure_ascii=False) - return kw2pids.get(keyword, []) \ No newline at end of file + return kw2pids.get(keyword, []) + + + diff --git a/app/services/papers_service/requirements.txt b/app/services/papers_service/requirements.txt index 4174eb1..bf2e342 100644 --- a/app/services/papers_service/requirements.txt +++ b/app/services/papers_service/requirements.txt @@ -1,3 +1,4 @@ fastapi uvicorn[standard] -pydantic \ No newline at end of file +pydantic +httpx>=0.26 \ No newline at end of file From 843493cb3979b73f40ed1c4c565a322d7cf826ae Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 12:59:38 +0000 Subject: [PATCH 29/35] db: paper_db --- app/services/papers_service/Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/services/papers_service/Dockerfile b/app/services/papers_service/Dockerfile index c1a9f76..ae6806e 100644 --- a/app/services/papers_service/Dockerfile +++ b/app/services/papers_service/Dockerfile @@ -13,14 +13,15 @@ RUN pip install gdown beautifulsoup4 # 3. Google Drive에서 tar.gz 파일 다운로드 → data/ 안에 저장 RUN mkdir -p data && \ - gdown --id 1tFYFjScIyu9RvAHWWWf-SGHwTs7F5866 -O data/inductive_test_checkpoint_collected.tar.gz + gdown --id 1ZKbW2ky1Fjz7-DFXh1wKiBsEkOI73IaX \ + -O app/services/papers_service/data/paper_db.tar.gz + # 2. data/ 폴더 안에서 압축 해제 + 압축 파일 삭제 -RUN tar -xzf data/inductive_test_checkpoint_collected.tar.gz -C data/ && \ - rm data/inductive_test_checkpoint_collected.tar.gz +RUN tar -xzf data/paper_db.tar.gz -C data/ && \ + rm data/paper_db.tar.gz # 4. 코드 복사 (덮어쓰지 않도록 이후에) -COPY data/paper_db.json ./ COPY papers_service.py ./ # 5. 
앱 실행 From ebff14aa0fedc86029ae932e372bd4169b26ffee Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 13:09:35 +0000 Subject: [PATCH 30/35] api:papers_service: db docker --- app/services/papers_service/Dockerfile | 34 ++++++++++++-------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/app/services/papers_service/Dockerfile b/app/services/papers_service/Dockerfile index ae6806e..5681f2e 100644 --- a/app/services/papers_service/Dockerfile +++ b/app/services/papers_service/Dockerfile @@ -1,28 +1,26 @@ FROM python:3.9-slim +# 1. 필수 패키지 + gdown 설치 +RUN apt-get update && \ + apt-get install -y --no-install-recommends python3-pip git && \ + pip3 install --no-cache-dir gdown && \ + rm -rf /var/lib/apt/lists/* +# 2. 프로젝트 코드 복사 +# (컨텍스트 최상단에서 COPY . . 라면 /app 구조도 함께 포함됩니다) WORKDIR /app +COPY . . -# 1. 의존성 설치 -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -RUN apt-get update && apt-get install -y tar - -# 2. gdown 설치 -RUN pip install gdown beautifulsoup4 - -# 3. Google Drive에서 tar.gz 파일 다운로드 → data/ 안에 저장 -RUN mkdir -p data && \ +# 3. Google Drive에서 tar.gz 다운로드 → data/ 안에 저장·압축 해제 +RUN mkdir -p app/data && \ gdown --id 1ZKbW2ky1Fjz7-DFXh1wKiBsEkOI73IaX \ - -O app/services/papers_service/data/paper_db.tar.gz - + -O app/data/paper_db.tar.gz && \ + tar -xzf app/data/paper_db.tar.gz \ + -C app/data && \ + rm app/data/paper_db.tar.gz -# 2. data/ 폴더 안에서 압축 해제 + 압축 파일 삭제 -RUN tar -xzf data/paper_db.tar.gz -C data/ && \ - rm data/paper_db.tar.gz -# 4. 코드 복사 (덮어쓰지 않도록 이후에) -COPY papers_service.py ./ +# 4. 파이썬 의존성 설치 +RUN pip3 install --no-cache-dir -r requirements.txt # 경로는 필요에 맞게 # 5. 
앱 실행 CMD ["uvicorn", "papers_service:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file From 4d513cad367da93858a116fbd911a57752cd43b0 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 13:17:08 +0000 Subject: [PATCH 31/35] =?UTF-8?q?api(paper=5Fservice):=20tar.gz=EB=A1=9C?= =?UTF-8?q?=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/services/papers_service/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/services/papers_service/Dockerfile b/app/services/papers_service/Dockerfile index 5681f2e..03d2255 100644 --- a/app/services/papers_service/Dockerfile +++ b/app/services/papers_service/Dockerfile @@ -12,13 +12,12 @@ COPY . . # 3. Google Drive에서 tar.gz 다운로드 → data/ 안에 저장·압축 해제 RUN mkdir -p app/data && \ - gdown --id 1ZKbW2ky1Fjz7-DFXh1wKiBsEkOI73IaX \ + gdown --id 13KL8qntiJlyxc3ObqjB7t8NEXoRyYyXX \ -O app/data/paper_db.tar.gz && \ tar -xzf app/data/paper_db.tar.gz \ -C app/data && \ rm app/data/paper_db.tar.gz - # 4. 
파이썬 의존성 설치 RUN pip3 install --no-cache-dir -r requirements.txt # 경로는 필요에 맞게 From 1d82b8b67f50f4200a8050b37d0ee0843cfd799c Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 14:00:40 +0000 Subject: [PATCH 32/35] goood --- app/services/papers_service/papers_service.py | 106 ++++++++---------- 1 file changed, 44 insertions(+), 62 deletions(-) diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index c325a39..d9faea1 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -1,7 +1,7 @@ -# papers_service/main.py -import os, json, random -import asyncio -from typing import List, Optional, Dict +# papers_service/main.py + +import os, json, random, asyncio +from typing import List, Dict, Optional from fastapi import FastAPI, Query, HTTPException from pydantic import BaseModel @@ -15,41 +15,41 @@ class Author(BaseModel): class Paper(BaseModel): paper_id: str - abstract: Optional[str] - title: Optional[str] - url: Optional[str] - venue: Optional[str] - year: Optional[int] - reference_count: Optional[int] - citation_count: Optional[int] + title: Optional[str] + abstract: Optional[str] + url: Optional[str] + venue: Optional[str] + year: Optional[int] + reference_count: Optional[int] + citation_count: Optional[int] influentialCitationCount: Optional[int] - fieldsOfStudy: Optional[List[str]] - tldr: Optional[str] - authors: List[Author] - sim_score: float + fieldsOfStudy: Optional[List[str]] + tldr: Optional[str] + authors: List[Author] + sim_score: float class PapersResponse(BaseModel): total_results: int - max_display: int - page: int - page_size: int - papers: List[Paper] + max_display: int + page: int + page_size: int + papers: List[Paper] # ───────── Data Load ───────── BASE_DIR = os.path.join(os.path.dirname(__file__), "data") -def safe_load(fname): +def safe_load(fname: str) -> dict: path = os.path.join(BASE_DIR, fname) try: with open(path, 
encoding="utf-8") as f: return json.load(f) except FileNotFoundError: - print(f"⚠️ {fname} not found → 빈 dict 사용") + print(f"⚠️ {fname} not found → 빈 dict 사용") return {} -paper_db: Dict[str, dict] = safe_load("paper_db.json") # ← 최근에 만든 DB -kw2pids: Dict[str, List[str]] = safe_load("kw2pids.json") -save_lock = asyncio.Lock() +paper_db: Dict[str, dict] = safe_load("paper_db.json") +kw2pids: Dict[str, List[str]] = safe_load("kw2pids.json") +save_lock = asyncio.Lock() # 파일 캐시 동시 접근 보호 # ────────────── Helper ────────────── def build_paper(pid: str) -> Paper: @@ -65,7 +65,7 @@ def build_paper(pid: str) -> Paper: citation_count = e.get("citationCount"), influentialCitationCount = e.get("influentialCitationCount"), fieldsOfStudy = e.get("fieldsOfStudy"), - tldr = e.get("tldr", {}).get("text") if entry.get("tldr") else None, + tldr = e.get("tldr", {}).get("text") if e.get("tldr") else None, authors = [Author(name=a["name"]) for a in e.get("authors", [])], sim_score = random.uniform(0, 1), # stub @@ -85,20 +85,19 @@ async def build_papers(ids: List[str]) -> List[Paper]: # ───────── API ───────── @app.get("/papers", response_model=PapersResponse) -async def get_papers_by_keyword( - kw: str = Query(..., description="검색 키워드"), +async def get_papers( + root: str = Query(..., description="검색 루트(처음 입력)"), + kw: str = Query(..., description="사용자가 선택한 키워드"), page: int = Query(1, ge=1), page_size: int = Query(20, ge=1, le=100), ): - # ① kw2pids 확보 (없으면 Graph Service 호출) - ids = await ensure_kw2pids(kw) + ids = await ensure_kw2pids(root, kw) if not ids: - raise HTTPException(404, f"Keyword '{kw}' not found.") + raise HTTPException(404, f"Keyword '{kw}' not found") - # ② pagination sliced, total = paginate(ids, page, page_size) - # ③ paper_db에 있는 것만 반환 + # paper_db 에 존재하는 PID 만 반환 papers = [build_paper(pid) for pid in sliced if pid in paper_db] return PapersResponse( @@ -109,46 +108,29 @@ async def get_papers_by_keyword( papers = papers, ) - -@app.get("/papers/random", 
response_model=PapersResponse) -def get_random_papers( - page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100), -): - sample_ids = random.sample(list(paper_db.keys()), - k=min(40, len(paper_db))) - sliced, total = paginate(sample_ids, page, page_size) - return PapersResponse( - total_results = total, - max_display = len(sliced), - page = page, - page_size = page_size, - papers = [build_paper(pid) for pid in sliced], - ) - - GRAPH_BASE = os.getenv("GRAPH_URL", "http://graph-service:8002") -async def ensure_kw2pids(keyword: str) -> List[str]: +async def ensure_kw2pids(root: str, keyword: str, + top1: int = 5, top2: int = 3) -> List[str]: + """keyword 가 캐시에 없으면 graph-service 를 호출해 kw2pids 갱신""" if keyword in kw2pids: return kw2pids[keyword] - import httpx async with httpx.AsyncClient() as client: - resp = await client.post(f"{GRAPH_BASE}/graph", - json={"root": keyword, "top1": 5, "top2": 3}, - timeout=15) + resp = await client.post( + f"{GRAPH_BASE}/graph", + json={"root": root, "top1": top1, "top2": top2}, + timeout=15 + ) if resp.status_code != 200: - raise HTTPException(502, "Graph Service error") + raise HTTPException(502, "graph_service error") - data = resp.json() - kw2pids.update(data["kw2pids"]) + data = resp.json() # {keyword_tree:…, kw2pids:{…}} + kw2pids.update(data["kw2pids"]) # 여러 키워드 한 번에 캐시 + # 파일에도 저장 async with save_lock: with open(os.path.join(BASE_DIR, "kw2pids.json"), "w") as f: json.dump(kw2pids, f, ensure_ascii=False) - return kw2pids.get(keyword, []) - - - + return kw2pids.get(keyword, []) \ No newline at end of file From 36b4b6adb6f72d3bcbbb55cb05aa520baa6a77c8 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 14:54:07 +0000 Subject: [PATCH 33/35] as --- app/runtime/api.py | 1 + app/runtime/graph_builder.py | 47 ++++++++++++++++++- app/runtime/requirements.txt | 3 +- app/services/papers_service/papers_service.py | 3 +- 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/app/runtime/api.py 
b/app/runtime/api.py index a10137d..47b7cd1 100644 --- a/app/runtime/api.py +++ b/app/runtime/api.py @@ -6,6 +6,7 @@ from runtime.cluster_searcher import search_clusters, cluster2pids, meta from runtime.graph_builder import build_tree + app = FastAPI(title="SearchForest-AI Recommend API") diff --git a/app/runtime/graph_builder.py b/app/runtime/graph_builder.py index d6bc7df..ec7e99e 100644 --- a/app/runtime/graph_builder.py +++ b/app/runtime/graph_builder.py @@ -9,7 +9,7 @@ import torch from runtime.cluster_searcher import meta, cluster2pids # ← meta 와 함께 추가로 import - +kw2pids: dict[str, list[str]] = {} # keyword → [paper_ids] # ── 전역 설정 ─────────────────────────────────────────── @@ -121,6 +121,19 @@ def contains_kw(abs_txt: str, kw: str) -> bool: max_features=40_000, ) +# from sklearn.feature_extraction.text import TfidfVectorizer + +# # 전처리한 phrase 문서를 그대로 feeding +# vectorizer = TfidfVectorizer( +# tokenizer=lambda s: s, preprocessor=lambda s: s, lowercase=False, +# ngram_range=(1,3), min_df=5, max_df=0.8 +# ) +# tfidf_mat = vectorizer.fit_transform(docs) # shape (N_docs, N_terms) +# idf = vectorizer.idf_ +# vocab = vectorizer.vocabulary_ # dict{phrase: idx} + + + def top_keywords(pids, n=8): docs = [_as_text(G.nodes[p].get("abstract", "")).lower() for p in pids if G.has_node(p)] @@ -156,7 +169,17 @@ def build_tree(root_kw: str, cid: int, depth: int = 1): )) pids_lvl0 = cluster2pids[cid] - tree = {"id": root_kw, "value": 1.0, "children": []} + + # ① root 에도 pids 부여 + tree = { + "id": root_kw, + "value": 1.0, + "pids": pids_lvl0, # ★ 추가 + "children": [] + } + + # kw2pids 전역 캐시에도 root 등록 + kw2pids[root_kw] = pids_lvl0 # ── depth-1 (최대 3개) ────────────────────── for kw1, sc1 in select_kw_scored(root_kw, cand, tfidf_dict, k=3): @@ -172,16 +195,36 @@ def build_tree(root_kw: str, cid: int, depth: int = 1): "id": kw1, "value": round(sc1, 4), "pids": hop1, # 필요 없으면 제거 + "children": [] } + kw2pids[kw1] = hop1 + # ── depth-2 : parent=kw1, 최대 3개 ─────── if depth > 1: for 
kw2, sc2 in select_kw_scored(kw1, cand, tfidf_dict, k=3): + # node1["children"].append({ + # "id": kw2, + # "value": round(sc2, 4), + # }) + # hop-2 pids (root → kw1 → kw2) + kw2_emb = model.encode([kw2], + normalize_embeddings=True)[0] + hop2 = [ + p for p in hop1 + if (emb := get_abs_emb(p)) is not None + and util.cos_sim(kw2_emb, emb).item() > COS_TH2 + ] + node1["children"].append({ "id": kw2, "value": round(sc2, 4), + "pids": hop2, }) + # kw2pids에 2-depth 저장 + kw2pids[kw2] = hop2 + tree["children"].append(node1) return tree diff --git a/app/runtime/requirements.txt b/app/runtime/requirements.txt index 60b8699..4be3dea 100644 --- a/app/runtime/requirements.txt +++ b/app/runtime/requirements.txt @@ -7,4 +7,5 @@ networkx tqdm fastapi uvicorn -pydantic \ No newline at end of file +pydantic +spacy \ No newline at end of file diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index d9faea1..1450f44 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -133,4 +133,5 @@ async def ensure_kw2pids(root: str, keyword: str, with open(os.path.join(BASE_DIR, "kw2pids.json"), "w") as f: json.dump(kw2pids, f, ensure_ascii=False) - return kw2pids.get(keyword, []) \ No newline at end of file + return kw2pids.get(keyword, []) + \ No newline at end of file From 75b9871f7fb92f5c85e64246eb448bd0f7783068 Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 15:04:12 +0000 Subject: [PATCH 34/35] tldr --- app/services/graph_service/graph_service.py | 1 + app/services/papers_service/papers_service.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/services/graph_service/graph_service.py b/app/services/graph_service/graph_service.py index 054837a..63eb2b4 100644 --- a/app/services/graph_service/graph_service.py +++ b/app/services/graph_service/graph_service.py @@ -102,6 +102,7 @@ async def build_graph(req: GraphRequest): keyword_tree, 
kw2pids = await fetch_from_ai_and_cache( req.root, req.top1, req.top2 ) + return {"keyword_tree": keyword_tree, "kw2pids": kw2pids} # ✨ diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index 1450f44..478e3f8 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -65,7 +65,7 @@ def build_paper(pid: str) -> Paper: citation_count = e.get("citationCount"), influentialCitationCount = e.get("influentialCitationCount"), fieldsOfStudy = e.get("fieldsOfStudy"), - tldr = e.get("tldr", {}).get("text") if e.get("tldr") else None, + tldr = e.get("tldr", {}).get("text"), authors = [Author(name=a["name"]) for a in e.get("authors", [])], sim_score = random.uniform(0, 1), # stub From a95c7a29e5451e30b1044327a933eefa4800298d Mon Sep 17 00:00:00 2001 From: hyun-hyang Date: Mon, 16 Jun 2025 15:11:43 +0000 Subject: [PATCH 35/35] finish --- app/services/papers_service/papers_service.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/app/services/papers_service/papers_service.py b/app/services/papers_service/papers_service.py index 478e3f8..16b7d91 100644 --- a/app/services/papers_service/papers_service.py +++ b/app/services/papers_service/papers_service.py @@ -65,7 +65,7 @@ def build_paper(pid: str) -> Paper: citation_count = e.get("citationCount"), influentialCitationCount = e.get("influentialCitationCount"), fieldsOfStudy = e.get("fieldsOfStudy"), - tldr = e.get("tldr", {}).get("text"), + tldr = (e.get("tldr") or {}).get("text"), # ← 수정 authors = [Author(name=a["name"]) for a in e.get("authors", [])], sim_score = random.uniform(0, 1), # stub @@ -119,19 +119,24 @@ async def ensure_kw2pids(root: str, keyword: str, async with httpx.AsyncClient() as client: resp = await client.post( f"{GRAPH_BASE}/graph", - json={"root": root, "top1": top1, "top2": top2}, + json={"root": keyword, "top1": top1, "top2": top2}, timeout=15 ) if 
resp.status_code != 200: raise HTTPException(502, "graph_service error") - data = resp.json() # {keyword_tree:…, kw2pids:{…}} - kw2pids.update(data["kw2pids"]) # 여러 키워드 한 번에 캐시 + data = resp.json() + kw2pids.update(data["kw2pids"]) - # 파일에도 저장 - async with save_lock: - with open(os.path.join(BASE_DIR, "kw2pids.json"), "w") as f: - json.dump(kw2pids, f, ensure_ascii=False) + # 🔽 파일 캐시 저장 시도 → 읽기 전용이면 무시 + try: + async with save_lock: + path = os.path.join(BASE_DIR, "kw2pids.json") + with open(path, "w") as f: + json.dump(kw2pids, f, ensure_ascii=False) + except OSError: + # read-only · 컨테이너 환경에선 무시하고 넘어감 + pass return kw2pids.get(keyword, []) \ No newline at end of file