Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
12cba7e
fix: update graph_service files
hyun-hyang Jun 12, 2025
4e75978
fix: update graph_service files
hyun-hyang Jun 12, 2025
b5cbe2e
fix: update graph_service files
hyun-hyang Jun 12, 2025
67c6665
s
hyun-hyang Jun 12, 2025
085d52f
s
hyun-hyang Jun 12, 2025
3b79d27
fix: update graph_service files
hyun-hyang Jun 12, 2025
78cbb75
s
hyun-hyang Jun 12, 2025
b0e0463
hi
hyun-hyang Jun 12, 2025
4195a90
.
hyun-hyang Jun 12, 2025
b9d14b8
s
hyun-hyang Jun 12, 2025
ccd8a35
s
hyun-hyang Jun 12, 2025
f07ba06
s
hyun-hyang Jun 12, 2025
af76635
s
hyun-hyang Jun 12, 2025
cd80170
s
hyun-hyang Jun 12, 2025
47cdcf3
s
hyun-hyang Jun 12, 2025
be1c604
S
hyun-hyang Jun 12, 2025
17034e8
s
hyun-hyang Jun 12, 2025
b006898
s
hyun-hyang Jun 12, 2025
daa25eb
s
hyun-hyang Jun 12, 2025
d212dde
롤백 가보자
hyun-hyang Jun 12, 2025
4601f4c
Merge remote-tracking branch 'origin/model' into model
hyun-hyang Jun 12, 2025
2f5d44b
ignore
hyun-hyang Jun 13, 2025
415da91
im back
hyun-hyang Jun 13, 2025
c69cae0
Merge branches 'model' and 'model' of https://github.com/soopsong/sea…
hyun-hyang Jun 13, 2025
4525bbd
s
hyun-hyang Jun 13, 2025
43032d7
feat: safe parfe
hyun-hyang Jun 13, 2025
81f608d
?
hyun-hyang Jun 16, 2025
888ec98
docker: graph_service 및 compose정리 완료
hyun-hyang Jun 16, 2025
c8c6153
working: papers_service
hyun-hyang Jun 16, 2025
6bee4bd
api:paper_service
hyun-hyang Jun 16, 2025
843493c
db: paper_db
hyun-hyang Jun 16, 2025
ebff14a
api:papers_service: db docker
hyun-hyang Jun 16, 2025
4d513ca
api(paper_service): tar.gz로 변경
hyun-hyang Jun 16, 2025
1d82b8b
goood
hyun-hyang Jun 16, 2025
36b4b6a
as
hyun-hyang Jun 16, 2025
75b9871
tldr
hyun-hyang Jun 16, 2025
a95c7a2
finish
hyun-hyang Jun 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ app/model/model_save/*
#data
app/services/graph_service/data/indices/paper_ivf.index
app/services/graph_service/data/indices
app/services/papers_service/data

app/indices
app/routers/previous/output/summaries_dir
Expand All @@ -25,11 +26,17 @@ app/data
app/routers/output/summaries_dir/*




#logs
app/scripts/logs
app/scripts/pyrouge_root
app/scripts/running_logs

app/services/old



#etc
app/etc/*

Expand Down Expand Up @@ -58,6 +65,11 @@ share/python-wheels/
*.egg
MANIFEST


app/data/
app/model
app/tools/

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
Expand Down Expand Up @@ -206,3 +218,4 @@ cython_debug/
.pypirc
app/data/inductive/train.jsonl
app/routers/output/summaries_dir/
app/services/papers_service/data/paper_db.json
File renamed without changes.
16 changes: 9 additions & 7 deletions app/runtime/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,23 @@ ENV DEBIAN_FRONTEND=noninteractive \
PYTHONUNBUFFERED=1 \
NVIDIA_VISIBLE_DEVICES=all

WORKDIR /app

# 필수 패키지
RUN apt-get update && apt-get install -y \
python3-pip python3-dev git && \
rm -rf /var/lib/apt/lists/*

# 프로젝트 소스 복사
WORKDIR /workspace
COPY .. /workspace
# 1) requirements만 먼저 복사 → 레이어 캐시
COPY runtime/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
&& pip install --no-cache-dir -r requirements.txt

# 의존성(인퍼런스 전용)
RUN pip3 install --upgrade pip \
&& pip3 install -r app/runtime/requirements.txt
# 2) 실제 런타임 코드 복사
COPY runtime/ ./runtime/

# 지표·인덱스(volume 으로 붙여도 OK)
# COPY indices/ /workspace/indices

EXPOSE 8004
CMD ["uvicorn", "app.runtime.api:app", "--host", "0.0.0.0", "--port", "8004"]
CMD ["uvicorn", "runtime.api:app", "--host", "0.0.0.0", "--port", "8004"]
File renamed without changes.
8 changes: 2 additions & 6 deletions app/runtime/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from runtime.cluster_searcher import search_clusters, cluster2pids, meta
from runtime.graph_builder import build_tree


app = FastAPI(title="SearchForest-AI Recommend API")


Expand All @@ -32,7 +33,7 @@ class RecResponse(BaseModel):
@app.get("/inference", response_model=RecResponse)
def recommend(
query: str = Query(..., description="검색 쿼리"),
top_k: int = Query(5, gt=1, le=10) # default 5
top_k: int = Query(10, gt=1, le=10) # default 10
):
# 1) 쿼리 기준 top-k 클러스터
hits = search_clusters(query, top_k)
Expand All @@ -46,11 +47,6 @@ def recommend(
root["children"].append(cluster_node)
return {"results": root }






# 로컬 실행용
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8004)
Expand Down
47 changes: 45 additions & 2 deletions app/runtime/graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import torch
from runtime.cluster_searcher import meta, cluster2pids # ← meta 와 함께 추가로 import


kw2pids: dict[str, list[str]] = {} # keyword → [paper_ids]


# ── 전역 설정 ───────────────────────────────────────────
Expand Down Expand Up @@ -121,6 +121,19 @@ def contains_kw(abs_txt: str, kw: str) -> bool:
max_features=40_000,
)

# from sklearn.feature_extraction.text import TfidfVectorizer

# # 전처리한 phrase 문서를 그대로 feeding
# vectorizer = TfidfVectorizer(
# tokenizer=lambda s: s, preprocessor=lambda s: s, lowercase=False,
# ngram_range=(1,3), min_df=5, max_df=0.8
# )
# tfidf_mat = vectorizer.fit_transform(docs) # shape (N_docs, N_terms)
# idf = vectorizer.idf_
# vocab = vectorizer.vocabulary_ # dict{phrase: idx}



def top_keywords(pids, n=8):
docs = [_as_text(G.nodes[p].get("abstract", "")).lower()
for p in pids if G.has_node(p)]
Expand Down Expand Up @@ -156,7 +169,17 @@ def build_tree(root_kw: str, cid: int, depth: int = 1):
))
pids_lvl0 = cluster2pids[cid]

tree = {"id": root_kw, "value": 1.0, "children": []}

# ① root 에도 pids 부여
tree = {
"id": root_kw,
"value": 1.0,
"pids": pids_lvl0, # ★ 추가
"children": []
}

# kw2pids 전역 캐시에도 root 등록
kw2pids[root_kw] = pids_lvl0

# ── depth-1 (최대 3개) ──────────────────────
for kw1, sc1 in select_kw_scored(root_kw, cand, tfidf_dict, k=3):
Expand All @@ -172,16 +195,36 @@ def build_tree(root_kw: str, cid: int, depth: int = 1):
"id": kw1,
"value": round(sc1, 4),
"pids": hop1, # 필요 없으면 제거
"children": []
}

kw2pids[kw1] = hop1

# ── depth-2 : parent=kw1, 최대 3개 ───────
if depth > 1:
for kw2, sc2 in select_kw_scored(kw1, cand, tfidf_dict, k=3):
# node1["children"].append({
# "id": kw2,
# "value": round(sc2, 4),
# })
# hop-2 pids (root → kw1 → kw2)
kw2_emb = model.encode([kw2],
normalize_embeddings=True)[0]
hop2 = [
p for p in hop1
if (emb := get_abs_emb(p)) is not None
and util.cos_sim(kw2_emb, emb).item() > COS_TH2
]

node1["children"].append({
"id": kw2,
"value": round(sc2, 4),
"pids": hop2,
})

# kw2pids에 2-depth 저장
kw2pids[kw2] = hop2

tree["children"].append(node1)

return tree
6 changes: 5 additions & 1 deletion app/runtime/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ torch
sentence-transformers
faiss-gpu
networkx
tqdm
tqdm
fastapi
uvicorn
pydantic
spacy
2 changes: 1 addition & 1 deletion app/runtime/run_uvicorn.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash

uvicorn runtime.api:app --reload --port 8004
uvicorn runtime.api:app --host 0.0.0.0 --port 8004 --reload
17 changes: 17 additions & 0 deletions app/services/get_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import aiohttp, asyncio

# Semantic Scholar Graph API paper-lookup endpoint; "{}" is filled with a paper id.
S2_URL = "https://api.semanticscholar.org/graph/v1/paper/{}"
# Comma-separated list of metadata fields requested for each paper.
FIELDS = "title,abstract,year,venue,referenceCount,citationCount," \
"influentialCitationCount,fieldsOfStudy,authors,tldr"

async def fetch_paper(session, pid):
    """Fetch metadata for a single paper id from the Semantic Scholar API.

    Builds the request URL from ``S2_URL``, asks for the fields listed in
    ``FIELDS``, raises for non-2xx responses, and returns the decoded JSON body.
    """
    request_url = S2_URL.format(pid)
    query = {"fields": FIELDS}
    async with session.get(request_url, params=query, timeout=10) as resp:
        # Fail fast on HTTP errors (4xx/5xx) before attempting to parse JSON.
        resp.raise_for_status()
        payload = await resp.json()
    return payload

async def fetch_many(pids):
    """Fetch metadata for all given paper ids concurrently.

    Opens one shared HTTP session, fires a ``fetch_paper`` coroutine per id,
    and returns the results in the same order as ``pids``.
    """
    async with aiohttp.ClientSession() as http:
        coros = (fetch_paper(http, paper_id) for paper_id in pids)
        results = await asyncio.gather(*coros)
    return results
3 changes: 1 addition & 2 deletions app/services/graph_service/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
FROM python:3.9-slim

WORKDIR /app
COPY requirements.txt .
COPY . .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .


# FastAPI Uvicorn 실행
Expand Down
115 changes: 0 additions & 115 deletions app/services/graph_service/dummy_data.py

This file was deleted.

Loading