From 67b6eca671b0a80737d19337d8f1b2588820f506 Mon Sep 17 00:00:00 2001 From: Xujie Han <1498126985@qq.com> Date: Tue, 2 Dec 2025 17:58:02 +0800 Subject: [PATCH 1/6] : health test --- backend/app/api/v1/__init__.py | 1 + backend/tests/test_health.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 backend/tests/test_health.py diff --git a/backend/app/api/v1/__init__.py b/backend/app/api/v1/__init__.py index 2cd794b..ca8e43f 100644 --- a/backend/app/api/v1/__init__.py +++ b/backend/app/api/v1/__init__.py @@ -17,3 +17,4 @@ api_router.include_router(library.router, prefix="/library", tags=["library"]) + \ No newline at end of file diff --git a/backend/tests/test_health.py b/backend/tests/test_health.py new file mode 100644 index 0000000..2fd899e --- /dev/null +++ b/backend/tests/test_health.py @@ -0,0 +1,15 @@ +"""Integration tests for the health check endpoint.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def test_health_endpoint_returns_ok(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/health") + + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + From d719fcdb291e1281513a5538180ce6dd21642231 Mon Sep 17 00:00:00 2001 From: Xujie Han <1498126985@qq.com> Date: Wed, 3 Dec 2025 11:26:58 +0800 Subject: [PATCH 2/6] : paper test --- backend/app/api/v1/endpoints/papers.py | 2 + backend/tests/test_papers.py | 189 +++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 backend/tests/test_papers.py diff --git a/backend/app/api/v1/endpoints/papers.py b/backend/app/api/v1/endpoints/papers.py index c48d4fa..156f941 100644 --- a/backend/app/api/v1/endpoints/papers.py +++ b/backend/app/api/v1/endpoints/papers.py @@ -182,6 +182,8 @@ async def upload_paper( original_display_name = normalize_original_filename(file.filename) storage_candidate = sanitize_storage_filename(original_display_name) + # Ensure the upload directory exists even if MEDIA_ROOT is cleaned between tests. + UPLOAD_DIR.mkdir(parents=True, exist_ok=True) stored_filename, destination = ensure_unique_storage_name(UPLOAD_DIR, storage_candidate) await asyncio.to_thread(destination.write_bytes, cleaned_bytes) diff --git a/backend/tests/test_papers.py b/backend/tests/test_papers.py new file mode 100644 index 0000000..2ff7f29 --- /dev/null +++ b/backend/tests/test_papers.py @@ -0,0 +1,189 @@ +"""Integration tests for paper library (upload/list/import) endpoints.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from httpx import AsyncClient + +from app.api.v1.endpoints import papers as papers_endpoint + +pytestmark = pytest.mark.asyncio + +MEDIA_ROOT = Path(os.environ["MEDIA_ROOT"]) + + +async def _register_user(client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(client: AsyncClient, email: str, password: str) -> str: + response = await client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def test_search_papers_returns_not_implemented(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/papers", params={"q": "test"}) + + assert response.status_code == 501 + assert response.json()["detail"] == "Search service not yet available" + + +async def test_get_paper_returns_not_implemented(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/papers/1") + + assert response.status_code == 501 + assert response.json()["detail"] == "Paper retrieval not yet available" + + +async def test_list_uploaded_papers_empty(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-empty@example.com", "StrongPass1") + token = await _login(async_client, "papers-empty@example.com", "StrongPass1") + + response = await async_client.get( + "/api/v1/papers/uploads", + headers=_auth_headers(token), + ) + + assert response.status_code == 200 + data = response.json() + assert data["items"] == [] + assert data["total"] == 0 + assert data["page"] == 1 + assert data["page_size"] == 12 + assert data["total_pages"] == 0 + + +async def test_upload_pdf_and_list_back(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-upload@example.com", "StrongPass2") + token = await _login(async_client, "papers-upload@example.com", "StrongPass2") + + pdf_bytes = b"%PDF-1.4\ncontent" + upload_response = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("sample.pdf", pdf_bytes, "application/pdf")}, + ) + + assert upload_response.status_code == 201 + uploaded = upload_response.json() + assert uploaded["original_filename"] == "sample.pdf" + assert uploaded["content_type"] == "application/pdf" + assert uploaded["file_size"] == len(pdf_bytes) + assert isinstance(uploaded["id"], int) + assert isinstance(uploaded["file_url"], str) and uploaded["file_url"].startswith("/media/uploads/") + assert uploaded["file_hash"] + + stored_filename = uploaded["file_url"].rsplit("/", 1)[-1] + stored_path = MEDIA_ROOT / "uploads" / stored_filename + assert stored_path.is_file() + + list_response = await async_client.get( + "/api/v1/papers/uploads", + headers=_auth_headers(token), + ) + + assert list_response.status_code == 200 + listing = list_response.json() + assert listing["total"] == 1 + assert listing["page"] == 1 + assert listing["total_pages"] == 1 + assert len(listing["items"]) == 1 + assert listing["items"][0]["id"] == uploaded["id"] + + +async def test_upload_rejects_non_pdf(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-non-pdf@example.com", "StrongPass3") + token = await _login(async_client, "papers-non-pdf@example.com", "StrongPass3") + + response = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("notes.txt", b"hello", "text/plain")}, + ) + + assert response.status_code == 400 + assert response.json()["detail"] == "仅支持上传 PDF 文件" + + +async def test_upload_rejects_oversized_pdf(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-large@example.com", "StrongPass4") + token = await _login(async_client, "papers-large@example.com", "StrongPass4") + + max_bytes = papers_endpoint.MAX_UPLOAD_BYTES + oversized = b"%PDF-1.4\n" + b"0" * (max_bytes + 1) + + response = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("large.pdf", oversized, "application/pdf")}, + ) + + assert response.status_code == 413 + assert response.json()["detail"] == "文件体积超过 25MB 限制" + + +async def test_get_uploaded_paper_detail_requires_auth(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/papers/uploads/1") + + assert response.status_code == 401 + + +async def test_get_uploaded_paper_detail_not_found(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-detail@example.com", "StrongPass5") + token = await _login(async_client, "papers-detail@example.com", "StrongPass5") + + response = await async_client.get( + "/api/v1/papers/uploads/999", + headers=_auth_headers(token), + ) + + assert response.status_code == 404 + assert response.json()["detail"] == "未找到对应的上传文件" + + +async def test_import_paper_creates_uploaded_placeholder(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-import@example.com", "StrongPass6") + token = await _login(async_client, "papers-import@example.com", "StrongPass6") + + payload = { + "title": "Test Import Paper", + "abstract": "A paper about testing import.", + "doi": "10.1234/example", + "arxiv_id": None, + "pdf_url": None, + "folder_id": None, + } + + response = await async_client.post( + "/api/v1/papers/import", + headers=_auth_headers(token), + json=payload, + ) + + assert response.status_code == 201 + data = response.json() + + paper = data["paper"] + uploaded = data["uploaded"] + assert paper["title"] == payload["title"] + assert paper["abstract"] == payload["abstract"] + assert uploaded is not None + assert uploaded["original_filename"] == "Test Import Paper.pdf" + assert uploaded["file_size"] == 0 + assert uploaded["file_hash"] is None + assert isinstance(uploaded["file_url"], str) + assert uploaded["file_url"].startswith("#") + From b8625bb2f8d38a0ba6a97de7a15fc38d3a698690 Mon Sep 17 00:00:00 2001 From: Xujie Han <1498126985@qq.com> Date: Wed, 3 Dec 2025 11:30:19 +0800 Subject: [PATCH 3/6] : acdemic search test --- backend/tests/test_academic.py | 135 +++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 backend/tests/test_academic.py diff --git a/backend/tests/test_academic.py b/backend/tests/test_academic.py new file mode 100644 index 0000000..3c23681 --- /dev/null +++ b/backend/tests/test_academic.py @@ -0,0 +1,135 @@ +"""Tests for academic search endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +from app.schemas.academic import ( + ChatMessage, + IntelligentAnswer, + IntelligentSearchRequest, + IntelligentSearchResponse, + Paper, + PaperSearchResponse, + QueryInterpretation, +) + +pytestmark = pytest.mark.asyncio + + +class DummyAcademicSearchService: + async def search(self, request, sources=None): + assert request.query == "transformer" + papers = [ + { + "id": "p1", + "title": "Attention Is All You Need", + "authors": ["Vaswani et al."], + "abstract": "We propose the Transformer...", + "year": 2017, + "venue": "NIPS", + "url": "https://example.com/p1", + "citation_count": 12345, + "source": "semantic_scholar", + }, + ] + return { + "papers": papers, + "total": 1, + "sources_used": sources or ["semantic_scholar"], + "source_errors": None, + "hits_per_source": {"semantic_scholar": 1}, + } + + +class DummyIntelligentSearchService: + async def search(self, request: IntelligentSearchRequest) -> IntelligentSearchResponse: + paper = Paper( + id="p2", + title="Survey of Transformers", + authors=["Alice"], + abstract="A survey on transformers.", + year=2020, + venue="ICLR", + url="https://example.com/p2", + citation_count=100, + source="semantic_scholar", + ) + interpretation = QueryInterpretation( + original_query=request.query, + intent="search", + needs_search=True, + ) + answer = IntelligentAnswer( + response="This is a dummy intelligent answer.", + ) + return IntelligentSearchResponse( + papers=[paper], + total=1, + interpretation=interpretation, + answer=answer, + conversation=[ChatMessage(role="user", content=request.query)], + search_performed=True, + search_reason="dummy", + ) + + +async def test_academic_search_uses_service(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + from app.api.v1.endpoints import academic as academic_endpoint + + monkeypatch.setattr(academic_endpoint, "AcademicSearchService", lambda: DummyAcademicSearchService()) + + payload = {"query": "transformer", "limit": 10} + + response = await async_client.post("/api/v1/academic/search", json=payload) + + assert response.status_code == 200 + data = PaperSearchResponse(**response.json()) + assert data.total == 1 + assert len(data.papers) == 1 + assert data.papers[0].title == "Attention Is All You Need" + assert data.sources_used == ["semantic_scholar"] + assert data.source_errors is None + + +async def test_intelligent_search_returns_plain_response_without_auth( + monkeypatch: pytest.MonkeyPatch, + async_client: AsyncClient, +) -> None: + from app.api.v1.endpoints import academic as academic_endpoint + + monkeypatch.setattr(academic_endpoint, "IntelligentSearchService", lambda: DummyIntelligentSearchService()) + + payload = {"query": "large language models", "limit": 5} + + response = await async_client.post("/api/v1/academic/intelligent-search", json=payload) + + assert response.status_code == 200 + data = IntelligentSearchResponse(**response.json()) + assert data.total == 1 + assert len(data.papers) == 1 + assert data.papers[0].title == "Survey of Transformers" + assert data.answer.response == "This is a dummy intelligent answer." + # When user is anonymous, the endpoint should not add conversation_id. + assert data.conversation_id is None + + +async def test_intelligent_search_value_error_returns_400( + monkeypatch: pytest.MonkeyPatch, + async_client: AsyncClient, +) -> None: + from app.api.v1.endpoints import academic as academic_endpoint + + class FailingService: + async def search(self, request: IntelligentSearchRequest) -> IntelligentSearchResponse: # pragma: no cover - behavior via HTTP + raise ValueError("Invalid intelligent search request") + + monkeypatch.setattr(academic_endpoint, "IntelligentSearchService", lambda: FailingService()) + + payload = {"query": "", "limit": 5} + + response = await async_client.post("/api/v1/academic/intelligent-search", json=payload) + + assert response.status_code == 400 + assert response.json()["detail"] == "Invalid intelligent search request" + From c7e2f0f68883a6d46df50faf8a03044ad5070eb0 Mon Sep 17 00:00:00 2001 From: Xujie Han <1498126985@qq.com> Date: Wed, 3 Dec 2025 11:47:28 +0800 Subject: [PATCH 4/6] : acdemic search deep test --- backend/.coverage | Bin 69632 -> 69632 bytes backend/tests/test_academic.py | 243 ++++++++++++++++++++++++++ backend/tests/test_auth.py | 90 +++++++++- backend/tests/test_conversations.py | 176 +++++++++++++++++++ backend/tests/test_library.py | 253 ++++++++++++++++++++++++++++ backend/tests/test_notes.py | 170 +++++++++++++++++++ 6 files changed, 931 insertions(+), 1 deletion(-) create mode 100644 backend/tests/test_conversations.py create mode 100644 backend/tests/test_library.py create mode 100644 backend/tests/test_notes.py diff --git a/backend/.coverage b/backend/.coverage index 065f94426ee0c29d891514fbc8763e7025e8d680..e3997df6e97a6addba03a08e9cae70d7fe6eb5a1 100644 GIT binary patch literal 69632 zcmeI5d2kfhoyWUpdZxSQ=pJ22LP*nyLkJxsAwZnEB~AmzfP}M{@$DJ zIgoIL9wjmOHE4dXkN550`}8~e^-SM!`&w^Uk-7uHJ~=GaQ4WfxsU}IHD2j)_EcnwF z76Od60+vn*UoXg0)sI}Vi(4q0`4LLI$G%>yvi-e%w)GubAO8*Ov#iW}U;dMS`WON@|vZ zf%~L>B`9@!eTwAucX>T>*z50+!o7-ibR?v7p@t5&K~skgtj6(C$nEWdSYf3HB>IEi zJ~_Bk+M(>M#0RKn3oC=+_y!PByghz&gfvHuUM>ZdZY8MrJxVC<#T;)}xutyXkcG|9 z$)WctI8AcU1OKgQseuj2q^#-&Ax|LKg}m>snnclR^sUb&K zG#)GB_ud;(5*NQxO2(|j$?yj%C6oxr0kf>;{KYvm1-Vig4BhL4{+3&ggadfp4Hw^C z3m>+JvN$&1>7@7Fi!%zsl^Lx}*NmT3~3uakAq{_E!(R7%n_2`WWCpA_iEMFz!| z;ui;Ub84A!X)xp`uEchUG`U0Klc@5{7kOBn+>H_Vs?ug%mib@Ej z!4nuzf^v^it?rFi3xDa11X-t+H|MuIua|WaS-@SJ2wI|rW;Va5h~C5FY@}W-)v#$< zNV}2~RHNFiK1B{if?7rLd6lFVBUwvoWklTr$*E$^`OQVwOOAS+yH*WwQI=uz^YZ9u zyhNaS3H_PU5`nfPi@-Gv!5{B_zZ~wB#`}&ogX3i;bNp6~GaMz+w^WM7+*yVkN zn0_Br`smM?<}%unba{=kvuZ&0L2c46dxKD9p$?9>01`T|ONN8x&OjuL^DkP(*%P%r zNq^%4$#soA9951FhvK~<5DIyrSw+oOXo5g#s!;c%!imlkuS6iC zR0f4I1V!{3O-)qYi)t)Qzq%ioAk{v(D_-2uMM+V-uU8o-7jKQ) z)_rnF@&pyw32kY4nep?o0lHf5TJ(uV$sFp+MV&O8Ur|9vBWhuc*E8-AH0ph_=C-`dmD^-$qLZznaMAa*G@&}#e)sB(*6!Ol1Z?I^XcV=1?Tm$Z19g8yV81PB2_ zfDj-A2mwNX5Fi8y0YZQfAOr}3TaSQ|Hqa~@{~N>;lz0X{$U+DZ0)zk|KnM^5ga9Ex z2oM5<03kpK5CS(P0h_^CENV}9)HrBkQK9+_Kz()Xyy_YYq{<+kro_|Y$2VmHiPMAt zAwUQa0)zk|KnM^5ga9Ex2oM5<03nbdU^5oe+HC-X#aP6vcL7Xr1bY4-mMroefCMfQ zAOr{jLVyq;1PB2_fDj-A2mwNX5Fi9@Ap(v%1Ehu)M^Ku?xVeRrN0LGa5CVh%AwUQa z0)zk|KnM^5ga9Ex2;A%hgmxxsr(K&v@FR(?!N_*6(kuJn$5|u6km6beKLp&<8{VkM z=x2moow8?#;_q_F{r#@KK$qeRx!}i;gYa{S0{Q*Fn>`~C{|Ny?fDj-A2mwNX5Fi8y z0YZQfAOr}3&m~}_jg%dI|DP7GQ1G8Dga9Ex2oM5<03kpK5CVh%AwUQa0)zk|a0?PJ z8tpuq|L;>~MO-=O6n_MwWvBIyn=6 zl`A#uU>pq)uXN_vYGxQV!&j%6lI5_aVeW|WS64s0c=_;$tq?(7K8PK58VZ-e{`utt zSI=8vGe=#rQrmv|FPk^DNhV<_>?)}pPQHrurW>`64l%lp8(AiVa z_^E01jiZfk7$JImIgQSrSehuRNrbidR)h?e(fBqTUQS)W7JnFqsRC*_4vtcvu*n?| zvJHQR)@?TQX)YKYJp{f}RLwR><&ezT(qlof8F_$kJK53a#$;w zR(68q%n(Z<;C zV&++}Stz3UnP78;kr!e-KLZ8rBU@wjTqy*z*@eIUZ*DpYS+|9 zV&t9ZXd4`Du#`Y>3crQg8f)Z=(ed2K3vTN)2o;$8p~g@n%@)Cm!`w}?Q&E8VNX#yT z!0hakmJ+{{rH1QI_53v*9V&pUws&{&*fD0>3iG3BmkQCmwHNMC6Pu6lh+A?NPk}(# zcx>0m6S3o8HTY=59`2WaIJ5g79K)`U?}!~?hOfkq(5UA4XD7w2;YJ^KQY{qS)YK$# zc@Q$4$fcP`PjKDm;{PYqO1~ z4hWmGfr((4%VvRfEZtw2?|?u}*5N0gF~(v?E*?F#_UMVwK{{tYFT#%8l9wKT#%2~! zBnMG2RHIQh5-zU1~^fIUBEXR}XzUbb+OsB&wki zj`-p(Kp@Wwd7-jx#1(_;=@LzyV6V{Bs5svOo42=&9gdy&s05{P^fFELJE^nK7`~61 z7-|?c3}3{p!WkpceF%_E|8@Z5P+^EDXH)`2-uM>VP~EmFBP z7o4+LOknxg;p4lnmf&=b!Y67_M=?OOi7SO}8uV9CAK;c+fIzkxGQ;eKu^&gaHT2>H z_Iu1G*d|P&8yK*=^RC?e+0gD|zdd^K!-IP-A9?KD$5-DRd@$CtvgxgX-Qrh|AE+8$ zaPfiTBma8x(I5PL$KlIoj$K{$cSFZsZ)7?;^X2S;sx>l#FOH2@h9R-VnEy_#Dw39 zcZp96e-PdhMundW=Y$QyQel-46+*&2!kxk;;bGx<;Thq(!hS&!?iYN*Y@tAC5uCz& zp$hIbd{g+L_>TBX;rqg4;?Kp`g!jc)#R1_Lx739ti6I0C0YZQfAOr{jLVyq;1PB2_ z;0sOwjYa7N9oW>jV>7=Eo5iizENa1KRx>sWo3NSPh|SCfZ00V#%Xn!=|bho5mV!mbtK*UX4v@6*f{OHXRk%w9dt5 zS~)h(IoQl7!=_?3Hbt|r5oTgzpMecqij8GDHdYB6n-d$p1RJgx8`Csw%thERQ?bb| z#HP3a8!;an;}mQP^RQ{p#b#;_HU>QWr#}CTi93T2;B< z>l&zqH>`T$t*R^6GSMO(B3+#>e;};5gGzrOhDSciK zXf~{8qSGfuGZO9vz2&V;l-Hr>2?Q0F9M>pW!$b>pXmlz4aDf%S$E$=it>P6-G)spT z_9vuxB0+B$WxS=-WaT{cvSD;JmuzC96*@eFa*>)b(#yxx+n8vvKGA-;UkRqG72eK7 zXX#KISK6>PN_4?wDfTL|4`;ai$to0Xx7Y6tyWQaBlny2;>QA>E9P|!=Ox`Lcnx|hT z#i85`97>VNS#BKC;ZL9J^+R&9b-rX97bVED-QFHh$;zZarliH8vu%Rn6H-DUs8Ych zv4x4UI*5QsB<%H}9K8bCVV!lT_V)39UjXXkE;lYEAZgbLTGd0XCaBnS<{4E*bH=K( zd?jSy73^x#+#cD}i(IqltOZofKCfR1MgSK`ySE2AL6Ohw5~z*0yF1|P!monoGJ_Z1 z*{`6?!DeW)#T!F2lx5zv z88xr2!N_*6(kuI+sf<7_ajo))ygj|)jf&jm_4l|sWzP=iDnJjlKUD&a|4U8lDEQ^Q z?P9U;p0E#onSY(oXg_cNp7|;BBKtbqdE1k=D${Z6yVkwdO3SY;U$-pakMmFPZQL09 zW9}c>3iEO9ZZ^z)DSkrs&oKlc=O6?Kft!(l&L%VjeS|(aK0=4)szYaUrO716<*ho$ z<FtD8t+a4_J5VhpnLGR{8U)DC^wT}-q-BQLcoKcvV(Pj5=4T-nV; z*H4Oce=soMg<)q1u34Y#4|_dsXuJYG83C!ehlw^$@@WEan^g8GgCJi$X@HWcP)Dt< zyP4?XN%ma3vsR_E`O!KB5q!%9(m!2q{i1eKP+f}eLx5JP&bC5xO1tAC=`91QPuGn1 zr)AiCP;bgRbvD#m>XP+3pqinxIZTKpCon29luBt?i_r7`b<|!;d|kXxbP2x~4hXjk z7W@>Y=+H0y~e$VV@+KnF?pEG{N@R{K|h8Fr1Jxo_pzkt(Bv}~wjsOYR4zl)JtQKwh^ zo9BU|D>KCjYU+)(AX}c9ENY!n+dZWQG}mXMsa4m>+Gv70b*l?bQ$M+Y>+F?KrXGnzit+&P0v8>n$gH@Gc!$W z+W2A343I0*;hvWB5^5;#m4cX_yf;B-Va0UN%F*vwq6gCy6V3nMHVw4&6rPDYQ>%(VE`ypZr5xTd6{IR|95~E+r1tRA zXYAJ&f~bzd4Wc+Bq^oTx05u(*k?YU@Z^)k{#l-yo#wnnvqaQNP$COLco(Gyb%7Ut? zwWup|K_>6UudphU>QIh;hf-vgWgDrX8}~<@|8L6zC+s&KJ$?Sa!vQKf>MN|0l9pBx zbgVb-4dnFX{C}IEFG=cLsCr{{ogGwkw1Jw+we$aNwv4<|=l|QSprRuat6pjC!bS_o z=qO(anF;3qH}IgTqh3)p)ecDN{Qo)*6!l+%ar6J}Y)0_3`G0sxfUY8=CR80%tTlm( zj!IosNo_5l7qgN9H668*s+Lj&JK#;idWtd>!L<4RHiJG&pp)WFGrX}n%HDW|kRpM` z|9Sk=X#T%XEQ0s={gtrTe%gM>zSYj#UbStuUa>xB-C#9aUbXaErt`<(P_hsLga9Ex z2oM5<03kpK{3!^uu7tjwj=`QbAje-sYF?qQg9zPB?P2fs4v^3>rqbrE)S=m$c979= zR}wry3h6g#*0+I}j{D8yZh#?(9|)U z9=32i5bS9(7Aiq%NS-ltpb=(vt$7A{Z+&sL-j?K@`_w}BaUsdeO zFY8)a%y;!H?dmM_cl9jK_pd0b*lS9~rSe7x$KX~cBaVyGOIqBu6jJpUmqQk-`ntLc zee3h*7T0$~5#qD;7uWUU0m!7-wY*2h$nO^?AD!R}p|2DEc|&Rg2L`Hg)H2AZv$t=lEd7$cLQm(4VyPovsMWBu7s|P=A8Xs! z+daO&H@~v0C#q&osjI)Mw4G zcx!QCO@D9nelaxu;tBA}zcj_1?C|0A^3_pCk@75F($!z80Uy$7YKQmIb!7P7`;ELx zhISHp6|G#E?_CyMGBR}@8wYyx;A>{Q5~I*3FSLAb91%+T|JvJRVWAB9B`bTE4DD-C zk0}-50s${j(Uz_0>6DjBN!FpWcWtq+u)H`f4nsZtEMq`F4p5U9>r$Ii}f z-dlCzc#9{*z{*a-$=27?GF}nVm*kH%q!RMTz$Ng$8^OO`{8fei75UxEE)PS&+buhL zQ2pKtE{m{ug8Xe?a=H-}`SQj{{cbRQ`C|?#vOF?SVn&i3$^Stn&(`d}m(~0^wddy50PsZXjE0@dWu> z&XSoN%9V#xCp%^gEw70$M)aA*CAg_~7gVw1x5ki8^1i@<{&GARE{t{t_YmKr5gX&mKr5gX&n;>BG(UHyv}!zZ*y3^U15yT^d9J1LgrH!?t!;VmYa zTZ?jDZ{NA_N$??gG!M>0Md&Q9EcUHm3@3*?TAEBUYqvc53#D^Q5Zc^ml0#~t-MveT zD@)^772q3WAaI|YNsg}t7FKnQUprxZv1jQj_{xs{(s=mpjukSC?BFc$^VYS%T!Trb z)v|!Eb1AB1`TD^EEiJBsjNwaTxMhbXEP~$-(NY&_ONOCQNeT-p>-{BTv&7eqg*6#lp%+$^8P8 zEZTd1gU;OQo8++f#~c*i>c!~>owCIo zTp&1QUvX70eC1DXpKSalV{pz8Iw)n9nq*3?u!92R^Z!ii3i_@HO#iaf-h~^o4c7LBU(Wb^ZdsH9X>_{MS^=$q zRzNGD70?Q31+)TM0j&TbBg}cUHqqGA{J(XidDI?a44(fF%bRltqNwTppjk`HkUUg` zA@l#{Vdkve^HB5urWW&%fzbEO{~Px;j~@u!Z5AC*({n?zP}Af%FxzNO8^~ht{J&v{ zKzo}1*Y7doc#2pzq+0Kv|7Y6F1p}wut>kL{pB|ElD!Q8gr-rDnLINXxRB!kBe_}|1-ar30hvYnT{%>T>DZF-B4pK~i z|DQH(t$Xnj9Lm0$KsBfL1^&pcT*xXa%$aS^=$qRzNH8p;W+vT2b+i ztXm5Id?>4_E2S0C3TOqi0$KsBfL1^&pcT*xXa%$aT7i$Q0?Eoe18Psjn^FUTe?Gc$ z=$dNhupa1_5wXCj+RzNGD z70?Q31+)TM0j+>mKr5gX&1t~@SEYy;TOWI!*aMbJU2W$ zJUN^n&I%`oqr=uP6SCmF;D3V6!Lz|1gNK5j2Hy*A3vLRo4Xy|-3i^WO!5P8A;K<;R zU}Df73=1*=^WXEg`Y-v<_>cMz`akyX^1tri;D5%y)L-xS_?`Z#{;~cXf12OnkMJ9O z&nMn??-lPk?{V*8?`Pikz1zI6c%Snw_b&8Sd&|7jy#?Mp?_h6&*XA{QNzZcMao=$N z?mp#ibbsmI=YHG0#l7CW%Kf<8?{>LoxeMJR+!^jTcck0ky1dG_@y+~M{uqCd|A^no zZ{c6ySMyKse!haA!B60a^8s>hb>`? z*fH!-Hks|mT3C`LoE^?<&L-z^=OO1O&Rx!}&KI4loliLZ&I;!Y=LF|)=Rl{!8R0ZI zuKiDYtNo(=w7t=Oz`obM!@kM>oPC*nzTIOlu}`t*+cWKn_P%z`_N;$eTdWtXr>x&w z_gnW^w_7(^*IJib7g)Vkr?tpB+L~oevf8a?OIV4-&cy49za{>ZcsTLX#CH>4Pkbry zsYE%kCef8RGqE5sH!(diHZdYmpWx^C|Q9=Kbb9<{jo&&1=oe%?r$4v(sE; z9&OGtCzAUD<6Ctp#_JaVIAn~^Uowrp~PV$&jDQf!#yi!MBu;qY9>AYYJ3xgo0L z`Zz|&9P;@ZSIKpX9Y(HIoN&nJ6dMWh**M40AlJk>hE?(ziHsUSm3%rn$|Qa&PUpK- zawnMH^oUqB|isAg1#pm}8^2zx8ewADrpWmyJOXBkbR^oJS zLp>?ixJo_|MWNKKZ}_;vbp6E&lio!NMe;(0p}0U{5T38l57sO6((7vIpQq6E)+%J~ z8ih9NS7_NKg{IY~&@fl?sOhL{tda!(xz*k{n9q5jN8zlQ-8HPPR5)|ixe8}YKc|K} zx)e^Iu|nb0Da#d3nYyfolZy%`Z(OQy()7+6PF$jJ;-rGYj)`a2aLi(bV>-@KIC{*P z3fo7Yp|Gv}bcLhZPE$B?)W;N#7EScTj>Mj@MdbPf4@g%&$Xp=lkd&@ku4ZOM%z;##!0b2`YG(bwG?(=I!wc)0uG!Q|Yk^ zlc^4cA~{B3DE3$Ahocp`{(cI%+g`(Jn?jrKt1w~jqtHlHR~H%6xP)@E6nU@QjwGI&TmwQMKY%_%w!e%+Zz;m zVZB1`)hT2=Q^RUnp~X@PO)II;FhyK5(+K05nbklA?_j>d?Y^gQt8f**UdLlJtSVDz zT27p+VcK!7M%Cgr+)XI8s)?9P%Vcm24w{A|139nC`Tv9VFNxSDUKY=YN5rqhed1f< zYvMX_g}5-9`=2I`6LZ8=F-GJ?onYa+;hW*#!zaTHF!R4Vye+&jye7OfToSDN0pUJjF7!hZY!6<6S^s0fgTar2JA+$->w~L;i($^cEI2JVE|?Qc4fYROgLGi~ zJN?)E=lsX~U;97wzwO`bf8M{+zsT?Nm;0yt$NPuDY=4YD+|T%qx66COd)|A}`@MI+ zcaL{F%=JI(ebQU+b$bQxWbY{N5SZ!j>ot0TN8Ro2X7{h|qwcTWAHqEUX7}^%mF~rE z$zATA?jG+R=1z0RxWnCy>+oIt4gNg;Gyfg`IsZQY2LCeu48Me*$Iszs@r8UIKbRlD z_u)D2bHcWPT(o_C&f zHaNd54^~9$4cfou=5&(y5BMPp2q$IGwB*^q!>HwCF^|pm8*O zh8i{)6o=2;XsFWhD#kWJ$0^?2M90S6w7wm>X-6DT-_}mYC?3^D_g6f86dkR&bvWHm z@vv6fuDCHv+Z5*->Ap42(tQ+X8|kPT=jg~9SLq1FB1iLzeL;sSc72M2YHHL&*Bqvf zL)UCk3|+HXF-V2uYKRYAvr!#~u9;H|vPC0nkgeXJII`wOeU;X$7@LRDx|r)mY@!*( z`4Kc-;}>X3aciC?6@xlLamz5OHoDd|x6nWxZ>~~bac&Fs6xZjdt2kXxx#CokvKnur zj^bpB+BFu`QY=z5QR5^v6$gSEiv4CvYaCFbnC~Q2#jxD+PsNT;-m5Vq?<$7no?VJz zx#b->Apm9T46-wt8*rnpN_MCiJ00@2;%z>8OYxR8*{=A7X0lE3rabwdm^1!%vQ;rG z)@)JCc9J(0J05vMF_i2dilJn$$0f@cShSJ9)HqF^u5pSyRpTUivc@6#v*IvCo~W@Pe^Ts)SU_VpxBBL@}%`{y{OUE^br|tBb#nn;>nF4RI5stK@eo#@2fBTg6ZBPacjr z)%+rPNO4m$`Av;qCl4ylHIZK{&gRIk6gOnaFBOBb4=ApyC-*DP)RAA*xJrJmIJt%V zOtHw2pVl}@exlglNq(#t9s>VJvFni^Duxxz`xJwMKTzxla&L_pxkoW{+`AP+$Nhe+ zTFM~bi&aZi$#+!@xTNkd zCATXE&2CdHeDV#&p-=uxaS)QP*VrewD)#Hif7V!#TWTDVn`<19ufhI*I?uRKKL3AH zyd<6#e-azS1F!~gr?^$z06PFK7Z;0uagI1!oFa~ab$|oKIM@TwB!sZSUE!AS#qh7; zx%=6oOM>W#I5&MsPsT7PP?H0Q3LpZ}VUAH~CNa8~g|S z`}{loTVX%IHU4G(g|I@f!avJD$)E2Z>Q99o0VDmKANmRJ9q&!=MeiB!G4CPo=ia^E zx4c`tFM6N$F7?j$R>7`-GrWb~k={&ivbVpN_v$_9H11pOYwmx$e|9&x_q+GH-*mqQ z>jnSemfiE*Zg+{h$UVlL?M`(&U}r$q^Ej+~?_8!~HUV?RlN7;ky$Luclb#?>$47-%Ahy4Ma>{NCvtQ<^Z z9c%<^U>+mRcIQ>+Z_X3W?_llV9_J3{tIl=KmCnUZ3HAt_=`3{S!Ro;Tr_E`0l1{?j zVZUZ?vLCk}wtr@S-@eWM3hWZN!oJup*(>Zb?S=L{`(S&5-DWr2N!zk^!A^k}t-n~0 zSiiP@WPQiF)%ue4Y3mYeoz-nEfxQC9ShKCE))*^q)mv_&n%I`uoOm|z$HYU4pC#^2 z+@828ac$y?#6^j|#PY-$uwUSa#6gJz68k2a5+ac>cbfk&pEsY3b`0EWe$%|!{DOJ4 zSvJ?2=bC4mC!6zO&%k7JwApHAOlG`iY&Bjoo-rOX9x{Gve9!m>bjRq$D)THD+S9D< zu*wmLmdh%0W3*Z2a74p$Du)4bnl+rt9C?sutE@5`gLhf1awy_kkbYK#Fw|s~nTSoz zta1pVOm;A0-8NP^2r*N~Dl-t%8CIE&m=vsXAfiaK%Cs1htTGkR&$7xCMDDZ7WJHIv z$|OV!YBmwkghEb0G)$**03fFgP@VDeAa8(jjl!*It;@~ zR@onMR2!>|MjSqhRrW({9nLE4h{IZ0r46wWin}jju8~#ti4l&DLd?R^kum02Wkifn zv^=7Kqr(w>C}k_63w0lc$U*-WLQTjfe?QB!_4wSS5=n#c2TKv>wE% zmj`)0l%NiSHxFZ#OoT9eWR)~xegvzeVtj#Bl8CK&RuPC&g%Gg?atIKcTUf=95l-PD z=C-hki&zi!<%sEeR$+)K=mAcQ(1&cqWQtX+7(p8tB17M<6L1tJ77;=+m^n!dAOc17 zp>L8H16KYxBHzi%7bAj><%6AuFGP7(#@TV}#63Li8ZQiHICzTZkwdc>y9^Eaejr!5GWO zBU&~qABPA|Qa%x>HS&x$OvJ~$I9ruDKN$| zI&Z4!byh~_P34+c8J#zk&9Sm{UP@C9Syqm$m!_n6=)I{rI0brd3QV+&-kXxmjNY3{ zZeeBg-jv9&GJ0=HfMV#qDIfX~dT+{uet_PaazPvP-V}IB8ND|pWkT;wIUp%|Z%QhL z-kXx$54|@fyPx!4PE(*wWW78E<&w?|)sfdiWW6-C1+qf#O}!39MDI<#*yNO@_cEHS zI_1cDSrXEt@xrK%mC<+wcy}3%S4d^hc!hwnETpXu7^UsWe?qeK=|4 zx!i|SNy}x_1Bx7%N#8=lg$)R-jE3t%!=c~8?gNH)>w?dt+qy}Qq1n37ThVLXP%yMw z7aCGJE$m!?wuzh;78ueDoz{isK%;e`Bcso{%w=e^E;I|etP8DzChJ0@pvSt|Qyl#d+6&_(%(kqk|gw~k=wp&ahpXrUZ#+vuPi z?%HUe9PA7Ilc$>)+9yw?8M-Hj8#071Y+*l^^W&|_vYbWRR^ z5RH>tjG=FGXd$#s4lN{I6Rs7*i98dYv@&!|zJoC|OulgnL%-w?%yFb$%3T9aWR?uB zCbUY}J;2Z@9T-BQQ95vT^hpPL3fd%00T{ZZ1AShaB&UwyL>9@NDno;WT>}jL(T0?0 zk2dr%bVnN=Dx*2tFrr3pv<(NX(XKLdMjM9HXpFF1fT1s1Fe*k{v_N0!itykW^o?AR zT5x@!D_U@Upeb5lUg(Jy3=h!~Ex5?h5iQUQ4bieWLqD{jhG>Tt)No`}L(6a?FXT{i zbVBQG?non)djg!u23exYBNH^_-T;Os2wMXfdY}n+D6~K`vx7$tXqd1sfT07LPlnJ8+y;QAXTUB1>3J~jF&*i6(D$lN!h!>^ z1;8;QFfC|UxdXtosXV}|^7H?#;SY$|E?yDOiO0pm;^*QXafkRS>;SkD=KdwILYyfU zih1H-F+sG6W|4%M|2yFuum|9&aAWvONUUF40j+>mKr5gX&B4 zOF;x&p31>8Mg`ZWa`JHwE>PuQeYk=vRJkNXz$L1je6oXUR5@5ls^B734i;Z4xJs3S zHNFZiQ|06XFkGj~!8$<&7piiwj# IntelligentSearchRe assert response.status_code == 400 assert response.json()["detail"] == "Invalid intelligent search request" + +async def test_academic_search_service_aggregates_and_scores(monkeypatch: pytest.MonkeyPatch) -> None: + from app.services.academic import search_service + + service = search_service.AcademicSearchService() + + class DummySemantic: + async def search(self, query: str, venue=None, year=None, limit: int = 20): # noqa: D401 + assert query == "deep learning for nlp" + return [ + { + "paper_id": "s1", + "title": "Deep Learning for NLP", + "authors": ["Alice"], + "abstract": "A study on deep learning in NLP.", + "year": 2020, + "venue": "ACL", + "url": "https://example.com/s1", + "citation_count": 10, + "source": "semantic_scholar", + }, + { + "paper_id": "s2", + "title": "Duplicate Paper", + "authors": ["Bob"], + "abstract": "Another paper.", + "year": 2019, + "venue": "EMNLP", + "url": "https://example.com/s2", + "citation_count": 5, + "source": "semantic_scholar", + }, + ] + + class DummyArxiv: + async def search(self, query: str, category=None, max_results: int = 20): # noqa: D401 + # Domain should map to cs.CL + assert category == "cs.CL" + return [ + { + "paper_id": "a1", + "title": "Deep Learning for NLP", + "authors": ["Alice"], + "abstract": "Same title from arXiv.", + "year": 2021, + "venue": "NeurIPS", + "url": "https://example.com/a1", + "citation_count": 3, + "source": "arxiv", + } + ] + + service.providers = { + "semantic_scholar": DummySemantic(), + "arxiv": DummyArxiv(), + "openalex": DummySemantic(), + } + + request = PaperSearchRequest(query="deep learning for nlp", domain="nlp", limit=10) + result = await service.search(request, sources=["semantic_scholar", "arxiv"]) + + assert result["total"] == 2 # duplicate title deduped across sources + assert len(result["papers"]) == 2 + assert set(result["sources_used"]) == {"semantic_scholar", "arxiv"} + assert result["source_errors"] is None + for paper in result["papers"]: + assert 0.0 <= paper.get("score", 0.0) <= 1.0 + + +async def test_intelligent_legacy_search_uses_query_parser_and_academic_service() -> None: + from app.services.academic.intelligent_service import IntelligentSearchService + + class DummyLLM: + def __init__(self) -> None: + self.is_configured = False + + class DummyQueryParser: + async def parse(self, text: str) -> QueryInterpretation: # noqa: D401 + return QueryInterpretation( + original_query=text, + intent="search", + needs_search=True, + keywords=["transformer", "models"], + authors=[], + venue=None, + year_range=[2020, 2023], + domain="nlp", + follow_up_questions=[], + ) + + class DummyAcademicService: + async def search(self, request: PaperSearchRequest, sources=None): # noqa: D401 + assert request.query == "transformer models" + assert request.year == 2023 + papers = [ + { + "id": "p1", + "title": "Transformer Models", + "authors": ["Alice"], + "abstract": "About transformers.", + "year": 2023, + "venue": "NeurIPS", + "url": "https://example.com/p1", + "citation_count": 42, + "source": "semantic_scholar", + } + ] + return { + "papers": papers, + "total": 1, + "sources_used": sources or ["semantic_scholar"], + "source_errors": None, + "hits_per_source": {"semantic_scholar": 1}, + } + + service = IntelligentSearchService( + academic_service=DummyAcademicService(), + llm_client=DummyLLM(), + query_parser=DummyQueryParser(), + ) + + request = IntelligentSearchRequest(query="transformer models", limit=5, sources=["semantic_scholar"]) + response = await service.search(request) + + assert response.total == 1 + assert len(response.papers) == 1 + assert response.papers[0].title == "Transformer Models" + assert response.interpretation.intent == "search" + assert response.search_performed is True + assert response.search_metadata is not None + assert response.search_metadata.total_results == 1 + assert isinstance(response.answer.response, str) and response.answer.response + + +def test_academic_domain_mapping() -> None: + from app.services.academic.search_service import AcademicSearchService + + assert AcademicSearchService._map_domain_to_arxiv_category(None) is None + assert AcademicSearchService._map_domain_to_arxiv_category("NLP") == "cs.CL" + assert AcademicSearchService._map_domain_to_arxiv_category("computer vision") == "cs.CV" + # "domain" 中包含 "ai",会被映射到 cs.AI + assert AcademicSearchService._map_domain_to_arxiv_category("unknown domain") == "cs.AI" + + +def test_semantic_scholar_normalize_paper() -> None: + from app.services.academic.providers.semantic_scholar import SemanticScholarProvider + + provider = SemanticScholarProvider() + raw = { + "paperId": "ABC123", + "title": "Sample Paper", + "authors": [{"name": "Alice"}, {"name": "Bob"}], + "abstract": "Test abstract", + "year": 2024, + "venue": "ICLR", + "url": "https://example.com", + "citationCount": 10, + "publicationDate": "2024-01-01", + "externalIds": {"DOI": "10.1000/xyz"}, + "tldr": {"text": "Short summary"}, + } + normalized = provider.normalize_paper(raw) + assert normalized["paper_id"] == "ABC123" + assert normalized["authors"] == ["Alice", "Bob"] + assert normalized["citation_count"] == 10 + assert normalized["source"] == "semantic_scholar" + + +def test_openalex_normalize_result_and_abstract() -> None: + from app.services.academic.providers.openalex import OpenAlexProvider + + provider = OpenAlexProvider() + inverted_index = { + "deep": [0], + "learning": [1], + "models": [2], + } + raw = { + "id": "https://openalex.org/W123", + "title": "Deep Learning Models", + "authorships": [{"author": {"display_name": "Alice"}}, {"author": {"display_name": "Bob"}}], + "abstract_inverted_index": inverted_index, + "publication_year": 2022, + "publication_date": "2022-01-01", + "cited_by_count": 5, + "ids": {"doi": "10.1000/xyz"}, + "concepts": [{"display_name": "Machine learning"}], + "locations": [ + { + "source": {"display_name": "ICLR"}, + "landing_page_url": "https://example.com/paper", + } + ], + } + normalized = provider._normalize_result(raw) + assert normalized["paper_id"] == "W123" + assert normalized["authors"] == ["Alice", "Bob"] + assert normalized["abstract"] == "deep learning models" + assert normalized["venue"] == "ICLR" + assert normalized["url"] == "https://example.com/paper" + assert normalized["source"] == "openalex" + + +def test_arxiv_provider_build_query_and_parse_xml() -> None: + from app.services.academic.providers.arxiv import ArxivProvider + + provider = ArxivProvider() + query = provider._build_query(' "deep learning" transformer ', category="cs.LG") + assert "all:\"deep learning\"" in query + assert "all:transformer" in query + assert "+AND+cat:cs.LG" in query + + xml_content = """ + + + http://arxiv.org/abs/2101.00001v1 + Sample Title + Sample abstract. + Alice + 2021-01-01T00:00:00Z + 2021-01-02T00:00:00Z + + + + + Accepted to ICML 2021 + ICML 2021 + 10.1000/xyz + + + """.strip() + + papers = provider._parse_xml_response(xml_content) + assert len(papers) == 1 + paper = papers[0] + assert paper["paper_id"] == "2101.00001" + assert paper["title"] == "Sample Title" + assert paper["authors"] == ["Alice"] + assert paper["year"] == 2021 + assert paper["url"] == "https://arxiv.org/abs/2101.00001" diff --git a/backend/tests/test_auth.py b/backend/tests/test_auth.py index 562f898..b650d65 100644 --- a/backend/tests/test_auth.py +++ b/backend/tests/test_auth.py @@ -1,9 +1,13 @@ """Integration tests for user registration and authentication flows.""" from __future__ import annotations +from urllib.parse import parse_qs, urlparse + import pytest from httpx import AsyncClient, Response +from app.api.v1.endpoints import auth as auth_endpoint + pytestmark = pytest.mark.asyncio @@ -93,4 +97,88 @@ async def test_get_current_user_success(async_client: AsyncClient) -> None: me_data = me_response.json() assert me_data["email"] == "eric@example.com" assert me_data["full_name"] == "Eric" - assert me_data["is_active"] is True \ No newline at end of file + assert me_data["is_active"] is True + + +async def test_github_login_unconfigured(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + # Ensure GitHub OAuth is seen as not configured + auth_endpoint.settings.github_client_id = None + auth_endpoint.settings.github_client_secret = None + + response = await async_client.get("/api/v1/auth/github/login") + + assert response.status_code == 503 + assert response.json()["detail"] == "GitHub OAuth 未配置" + + +async def test_github_login_sets_state_cookie(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + # Configure dummy GitHub OAuth credentials + auth_endpoint.settings.github_client_id = "dummy-client-id" + auth_endpoint.settings.github_client_secret = "dummy-client-secret" + + response = await async_client.get("/api/v1/auth/github/login", params={"next": "/dashboard"}) + + # Should redirect to GitHub authorize URL + assert response.status_code == 302 + location = response.headers["location"] + parsed = urlparse(location) + assert parsed.netloc == "github.com" + assert parsed.path == "/login/oauth/authorize" + query = parse_qs(parsed.query) + assert query.get("client_id") == ["dummy-client-id"] + assert "state" in query + + # State cookie should be set for CSRF protection + state_cookie = response.cookies.get(auth_endpoint.GITHUB_STATE_COOKIE) + assert state_cookie is not None + + +async def test_github_callback_success_creates_user_and_redirects( + monkeypatch: pytest.MonkeyPatch, + async_client: AsyncClient, +) -> None: + # Configure dummy GitHub OAuth credentials + auth_endpoint.settings.github_client_id = "dummy-client-id" + auth_endpoint.settings.github_client_secret = "dummy-client-secret" + + # Stub out external GitHub calls + async def fake_exchange(code: str, redirect_uri: str) -> dict[str, str]: + return {"access_token": "gh-token"} + + async def fake_fetch_profile(access_token: str) -> tuple[dict[str, str], str | None]: + return ( + {"id": 123, "name": "Git User", "login": "gituser", "avatar_url": "http://avatar"}, + "git@example.com", + ) + + monkeypatch.setattr(auth_endpoint, "_exchange_github_code_for_token", fake_exchange) + monkeypatch.setattr(auth_endpoint, "_fetch_github_profile", fake_fetch_profile) + + import json as _json + + # Manually set the OAuth state cookie with a known nonce + state = "test-nonce" + cookie_payload = _json.dumps({"nonce": state, "next": "/dashboard"}) + async_client.cookies.set(auth_endpoint.GITHUB_STATE_COOKIE, cookie_payload) + + # Then call callback with matching state and a dummy code + callback_resp = await async_client.get( + "/api/v1/auth/github/callback", + params={"code": "dummy-code", "state": state}, + ) + + assert callback_resp.status_code == 302 + redirect_url = callback_resp.headers["location"] + parsed = urlparse(redirect_url) + query = parse_qs(parsed.query) + # Should redirect to frontend with token and provider + assert "token" in query + assert query.get("provider") == ["github"] + assert query.get("next") == ["/dashboard"] + + token = query["token"][0] + # Token should be valid for /users/me + me_resp = await get_me(async_client, token) + assert me_resp.status_code == 200 + me_data = me_resp.json() + assert me_data["email"] == "git@example.com" diff --git a/backend/tests/test_conversations.py b/backend/tests/test_conversations.py new file mode 100644 index 0000000..c03fd92 --- /dev/null +++ b/backend/tests/test_conversations.py @@ -0,0 +1,176 @@ +"""Tests for conversation API endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def test_create_and_get_conversation(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-user@example.com", "StrongPass1") + token = await _login(async_client, "conv-user@example.com", "StrongPass1") + + create_payload = {"title": "My first conversation"} + create_resp = await async_client.post( + "/api/v1/conversations/", + json=create_payload, + headers=_auth_headers(token), + ) + + assert create_resp.status_code == 201 + conv_data = create_resp.json() + assert conv_data["title"] == "My first conversation" + assert conv_data["message_count"] is None + assert conv_data["last_message_preview"] is None + conv_id = conv_data["id"] + + get_resp = await async_client.get( + f"/api/v1/conversations/{conv_id}", + headers=_auth_headers(token), + ) + + assert get_resp.status_code == 200 + detail = get_resp.json() + assert detail["id"] == conv_id + assert detail["title"] == "My first conversation" + assert detail["messages"] == [] + + +async def test_list_conversations_includes_message_metadata(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-list@example.com", "StrongPass2") + token = await _login(async_client, "conv-list@example.com", "StrongPass2") + + # Create two conversations + for idx in range(2): + resp = await async_client.post( + "/api/v1/conversations/", + json={"title": f"Conversation {idx}"}, + headers=_auth_headers(token), + ) + assert resp.status_code == 201 + conv_id = resp.json()["id"] + # Add one message to each conversation + msg_resp = await async_client.post( + f"/api/v1/conversations/{conv_id}/messages", + json={"role": "user", "content": f"hello {idx}"}, + headers=_auth_headers(token), + ) + assert msg_resp.status_code == 201 + + list_resp = await async_client.get( + "/api/v1/conversations/", + headers=_auth_headers(token), + ) + + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["total"] == 2 + assert len(data["conversations"]) == 2 + for conv in data["conversations"]: + assert conv["message_count"] == 1 + assert conv["last_message_preview"].startswith("hello ") + + +async def test_update_conversation_updates_title_and_metadata(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-update@example.com", "StrongPass3") + token = await _login(async_client, "conv-update@example.com", "StrongPass3") + + create_resp = await async_client.post( + "/api/v1/conversations/", + json={"title": "Old title"}, + headers=_auth_headers(token), + ) + conv_id = create_resp.json()["id"] + + # Add a message so that last_message_preview is populated + msg_resp = await async_client.post( + f"/api/v1/conversations/{conv_id}/messages", + json={"role": "assistant", "content": "This is the last message preview."}, + headers=_auth_headers(token), + ) + assert msg_resp.status_code == 201 + + update_resp = await async_client.patch( + f"/api/v1/conversations/{conv_id}", + json={"title": "New title"}, + headers=_auth_headers(token), + ) + + assert update_resp.status_code == 200 + updated = update_resp.json() + assert updated["title"] == "New title" + assert updated["message_count"] == 1 + assert "This is the last message preview." in updated["last_message_preview"] + + +async def test_delete_conversation_marks_deleted(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-delete@example.com", "StrongPass4") + token = await _login(async_client, "conv-delete@example.com", "StrongPass4") + + create_resp = await async_client.post( + "/api/v1/conversations/", + json={"title": "To be deleted"}, + headers=_auth_headers(token), + ) + conv_id = create_resp.json()["id"] + + delete_resp = await async_client.delete( + f"/api/v1/conversations/{conv_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + # After deletion, getting the conversation should return 404 + get_resp = await async_client.get( + f"/api/v1/conversations/{conv_id}", + headers=_auth_headers(token), + ) + assert get_resp.status_code == 404 + assert get_resp.json()["detail"] == "对话不存在或无权访问" + + +async def test_add_message_requires_ownership(async_client: AsyncClient) -> None: + # User A creates a conversation + await _register_user(async_client, "owner@example.com", "StrongPass5") + owner_token = await _login(async_client, "owner@example.com", "StrongPass5") + create_resp = await async_client.post( + "/api/v1/conversations/", + json={"title": "Owner conversation"}, + headers=_auth_headers(owner_token), + ) + conv_id = create_resp.json()["id"] + + # User B tries to post message to A's conversation + await _register_user(async_client, "intruder@example.com", "StrongPass6") + intruder_token = await _login(async_client, "intruder@example.com", "StrongPass6") + + msg_resp = await async_client.post( + f"/api/v1/conversations/{conv_id}/messages", + json={"role": "user", "content": "I should not be allowed."}, + headers=_auth_headers(intruder_token), + ) + + assert msg_resp.status_code == 404 + assert msg_resp.json()["detail"] == "对话不存在或无权访问" + diff --git a/backend/tests/test_library.py b/backend/tests/test_library.py new file mode 100644 index 0000000..cb19ffa --- /dev/null +++ b/backend/tests/test_library.py @@ -0,0 +1,253 @@ +"""Integration tests for library folder and upload management endpoints.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from httpx import AsyncClient + +MEDIA_ROOT = Path(os.environ["MEDIA_ROOT"]) + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _upload_pdf(async_client: AsyncClient, token: str, name: str = "sample.pdf") -> int: + pdf_bytes = b"%PDF-1.4\ncontent" + resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": (name, pdf_bytes, "application/pdf")}, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_library_folders_crud(async_client: AsyncClient) -> None: + await _register_user(async_client, "library@example.com", "StrongPass1") + token = await _login(async_client, "library@example.com", "StrongPass1") + + # Initially no folders + list_resp = await async_client.get( + "/api/v1/library/folders", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["folders"] == [] + assert data["unfiled_count"] == 0 + + # Create a folder + create_resp = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "Reading", "color": "#fff"}, + ) + assert create_resp.status_code == 201 + folder = create_resp.json() + assert folder["name"] == "Reading" + assert folder["paper_count"] == 0 + folder_id = folder["id"] + + # Rename folder + rename_resp = await async_client.patch( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + json={"name": "Updated", "color": "#000"}, + ) + assert rename_resp.status_code == 200 + renamed = rename_resp.json() + assert renamed["name"] == "Updated" + assert renamed["paper_count"] == 0 + + # Delete folder (still empty) + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + +async def test_delete_library_folder_blocked_when_not_empty(async_client: AsyncClient) -> None: + await _register_user(async_client, "library2@example.com", "StrongPass2") + token = await _login(async_client, "library2@example.com", "StrongPass2") + + # Create folder + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "WithPapers", "color": None}, + ) + folder_id = create_folder.json()["id"] + + # Upload a PDF into this folder + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("paper.pdf", pdf_bytes, "application/pdf")}, + data={"folder_id": str(folder_id)}, + ) + assert upload_resp.status_code == 201 + + # Attempt to delete folder should fail + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 400 + assert delete_resp.json()["detail"] == "请先移动或删除文件夹中的文献" + + +async def test_assign_uploaded_paper_folder(async_client: AsyncClient) -> None: + await _register_user(async_client, "library3@example.com", "StrongPass3") + token = await _login(async_client, "library3@example.com", "StrongPass3") + + paper_id = await _upload_pdf(async_client, token) + + # Create folder and assign paper into it + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "FolderA", "color": None}, + ) + folder_id = create_folder.json()["id"] + + assign_resp = await async_client.patch( + f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": folder_id}, + ) + assert assign_resp.status_code == 200 + assigned = assign_resp.json() + assert assigned["folder_id"] == folder_id + + # Move back to unfiled + unassign_resp = await async_client.patch( + f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert unassign_resp.status_code == 200 + assert unassign_resp.json()["folder_id"] is None + + +async def test_assign_uploaded_paper_folder_not_found(async_client: AsyncClient) -> None: + await _register_user(async_client, "library4@example.com", "StrongPass4") + token = await _login(async_client, "library4@example.com", "StrongPass4") + + # Paper does not exist for this user + resp = await async_client.patch( + "/api/v1/library/uploads/999/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到对应的上传文件" + + +async def test_download_local_uploaded_paper(async_client: AsyncClient) -> None: + await _register_user(async_client, "library5@example.com", "StrongPass5") + token = await _login(async_client, "library5@example.com", "StrongPass5") + + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("local.pdf", pdf_bytes, "application/pdf")}, + ) + assert upload_resp.status_code == 201 + paper_id = upload_resp.json()["id"] + + # GET download should return the file + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + + # HEAD should also succeed + head_resp = await async_client.head( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert head_resp.status_code == 200 + + +async def test_download_remote_uploaded_paper(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + await _register_user(async_client, "library6@example.com", "StrongPass6") + token = await _login(async_client, "library6@example.com", "StrongPass6") + + # Create a placeholder upload with remote URL via import endpoint + payload = { + "title": "Remote Paper", + "abstract": "Remote", + "doi": None, + "arxiv_id": None, + "pdf_url": "https://example.com/remote.pdf", + "folder_id": None, + } + import_resp = await async_client.post( + "/api/v1/papers/import", + headers=_auth_headers(token), + json=payload, + ) + assert import_resp.status_code == 201 + uploaded = import_resp.json()["uploaded"] + paper_id = uploaded["id"] + + # Patch httpx client used in library download to avoid network + from app.api.v1.endpoints import library as library_endpoint + + class DummyResponse: + def __init__(self, content: bytes) -> None: + self.content = content + + def raise_for_status(self) -> None: + return None + + class DummyClient: + def __init__(self, *args, **kwargs) -> None: + self.args = args + self.kwargs = kwargs + + async def __aenter__(self) -> "DummyClient": + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: D401 + return None + + async def get(self, url: str): + assert url == "https://example.com/remote.pdf" + return DummyResponse(b"%PDF-1.4\nremote") + + monkeypatch.setattr(library_endpoint.httpx, "AsyncClient", DummyClient) + + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + diff --git a/backend/tests/test_notes.py b/backend/tests/test_notes.py new file mode 100644 index 0000000..38011ff --- /dev/null +++ b/backend/tests/test_notes.py @@ -0,0 +1,170 @@ +"""Integration tests for notes endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _create_note( + async_client: AsyncClient, + token: str, + title: str = "My Note", + content: str | None = "content", + uploaded_paper_id: int | None = None, +) -> int: + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": title, + "content": content, + "uploaded_paper_id": uploaded_paper_id, + "tags": ["tag1", "tag2"], + }, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_notes_crud_flow(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes@example.com", "StrongPass1") + token = await _login(async_client, "notes@example.com", "StrongPass1") + + # Initially no notes + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["items"] == [] + assert data["total"] == 0 + + # Create a note + note_id = await _create_note(async_client, token, title="First", content="hello world") + + # List again + list_resp2 = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + data2 = list_resp2.json() + assert data2["total"] == 1 + assert len(data2["items"]) == 1 + assert data2["items"][0]["title"] == "First" + + # Get detail + detail_resp = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert detail_resp.status_code == 200 + detail = detail_resp.json() + assert detail["id"] == note_id + assert detail["title"] == "First" + assert detail["content"] == "hello world" + assert detail["tags"] == ["tag1", "tag2"] + + # Update note + update_resp = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"title": "Updated", "content": "updated content"}, + ) + assert update_resp.status_code == 200 + updated = update_resp.json() + assert updated["title"] == "Updated" + assert updated["content"] == "updated content" + + # Delete note + delete_resp = await async_client.delete( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + # Fetching deleted note should 404 + not_found = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert not_found.status_code == 404 + assert not_found.json()["detail"] == "笔记不存在" + + +async def test_create_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes2@example.com", "StrongPass2") + token = await _login(async_client, "notes2@example.com", "StrongPass2") + + # Referencing a non-existent uploaded paper should 404 + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": "Linked", + "content": "content", + "uploaded_paper_id": 999, + "tags": [], + }, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_update_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes3@example.com", "StrongPass3") + token = await _login(async_client, "notes3@example.com", "StrongPass3") + + note_id = await _create_note(async_client, token, title="To update", content=None) + + resp = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"uploaded_paper_id": 12345}, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_list_notes_with_filters(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes4@example.com", "StrongPass4") + token = await _login(async_client, "notes4@example.com", "StrongPass4") + + # Create two notes + await _create_note(async_client, token, title="Deep Learning", content="about transformers") + await _create_note(async_client, token, title="Classical Mechanics", content="physics") + + # Search filter + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + params={"search": "Deep"}, + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["total"] == 1 + assert len(data["items"]) == 1 + assert data["items"][0]["title"] == "Deep Learning" + From 0a8f6e6ac1a94b658474d8e4ea8b465d6032e6a5 Mon Sep 17 00:00:00 2001 From: Xujie Han <1498126985@qq.com> Date: Thu, 4 Dec 2025 10:19:39 +0800 Subject: [PATCH 5/6] : update gitignore to delete trace for htmlcov --- .gitignore | 1 + backend/tests/test_mineru_comprehensive.py | 2 +- redis.conf | 6 +- scripts/dev.sh | 5 + tests/TESTING.md | 186 +++++++++++++++ tests/conftest.py | 63 +++++ tests/test_library.py | 253 +++++++++++++++++++++ tests/test_notes.py | 170 ++++++++++++++ 8 files changed, 682 insertions(+), 4 deletions(-) create mode 100644 tests/TESTING.md create mode 100644 tests/conftest.py create mode 100644 tests/test_library.py create mode 100644 tests/test_notes.py diff --git a/.gitignore b/.gitignore index 389745e..9a352d4 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,4 @@ backend/app/media/* # LLM提供的问题解决方案 solutions/ tmp/ +htmlcov #测试报告 diff --git a/backend/tests/test_mineru_comprehensive.py b/backend/tests/test_mineru_comprehensive.py index 437fdf6..936828e 100644 --- a/backend/tests/test_mineru_comprehensive.py +++ b/backend/tests/test_mineru_comprehensive.py @@ -20,7 +20,7 @@ ) logger = logging.getLogger(__name__) -from backend.app.services.mineru_cli import MineruCLIError, parse_pdf_async +from app.services.mineru_cli import MineruCLIError, parse_pdf_async def analyze_content_types(content_list: List[Dict[str, Any]]) -> Dict[str, int]: diff --git a/redis.conf b/redis.conf index 529914c..9c8808b 100644 --- a/redis.conf +++ b/redis.conf @@ -1,7 +1,7 @@ # Redis configuration for InsightReading -# Bind to local interfaces so services in Docker network -# 和宿主机本地开发进程(uvicorn/celery)都能访问 -bind 127.0.0.1 +# Bind to all interfaces inside the container +# 这样 Docker 端口映射才能从宿主机访问 +bind 0.0.0.0 # Disable protected mode because access is restricted by Docker network diff --git a/scripts/dev.sh b/scripts/dev.sh index 8298ccc..dc1e8eb 100755 --- a/scripts/dev.sh +++ b/scripts/dev.sh @@ -15,6 +15,11 @@ error() { printf "\\033[1;31m[error]\\033[0m %s\\n" "$*" >&2; } if command -v docker >/dev/null 2>&1; then info "Starting postgres/redis via docker compose…" docker compose -f "$COMPOSE_FILE" up -d postgres redis + info "Ensuring papers database exists…" + docker compose -f "$COMPOSE_FILE" exec -T postgres psql -U postgres -d postgres <<'SQL' >/dev/null +SELECT 'CREATE DATABASE papers' +WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'papers')\gexec +SQL info "Ensuring notes table exists…" docker compose -f "$COMPOSE_FILE" exec -T postgres psql -U postgres -d papers <<'SQL' >/dev/null CREATE TABLE IF NOT EXISTS notes ( diff --git a/tests/TESTING.md b/tests/TESTING.md new file mode 100644 index 0000000..04c4910 --- /dev/null +++ b/tests/TESTING.md @@ -0,0 +1,186 @@ +# InsightReading 测试指南(后端为主) + +本项目目前主要是后端 FastAPI 的测试,分布在: + +- `backend/tests/`:后端单元 / 集成测试(官方主测试目录) +- `tests/`:基于后端 API 的额外集成测试 + +前端目前只有 ESLint 检查(无单元测试)。 + +--- + +## 一、测试环境要求 + +- Python:建议使用 **3.11**(与后端说明保持一致) +- 依赖: + - `pytest` + - `pytest-asyncio` + - `pytest-cov` + - 以及 `backend/pyproject.toml` / `backend/requirements-dev.txt` 中列出的依赖 + +### 1. 创建虚拟环境并安装依赖 + +在项目根目录: + +```bash +cd backend +python3.11 -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate + +# 推荐:安装带 dev 额外依赖的后端包 +pip install -e ".[dev]" +# 或者使用 requirements-dev.txt +# pip install -r requirements-dev.txt +``` + +--- + +## 二、运行全部后端测试 + 覆盖率 + +在项目根目录或 `backend/` 下都可以运行(建议在项目根目录,方便同时覆盖 `backend/tests` 和根目录 `tests`): + +```bash +cd /path/to/InsightReading +source backend/.venv/bin/activate # 确保已激活虚拟环境 + +pytest --cov=app --cov-report=term-missing +``` + +说明: + +- `--cov=app`:统计 `backend/app` 包的代码覆盖率。 +- `--cov-report=term-missing`:在终端展示每个文件未覆盖的行号,便于补测试。 + +### 生成 HTML 覆盖率报告 + +如果希望用浏览器直观查看覆盖率: + +```bash +pytest --cov=app --cov-report=html +``` + +生成的报告默认在 `htmlcov/` 目录下(当前工作目录),用浏览器打开: + +```bash +open htmlcov/index.html # macOS +# 或者手动用浏览器打开该文件 +``` + +--- + +## 三、只运行部分测试 + +### 1. 只跑后端某个测试文件 + +示例:只跑后端库相关测试: + +```bash +pytest backend/tests/test_library.py +``` + +### 2. 只跑根目录 tests/ 下的集成测试 + +```bash +pytest tests +``` + +### 3. 只跑某个具体测试用例 + +```bash +pytest backend/tests/test_notes.py::test_notes_crud_flow +pytest tests/test_library.py::test_download_local_uploaded_paper +``` + +--- + +## 四、前端检查(Lint) + +前端当前只配置了 ESLint,用于静态检查: + +```bash +cd frontend +npm install # 第一次或依赖变更时执行 +npm run lint +``` + +如需增加前端单元测试,可以自行引入 Jest / Vitest 等测试框架,并在 `frontend/package.json` 中配置测试脚本。 + +--- + +## 五、常见问题与解决方法 + +### 1. 找不到 `async_client` fixture + +错误示例: + +```text +E fixture 'async_client' not found +``` + +原因: + +- 顶层 `tests/` 和 `backend/tests/` 里的测试都依赖一个名为 `async_client` 的异步客户端 fixture,它在对应目录的 `conftest.py` 中定义: + - `backend/tests/conftest.py` + - `tests/conftest.py` + +检查: + +- 确认你是从项目根目录运行 `pytest`,而不是在某个子目录下。 +- 确认本地未误改 / 删除上述两个 `conftest.py` 文件。 + +### 2. `ModuleNotFoundError: No module named 'pytest_asyncio'` + +错误示例: + +```text +ImportError while loading conftest +ModuleNotFoundError: No module named 'pytest_asyncio' +``` + +解决: + +```bash +source backend/.venv/bin/activate +pip install pytest-asyncio +``` + +建议始终使用 `pip install -e ".[dev]"` 或 `pip install -r requirements-dev.txt` 安装依赖,确保测试相关包完整安装。 + +### 3. 数据库 / MEDIA_ROOT 相关问题 + +- 测试使用 **独立的内存 SQLite** 数据库(不依赖真实 Postgres),在 `conftest.py` 中通过: + + ```python + TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:?cache=shared" + ``` + +- 媒体文件目录在测试中会被重定向到一个临时目录 `tmp_media`,并通过环境变量 `MEDIA_ROOT` 注入,无需手动创建。 + +如果看到与数据库连接或媒体目录相关的异常,优先检查: + +- 是否正确激活了虚拟环境; +- 是否误修改了 `conftest.py` 中的 `TEST_DATABASE_URL` 或 `MEDIA_ROOT` 设置。 + +--- + +## 六、推荐的日常使用方式 + +开发中常用命令总结(在项目根目录执行): + +```bash +# 1. 激活后端虚拟环境 +source backend/.venv/bin/activate + +# 2. 跑全部后端测试 + 覆盖率 +pytest --cov=app --cov-report=term-missing + +# 3. 调试某个模块时,单独跑对应文件或用例 +pytest backend/tests/test_notes.py::test_notes_crud_flow + +# 4. 前端代码改动后做静态检查 +cd frontend +npm run lint +``` + +根据需要,你可以在本文件中继续补充项目特有的测试约定或案例。 + diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..a95c502 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,63 @@ +"""Test fixtures shared by top-level integration tests.""" +from __future__ import annotations + +import asyncio +import os +import shutil +from pathlib import Path +from typing import AsyncIterator, Generator + +import pytest +import pytest_asyncio # type: ignore[import-not-found] +from httpx import ASGITransport, AsyncClient +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +from app.db.session import get_db +from app.models import user # noqa: F401 +from app.models.base import Base + +TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:?cache=shared" +TEST_MEDIA_ROOT = Path(__file__).resolve().parent / "tmp_media" +os.environ.setdefault("MEDIA_ROOT", str(TEST_MEDIA_ROOT)) + +from app.main import app + + +@pytest.fixture(scope="session") +def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]: + """Create a single event loop for the async tests.""" + + loop = asyncio.new_event_loop() + yield loop + loop.close() + + +@pytest_asyncio.fixture() +async def async_client() -> AsyncIterator[AsyncClient]: + """Yield an AsyncClient wired to an isolated SQLite database.""" + + engine = create_async_engine(TEST_DATABASE_URL, future=True, connect_args={"uri": True}) + + if TEST_MEDIA_ROOT.exists(): + shutil.rmtree(TEST_MEDIA_ROOT) + TEST_MEDIA_ROOT.mkdir(parents=True, exist_ok=True) + + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + session_factory = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession) + + async def override_get_db() -> AsyncIterator[AsyncSession]: + async with session_factory() as session: + yield session + + app.dependency_overrides[get_db] = override_get_db + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + app.dependency_overrides.clear() + await engine.dispose() + shutil.rmtree(TEST_MEDIA_ROOT, ignore_errors=True) + diff --git a/tests/test_library.py b/tests/test_library.py new file mode 100644 index 0000000..cb19ffa --- /dev/null +++ b/tests/test_library.py @@ -0,0 +1,253 @@ +"""Integration tests for library folder and upload management endpoints.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from httpx import AsyncClient + +MEDIA_ROOT = Path(os.environ["MEDIA_ROOT"]) + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _upload_pdf(async_client: AsyncClient, token: str, name: str = "sample.pdf") -> int: + pdf_bytes = b"%PDF-1.4\ncontent" + resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": (name, pdf_bytes, "application/pdf")}, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_library_folders_crud(async_client: AsyncClient) -> None: + await _register_user(async_client, "library@example.com", "StrongPass1") + token = await _login(async_client, "library@example.com", "StrongPass1") + + # Initially no folders + list_resp = await async_client.get( + "/api/v1/library/folders", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["folders"] == [] + assert data["unfiled_count"] == 0 + + # Create a folder + create_resp = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "Reading", "color": "#fff"}, + ) + assert create_resp.status_code == 201 + folder = create_resp.json() + assert folder["name"] == "Reading" + assert folder["paper_count"] == 0 + folder_id = folder["id"] + + # Rename folder + rename_resp = await async_client.patch( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + json={"name": "Updated", "color": "#000"}, + ) + assert rename_resp.status_code == 200 + renamed = rename_resp.json() + assert renamed["name"] == "Updated" + assert renamed["paper_count"] == 0 + + # Delete folder (still empty) + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + +async def test_delete_library_folder_blocked_when_not_empty(async_client: AsyncClient) -> None: + await _register_user(async_client, "library2@example.com", "StrongPass2") + token = await _login(async_client, "library2@example.com", "StrongPass2") + + # Create folder + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "WithPapers", "color": None}, + ) + folder_id = create_folder.json()["id"] + + # Upload a PDF into this folder + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("paper.pdf", pdf_bytes, "application/pdf")}, + data={"folder_id": str(folder_id)}, + ) + assert upload_resp.status_code == 201 + + # Attempt to delete folder should fail + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 400 + assert delete_resp.json()["detail"] == "请先移动或删除文件夹中的文献" + + +async def test_assign_uploaded_paper_folder(async_client: AsyncClient) -> None: + await _register_user(async_client, "library3@example.com", "StrongPass3") + token = await _login(async_client, "library3@example.com", "StrongPass3") + + paper_id = await _upload_pdf(async_client, token) + + # Create folder and assign paper into it + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "FolderA", "color": None}, + ) + folder_id = create_folder.json()["id"] + + assign_resp = await async_client.patch( + f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": folder_id}, + ) + assert assign_resp.status_code == 200 + assigned = assign_resp.json() + assert assigned["folder_id"] == folder_id + + # Move back to unfiled + unassign_resp = await async_client.patch( + f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert unassign_resp.status_code == 200 + assert unassign_resp.json()["folder_id"] is None + + +async def test_assign_uploaded_paper_folder_not_found(async_client: AsyncClient) -> None: + await _register_user(async_client, "library4@example.com", "StrongPass4") + token = await _login(async_client, "library4@example.com", "StrongPass4") + + # Paper does not exist for this user + resp = await async_client.patch( + "/api/v1/library/uploads/999/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到对应的上传文件" + + +async def test_download_local_uploaded_paper(async_client: AsyncClient) -> None: + await _register_user(async_client, "library5@example.com", "StrongPass5") + token = await _login(async_client, "library5@example.com", "StrongPass5") + + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("local.pdf", pdf_bytes, "application/pdf")}, + ) + assert upload_resp.status_code == 201 + paper_id = upload_resp.json()["id"] + + # GET download should return the file + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + + # HEAD should also succeed + head_resp = await async_client.head( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert head_resp.status_code == 200 + + +async def test_download_remote_uploaded_paper(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + await _register_user(async_client, "library6@example.com", "StrongPass6") + token = await _login(async_client, "library6@example.com", "StrongPass6") + + # Create a placeholder upload with remote URL via import endpoint + payload = { + "title": "Remote Paper", + "abstract": "Remote", + "doi": None, + "arxiv_id": None, + "pdf_url": "https://example.com/remote.pdf", + "folder_id": None, + } + import_resp = await async_client.post( + "/api/v1/papers/import", + headers=_auth_headers(token), + json=payload, + ) + assert import_resp.status_code == 201 + uploaded = import_resp.json()["uploaded"] + paper_id = uploaded["id"] + + # Patch httpx client used in library download to avoid network + from app.api.v1.endpoints import library as library_endpoint + + class DummyResponse: + def __init__(self, content: bytes) -> None: + self.content = content + + def raise_for_status(self) -> None: + return None + + class DummyClient: + def __init__(self, *args, **kwargs) -> None: + self.args = args + self.kwargs = kwargs + + async def __aenter__(self) -> "DummyClient": + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: D401 + return None + + async def get(self, url: str): + assert url == "https://example.com/remote.pdf" + return DummyResponse(b"%PDF-1.4\nremote") + + monkeypatch.setattr(library_endpoint.httpx, "AsyncClient", DummyClient) + + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + diff --git a/tests/test_notes.py b/tests/test_notes.py new file mode 100644 index 0000000..2c5bcd9 --- /dev/null +++ b/tests/test_notes.py @@ -0,0 +1,170 @@ +"""Integration tests for notes endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _create_note( + async_client: AsyncClient, + token: str, + title: str = "My Note", + content: str | None = "content", + uploaded_paper_id: int | None = None, +) -> int: + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": title, + "content": content, + "uploaded_paper_id": uploaded_paper_id, + "tags": ["tag1", "tag2"], + }, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_notes_crud_flow(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes@example.com", "StrongPass1") + token = await _login(async_client, "notes@example.com", "StrongPass1") + + # Initially no notes + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["items"] == [] + assert data["total"] == 0 + + # Create a note + note_id = await _create_note(async_client, token, title="First", content="hello world") + + # List again + list_resp2 = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + data2 = list_resp2.json() + assert data2["total"] == 1 + assert len(data2["items"]) == 1 + assert data2["items"][0]["title"] == "First" + + # Get detail + detail_resp = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert detail_resp.status_code == 200 + detail = detail_resp.json() + assert detail["id"] == note_id + assert detail["title"] == "First" + assert detail["content"] == "hello world" + assert detail["tags"] == ["tag1", "tag2"] + + # Update note + update_resp = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"title": "Updated", "content": "updated content"}, + ) + assert update_resp.status_code == 200 + updated = update_resp.json() + assert updated["title"] == "Updated" + assert updated["content"] == "updated content" + + # Delete note + delete_resp = await async_client.delete( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + # Fetching deleted note should 404 + not_found = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert not_found.status_code == 404 + assert not_found.json()["detail"] == "笔记不存在" + + +async def test_create_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes2@example.com", "StrongPass2") + token = await _login(async_client, "notes2@example.com", "StrongPass2") + + # Referencing a non-existent uploaded paper should 404 + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": "Linked", + "content": "content", + "uploaded_paper_id": 999, + "tags": [], + }, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_update_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes3@example.com", "StrongPass3") + token = await _login(async_client, "notes3@example.com", "StrongPass3") + + note_id = await _create_note(async_client, token, title="To update", content=None) + + resp = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"uploaded_paper_id": 12345}, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_list_notes_with_filters(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes4@example.com", "StrongPass4") + token = await _login(async_client, "notes4@example.com", "StrongPass4") + + # Create two notes + await _create_note(async_client, token, title="Deep Learning", content="about transformers") + await _create_note(async_client, token, title="Classical Mechanics", content="physics") + + # Search filter + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + params={"search": "Deep"}, + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["total"] == 1 + assert len(data["items"]) == 1 + assert data["items"][0]["title"] == "Deep Learning" + From 9ac4b7569505aa2aadeba374c880333e03cbb38d Mon Sep 17 00:00:00 2001 From: Xujie Han <1498126985@qq.com> Date: Mon, 8 Dec 2025 15:40:39 +0800 Subject: [PATCH 6/6] stash --- backend/tests/test_mineru_comprehensive.py | 32 ++++++++++++++++------ 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/backend/tests/test_mineru_comprehensive.py b/backend/tests/test_mineru_comprehensive.py index 936828e..613d117 100644 --- a/backend/tests/test_mineru_comprehensive.py +++ b/backend/tests/test_mineru_comprehensive.py @@ -14,6 +14,8 @@ allow_module_level=True, ) +pytestmark = pytest.mark.asyncio + logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' @@ -23,6 +25,26 @@ from app.services.mineru_cli import MineruCLIError, parse_pdf_async +TEST_MEDIA_DIR = Path(__file__).resolve().parent / "tmp_media" +TEST_FILES: List[Path] = [ + TEST_MEDIA_DIR / "sample_table.pdf", + TEST_MEDIA_DIR / "complex_test.pdf", +] + + +@pytest.fixture(params=TEST_FILES) +def pdf_path(request: pytest.FixtureRequest) -> Path: + """Provide test PDF paths for MinerU E2E tests. + + Skips the test gracefully if the corresponding file does not exist. + """ + + path: Path = request.param + if not path.exists(): + pytest.skip(f"Test PDF not found: {path}") + return path + + def analyze_content_types(content_list: List[Dict[str, Any]]) -> Dict[str, int]: """Analyze and count different content types.""" type_counts = {} @@ -250,15 +272,9 @@ async def main(): logger.info("MinerU Parser - Comprehensive Test Suite") logger.info("="*80) - # Test files - test_files = [ - Path('backend/tests/tmp_media/sample_table.pdf'), - Path('backend/tests/tmp_media/complex_test.pdf'), - ] - # Check if files exist - available_files = [f for f in test_files if f.exists()] - missing_files = [f for f in test_files if not f.exists()] + available_files = [f for f in TEST_FILES if f.exists()] + missing_files = [f for f in TEST_FILES if not f.exists()] if missing_files: logger.warning(f"\n⚠️ Missing test files:")