diff --git a/.gitignore b/.gitignore index 1e929b4..0bf6d49 100644 --- a/.gitignore +++ b/.gitignore @@ -101,6 +101,8 @@ backend/app/media/* # LLM提供的问题解决方案 solutions/ tmp/ +# Test coverage HTML report +htmlcov .claude/ openai.json scripts/dev.sh diff --git a/backend/.coverage b/backend/.coverage index 065f944..e3997df 100644 Binary files a/backend/.coverage and b/backend/.coverage differ diff --git a/backend/app/api/v1/__init__.py b/backend/app/api/v1/__init__.py index 2cd794b..ca8e43f 100644 --- a/backend/app/api/v1/__init__.py +++ b/backend/app/api/v1/__init__.py @@ -17,3 +17,4 @@ api_router.include_router(library.router, prefix="/library", tags=["library"]) + \ No newline at end of file diff --git a/backend/app/api/v1/endpoints/papers.py b/backend/app/api/v1/endpoints/papers.py index 9f31ed7..d799b12 100644 --- a/backend/app/api/v1/endpoints/papers.py +++ b/backend/app/api/v1/endpoints/papers.py @@ -173,6 +173,8 @@ async def upload_paper( original_display_name = normalize_original_filename(file.filename) storage_candidate = sanitize_storage_filename(original_display_name) + # Ensure the upload directory exists even if MEDIA_ROOT is cleaned between tests. 
+ UPLOAD_DIR.mkdir(parents=True, exist_ok=True) stored_filename, destination = ensure_unique_storage_name(UPLOAD_DIR, storage_candidate) await asyncio.to_thread(destination.write_bytes, cleaned_bytes) diff --git a/backend/tests/test_academic.py b/backend/tests/test_academic.py new file mode 100644 index 0000000..1e6d3b0 --- /dev/null +++ b/backend/tests/test_academic.py @@ -0,0 +1,378 @@ +"""Tests for academic search endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +from app.schemas.academic import ( + ChatMessage, + IntelligentAnswer, + IntelligentSearchRequest, + IntelligentSearchResponse, + Paper, + PaperSearchRequest, + PaperSearchResponse, + QueryInterpretation, +) + +pytestmark = pytest.mark.asyncio + + +class DummyAcademicSearchService: + async def search(self, request, sources=None): + assert request.query == "transformer" + papers = [ + { + "id": "p1", + "title": "Attention Is All You Need", + "authors": ["Vaswani et al."], + "abstract": "We propose the Transformer...", + "year": 2017, + "venue": "NIPS", + "url": "https://example.com/p1", + "citation_count": 12345, + "source": "semantic_scholar", + }, + ] + return { + "papers": papers, + "total": 1, + "sources_used": sources or ["semantic_scholar"], + "source_errors": None, + "hits_per_source": {"semantic_scholar": 1}, + } + + +class DummyIntelligentSearchService: + async def search(self, request: IntelligentSearchRequest) -> IntelligentSearchResponse: + paper = Paper( + id="p2", + title="Survey of Transformers", + authors=["Alice"], + abstract="A survey on transformers.", + year=2020, + venue="ICLR", + url="https://example.com/p2", + citation_count=100, + source="semantic_scholar", + ) + interpretation = QueryInterpretation( + original_query=request.query, + intent="search", + needs_search=True, + ) + answer = IntelligentAnswer( + response="This is a dummy intelligent answer.", + ) + return IntelligentSearchResponse( + papers=[paper], + total=1, + 
interpretation=interpretation, + answer=answer, + conversation=[ChatMessage(role="user", content=request.query)], + search_performed=True, + search_reason="dummy", + ) + + +async def test_academic_search_uses_service(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + from app.api.v1.endpoints import academic as academic_endpoint + + monkeypatch.setattr(academic_endpoint, "AcademicSearchService", lambda: DummyAcademicSearchService()) + + payload = {"query": "transformer", "limit": 10} + + response = await async_client.post("/api/v1/academic/search", json=payload) + + assert response.status_code == 200 + data = PaperSearchResponse(**response.json()) + assert data.total == 1 + assert len(data.papers) == 1 + assert data.papers[0].title == "Attention Is All You Need" + assert data.sources_used == ["semantic_scholar"] + assert data.source_errors is None + + +async def test_intelligent_search_returns_plain_response_without_auth( + monkeypatch: pytest.MonkeyPatch, + async_client: AsyncClient, +) -> None: + from app.api.v1.endpoints import academic as academic_endpoint + + monkeypatch.setattr(academic_endpoint, "IntelligentSearchService", lambda: DummyIntelligentSearchService()) + + payload = {"query": "large language models", "limit": 5} + + response = await async_client.post("/api/v1/academic/intelligent-search", json=payload) + + assert response.status_code == 200 + data = IntelligentSearchResponse(**response.json()) + assert data.total == 1 + assert len(data.papers) == 1 + assert data.papers[0].title == "Survey of Transformers" + assert data.answer.response == "This is a dummy intelligent answer." + # When user is anonymous, the endpoint should not add conversation_id. 
+ assert data.conversation_id is None + + +async def test_intelligent_search_value_error_returns_400( + monkeypatch: pytest.MonkeyPatch, + async_client: AsyncClient, +) -> None: + from app.api.v1.endpoints import academic as academic_endpoint + + class FailingService: + async def search(self, request: IntelligentSearchRequest) -> IntelligentSearchResponse: # pragma: no cover - behavior via HTTP + raise ValueError("Invalid intelligent search request") + + monkeypatch.setattr(academic_endpoint, "IntelligentSearchService", lambda: FailingService()) + + payload = {"query": "", "limit": 5} + + response = await async_client.post("/api/v1/academic/intelligent-search", json=payload) + + assert response.status_code == 400 + assert response.json()["detail"] == "Invalid intelligent search request" + + +async def test_academic_search_service_aggregates_and_scores(monkeypatch: pytest.MonkeyPatch) -> None: + from app.services.academic import search_service + + service = search_service.AcademicSearchService() + + class DummySemantic: + async def search(self, query: str, venue=None, year=None, limit: int = 20): # noqa: D401 + assert query == "deep learning for nlp" + return [ + { + "paper_id": "s1", + "title": "Deep Learning for NLP", + "authors": ["Alice"], + "abstract": "A study on deep learning in NLP.", + "year": 2020, + "venue": "ACL", + "url": "https://example.com/s1", + "citation_count": 10, + "source": "semantic_scholar", + }, + { + "paper_id": "s2", + "title": "Duplicate Paper", + "authors": ["Bob"], + "abstract": "Another paper.", + "year": 2019, + "venue": "EMNLP", + "url": "https://example.com/s2", + "citation_count": 5, + "source": "semantic_scholar", + }, + ] + + class DummyArxiv: + async def search(self, query: str, category=None, max_results: int = 20): # noqa: D401 + # Domain should map to cs.CL + assert category == "cs.CL" + return [ + { + "paper_id": "a1", + "title": "Deep Learning for NLP", + "authors": ["Alice"], + "abstract": "Same title from arXiv.", + 
"year": 2021, + "venue": "NeurIPS", + "url": "https://example.com/a1", + "citation_count": 3, + "source": "arxiv", + } + ] + + service.providers = { + "semantic_scholar": DummySemantic(), + "arxiv": DummyArxiv(), + "openalex": DummySemantic(), + } + + request = PaperSearchRequest(query="deep learning for nlp", domain="nlp", limit=10) + result = await service.search(request, sources=["semantic_scholar", "arxiv"]) + + assert result["total"] == 2 # duplicate title deduped across sources + assert len(result["papers"]) == 2 + assert set(result["sources_used"]) == {"semantic_scholar", "arxiv"} + assert result["source_errors"] is None + for paper in result["papers"]: + assert 0.0 <= paper.get("score", 0.0) <= 1.0 + + +async def test_intelligent_legacy_search_uses_query_parser_and_academic_service() -> None: + from app.services.academic.intelligent_service import IntelligentSearchService + + class DummyLLM: + def __init__(self) -> None: + self.is_configured = False + + class DummyQueryParser: + async def parse(self, text: str) -> QueryInterpretation: # noqa: D401 + return QueryInterpretation( + original_query=text, + intent="search", + needs_search=True, + keywords=["transformer", "models"], + authors=[], + venue=None, + year_range=[2020, 2023], + domain="nlp", + follow_up_questions=[], + ) + + class DummyAcademicService: + async def search(self, request: PaperSearchRequest, sources=None): # noqa: D401 + assert request.query == "transformer models" + assert request.year == 2023 + papers = [ + { + "id": "p1", + "title": "Transformer Models", + "authors": ["Alice"], + "abstract": "About transformers.", + "year": 2023, + "venue": "NeurIPS", + "url": "https://example.com/p1", + "citation_count": 42, + "source": "semantic_scholar", + } + ] + return { + "papers": papers, + "total": 1, + "sources_used": sources or ["semantic_scholar"], + "source_errors": None, + "hits_per_source": {"semantic_scholar": 1}, + } + + service = IntelligentSearchService( + 
academic_service=DummyAcademicService(), + llm_client=DummyLLM(), + query_parser=DummyQueryParser(), + ) + + request = IntelligentSearchRequest(query="transformer models", limit=5, sources=["semantic_scholar"]) + response = await service.search(request) + + assert response.total == 1 + assert len(response.papers) == 1 + assert response.papers[0].title == "Transformer Models" + assert response.interpretation.intent == "search" + assert response.search_performed is True + assert response.search_metadata is not None + assert response.search_metadata.total_results == 1 + assert isinstance(response.answer.response, str) and response.answer.response + + +def test_academic_domain_mapping() -> None: + from app.services.academic.search_service import AcademicSearchService + + assert AcademicSearchService._map_domain_to_arxiv_category(None) is None + assert AcademicSearchService._map_domain_to_arxiv_category("NLP") == "cs.CL" + assert AcademicSearchService._map_domain_to_arxiv_category("computer vision") == "cs.CV" + # "domain" 中包含 "ai",会被映射到 cs.AI + assert AcademicSearchService._map_domain_to_arxiv_category("unknown domain") == "cs.AI" + + +def test_semantic_scholar_normalize_paper() -> None: + from app.services.academic.providers.semantic_scholar import SemanticScholarProvider + + provider = SemanticScholarProvider() + raw = { + "paperId": "ABC123", + "title": "Sample Paper", + "authors": [{"name": "Alice"}, {"name": "Bob"}], + "abstract": "Test abstract", + "year": 2024, + "venue": "ICLR", + "url": "https://example.com", + "citationCount": 10, + "publicationDate": "2024-01-01", + "externalIds": {"DOI": "10.1000/xyz"}, + "tldr": {"text": "Short summary"}, + } + normalized = provider.normalize_paper(raw) + assert normalized["paper_id"] == "ABC123" + assert normalized["authors"] == ["Alice", "Bob"] + assert normalized["citation_count"] == 10 + assert normalized["source"] == "semantic_scholar" + + +def test_openalex_normalize_result_and_abstract() -> None: + from 
app.services.academic.providers.openalex import OpenAlexProvider + + provider = OpenAlexProvider() + inverted_index = { + "deep": [0], + "learning": [1], + "models": [2], + } + raw = { + "id": "https://openalex.org/W123", + "title": "Deep Learning Models", + "authorships": [{"author": {"display_name": "Alice"}}, {"author": {"display_name": "Bob"}}], + "abstract_inverted_index": inverted_index, + "publication_year": 2022, + "publication_date": "2022-01-01", + "cited_by_count": 5, + "ids": {"doi": "10.1000/xyz"}, + "concepts": [{"display_name": "Machine learning"}], + "locations": [ + { + "source": {"display_name": "ICLR"}, + "landing_page_url": "https://example.com/paper", + } + ], + } + normalized = provider._normalize_result(raw) + assert normalized["paper_id"] == "W123" + assert normalized["authors"] == ["Alice", "Bob"] + assert normalized["abstract"] == "deep learning models" + assert normalized["venue"] == "ICLR" + assert normalized["url"] == "https://example.com/paper" + assert normalized["source"] == "openalex" + + +def test_arxiv_provider_build_query_and_parse_xml() -> None: + from app.services.academic.providers.arxiv import ArxivProvider + + provider = ArxivProvider() + query = provider._build_query(' "deep learning" transformer ', category="cs.LG") + assert "all:\"deep learning\"" in query + assert "all:transformer" in query + assert "+AND+cat:cs.LG" in query + + xml_content = """ + + + http://arxiv.org/abs/2101.00001v1 + Sample Title + Sample abstract. 
+ Alice + 2021-01-01T00:00:00Z + 2021-01-02T00:00:00Z + + + + + Accepted to ICML 2021 + ICML 2021 + 10.1000/xyz + + + """.strip() + + papers = provider._parse_xml_response(xml_content) + assert len(papers) == 1 + paper = papers[0] + assert paper["paper_id"] == "2101.00001" + assert paper["title"] == "Sample Title" + assert paper["authors"] == ["Alice"] + assert paper["year"] == 2021 + assert paper["url"] == "https://arxiv.org/abs/2101.00001" diff --git a/backend/tests/test_auth.py b/backend/tests/test_auth.py index 562f898..b650d65 100644 --- a/backend/tests/test_auth.py +++ b/backend/tests/test_auth.py @@ -1,9 +1,13 @@ """Integration tests for user registration and authentication flows.""" from __future__ import annotations +from urllib.parse import parse_qs, urlparse + import pytest from httpx import AsyncClient, Response +from app.api.v1.endpoints import auth as auth_endpoint + pytestmark = pytest.mark.asyncio @@ -93,4 +97,88 @@ async def test_get_current_user_success(async_client: AsyncClient) -> None: me_data = me_response.json() assert me_data["email"] == "eric@example.com" assert me_data["full_name"] == "Eric" - assert me_data["is_active"] is True \ No newline at end of file + assert me_data["is_active"] is True + + +async def test_github_login_unconfigured(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + # Ensure GitHub OAuth is seen as not configured + auth_endpoint.settings.github_client_id = None + auth_endpoint.settings.github_client_secret = None + + response = await async_client.get("/api/v1/auth/github/login") + + assert response.status_code == 503 + assert response.json()["detail"] == "GitHub OAuth 未配置" + + +async def test_github_login_sets_state_cookie(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + # Configure dummy GitHub OAuth credentials + auth_endpoint.settings.github_client_id = "dummy-client-id" + auth_endpoint.settings.github_client_secret = "dummy-client-secret" + + response = await 
async_client.get("/api/v1/auth/github/login", params={"next": "/dashboard"}) + + # Should redirect to GitHub authorize URL + assert response.status_code == 302 + location = response.headers["location"] + parsed = urlparse(location) + assert parsed.netloc == "github.com" + assert parsed.path == "/login/oauth/authorize" + query = parse_qs(parsed.query) + assert query.get("client_id") == ["dummy-client-id"] + assert "state" in query + + # State cookie should be set for CSRF protection + state_cookie = response.cookies.get(auth_endpoint.GITHUB_STATE_COOKIE) + assert state_cookie is not None + + +async def test_github_callback_success_creates_user_and_redirects( + monkeypatch: pytest.MonkeyPatch, + async_client: AsyncClient, +) -> None: + # Configure dummy GitHub OAuth credentials + auth_endpoint.settings.github_client_id = "dummy-client-id" + auth_endpoint.settings.github_client_secret = "dummy-client-secret" + + # Stub out external GitHub calls + async def fake_exchange(code: str, redirect_uri: str) -> dict[str, str]: + return {"access_token": "gh-token"} + + async def fake_fetch_profile(access_token: str) -> tuple[dict[str, str], str | None]: + return ( + {"id": 123, "name": "Git User", "login": "gituser", "avatar_url": "http://avatar"}, + "git@example.com", + ) + + monkeypatch.setattr(auth_endpoint, "_exchange_github_code_for_token", fake_exchange) + monkeypatch.setattr(auth_endpoint, "_fetch_github_profile", fake_fetch_profile) + + import json as _json + + # Manually set the OAuth state cookie with a known nonce + state = "test-nonce" + cookie_payload = _json.dumps({"nonce": state, "next": "/dashboard"}) + async_client.cookies.set(auth_endpoint.GITHUB_STATE_COOKIE, cookie_payload) + + # Then call callback with matching state and a dummy code + callback_resp = await async_client.get( + "/api/v1/auth/github/callback", + params={"code": "dummy-code", "state": state}, + ) + + assert callback_resp.status_code == 302 + redirect_url = callback_resp.headers["location"] + 
parsed = urlparse(redirect_url) + query = parse_qs(parsed.query) + # Should redirect to frontend with token and provider + assert "token" in query + assert query.get("provider") == ["github"] + assert query.get("next") == ["/dashboard"] + + token = query["token"][0] + # Token should be valid for /users/me + me_resp = await get_me(async_client, token) + assert me_resp.status_code == 200 + me_data = me_resp.json() + assert me_data["email"] == "git@example.com" diff --git a/backend/tests/test_conversations.py b/backend/tests/test_conversations.py new file mode 100644 index 0000000..c03fd92 --- /dev/null +++ b/backend/tests/test_conversations.py @@ -0,0 +1,176 @@ +"""Tests for conversation API endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def test_create_and_get_conversation(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-user@example.com", "StrongPass1") + token = await _login(async_client, "conv-user@example.com", "StrongPass1") + + create_payload = {"title": "My first conversation"} + create_resp = await async_client.post( + "/api/v1/conversations/", + json=create_payload, + headers=_auth_headers(token), + ) + + assert create_resp.status_code 
== 201 + conv_data = create_resp.json() + assert conv_data["title"] == "My first conversation" + assert conv_data["message_count"] is None + assert conv_data["last_message_preview"] is None + conv_id = conv_data["id"] + + get_resp = await async_client.get( + f"/api/v1/conversations/{conv_id}", + headers=_auth_headers(token), + ) + + assert get_resp.status_code == 200 + detail = get_resp.json() + assert detail["id"] == conv_id + assert detail["title"] == "My first conversation" + assert detail["messages"] == [] + + +async def test_list_conversations_includes_message_metadata(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-list@example.com", "StrongPass2") + token = await _login(async_client, "conv-list@example.com", "StrongPass2") + + # Create two conversations + for idx in range(2): + resp = await async_client.post( + "/api/v1/conversations/", + json={"title": f"Conversation {idx}"}, + headers=_auth_headers(token), + ) + assert resp.status_code == 201 + conv_id = resp.json()["id"] + # Add one message to each conversation + msg_resp = await async_client.post( + f"/api/v1/conversations/{conv_id}/messages", + json={"role": "user", "content": f"hello {idx}"}, + headers=_auth_headers(token), + ) + assert msg_resp.status_code == 201 + + list_resp = await async_client.get( + "/api/v1/conversations/", + headers=_auth_headers(token), + ) + + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["total"] == 2 + assert len(data["conversations"]) == 2 + for conv in data["conversations"]: + assert conv["message_count"] == 1 + assert conv["last_message_preview"].startswith("hello ") + + +async def test_update_conversation_updates_title_and_metadata(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-update@example.com", "StrongPass3") + token = await _login(async_client, "conv-update@example.com", "StrongPass3") + + create_resp = await async_client.post( + "/api/v1/conversations/", + json={"title": 
"Old title"}, + headers=_auth_headers(token), + ) + conv_id = create_resp.json()["id"] + + # Add a message so that last_message_preview is populated + msg_resp = await async_client.post( + f"/api/v1/conversations/{conv_id}/messages", + json={"role": "assistant", "content": "This is the last message preview."}, + headers=_auth_headers(token), + ) + assert msg_resp.status_code == 201 + + update_resp = await async_client.patch( + f"/api/v1/conversations/{conv_id}", + json={"title": "New title"}, + headers=_auth_headers(token), + ) + + assert update_resp.status_code == 200 + updated = update_resp.json() + assert updated["title"] == "New title" + assert updated["message_count"] == 1 + assert "This is the last message preview." in updated["last_message_preview"] + + +async def test_delete_conversation_marks_deleted(async_client: AsyncClient) -> None: + await _register_user(async_client, "conv-delete@example.com", "StrongPass4") + token = await _login(async_client, "conv-delete@example.com", "StrongPass4") + + create_resp = await async_client.post( + "/api/v1/conversations/", + json={"title": "To be deleted"}, + headers=_auth_headers(token), + ) + conv_id = create_resp.json()["id"] + + delete_resp = await async_client.delete( + f"/api/v1/conversations/{conv_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + # After deletion, getting the conversation should return 404 + get_resp = await async_client.get( + f"/api/v1/conversations/{conv_id}", + headers=_auth_headers(token), + ) + assert get_resp.status_code == 404 + assert get_resp.json()["detail"] == "对话不存在或无权访问" + + +async def test_add_message_requires_ownership(async_client: AsyncClient) -> None: + # User A creates a conversation + await _register_user(async_client, "owner@example.com", "StrongPass5") + owner_token = await _login(async_client, "owner@example.com", "StrongPass5") + create_resp = await async_client.post( + "/api/v1/conversations/", + json={"title": "Owner conversation"}, 
+ headers=_auth_headers(owner_token), + ) + conv_id = create_resp.json()["id"] + + # User B tries to post message to A's conversation + await _register_user(async_client, "intruder@example.com", "StrongPass6") + intruder_token = await _login(async_client, "intruder@example.com", "StrongPass6") + + msg_resp = await async_client.post( + f"/api/v1/conversations/{conv_id}/messages", + json={"role": "user", "content": "I should not be allowed."}, + headers=_auth_headers(intruder_token), + ) + + assert msg_resp.status_code == 404 + assert msg_resp.json()["detail"] == "对话不存在或无权访问" + diff --git a/backend/tests/test_health.py b/backend/tests/test_health.py new file mode 100644 index 0000000..2fd899e --- /dev/null +++ b/backend/tests/test_health.py @@ -0,0 +1,15 @@ +"""Integration tests for the health check endpoint.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def test_health_endpoint_returns_ok(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/health") + + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + diff --git a/backend/tests/test_library.py b/backend/tests/test_library.py new file mode 100644 index 0000000..cb19ffa --- /dev/null +++ b/backend/tests/test_library.py @@ -0,0 +1,253 @@ +"""Integration tests for library folder and upload management endpoints.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from httpx import AsyncClient + +MEDIA_ROOT = Path(os.environ["MEDIA_ROOT"]) + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = 
await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _upload_pdf(async_client: AsyncClient, token: str, name: str = "sample.pdf") -> int: + pdf_bytes = b"%PDF-1.4\ncontent" + resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": (name, pdf_bytes, "application/pdf")}, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_library_folders_crud(async_client: AsyncClient) -> None: + await _register_user(async_client, "library@example.com", "StrongPass1") + token = await _login(async_client, "library@example.com", "StrongPass1") + + # Initially no folders + list_resp = await async_client.get( + "/api/v1/library/folders", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["folders"] == [] + assert data["unfiled_count"] == 0 + + # Create a folder + create_resp = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "Reading", "color": "#fff"}, + ) + assert create_resp.status_code == 201 + folder = create_resp.json() + assert folder["name"] == "Reading" + assert folder["paper_count"] == 0 + folder_id = folder["id"] + + # Rename folder + rename_resp = await async_client.patch( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + json={"name": "Updated", "color": "#000"}, + ) + assert rename_resp.status_code == 200 + renamed = rename_resp.json() + assert renamed["name"] == "Updated" + assert renamed["paper_count"] == 0 + + # Delete folder (still empty) + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + 
headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + +async def test_delete_library_folder_blocked_when_not_empty(async_client: AsyncClient) -> None: + await _register_user(async_client, "library2@example.com", "StrongPass2") + token = await _login(async_client, "library2@example.com", "StrongPass2") + + # Create folder + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "WithPapers", "color": None}, + ) + folder_id = create_folder.json()["id"] + + # Upload a PDF into this folder + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("paper.pdf", pdf_bytes, "application/pdf")}, + data={"folder_id": str(folder_id)}, + ) + assert upload_resp.status_code == 201 + + # Attempt to delete folder should fail + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 400 + assert delete_resp.json()["detail"] == "请先移动或删除文件夹中的文献" + + +async def test_assign_uploaded_paper_folder(async_client: AsyncClient) -> None: + await _register_user(async_client, "library3@example.com", "StrongPass3") + token = await _login(async_client, "library3@example.com", "StrongPass3") + + paper_id = await _upload_pdf(async_client, token) + + # Create folder and assign paper into it + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "FolderA", "color": None}, + ) + folder_id = create_folder.json()["id"] + + assign_resp = await async_client.patch( + f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": folder_id}, + ) + assert assign_resp.status_code == 200 + assigned = assign_resp.json() + assert assigned["folder_id"] == folder_id + + # Move back to unfiled + unassign_resp = await async_client.patch( + 
f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert unassign_resp.status_code == 200 + assert unassign_resp.json()["folder_id"] is None + + +async def test_assign_uploaded_paper_folder_not_found(async_client: AsyncClient) -> None: + await _register_user(async_client, "library4@example.com", "StrongPass4") + token = await _login(async_client, "library4@example.com", "StrongPass4") + + # Paper does not exist for this user + resp = await async_client.patch( + "/api/v1/library/uploads/999/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到对应的上传文件" + + +async def test_download_local_uploaded_paper(async_client: AsyncClient) -> None: + await _register_user(async_client, "library5@example.com", "StrongPass5") + token = await _login(async_client, "library5@example.com", "StrongPass5") + + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("local.pdf", pdf_bytes, "application/pdf")}, + ) + assert upload_resp.status_code == 201 + paper_id = upload_resp.json()["id"] + + # GET download should return the file + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + + # HEAD should also succeed + head_resp = await async_client.head( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert head_resp.status_code == 200 + + +async def test_download_remote_uploaded_paper(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + await _register_user(async_client, "library6@example.com", "StrongPass6") + token = await _login(async_client, "library6@example.com", "StrongPass6") + + # 
Create a placeholder upload with remote URL via import endpoint + payload = { + "title": "Remote Paper", + "abstract": "Remote", + "doi": None, + "arxiv_id": None, + "pdf_url": "https://example.com/remote.pdf", + "folder_id": None, + } + import_resp = await async_client.post( + "/api/v1/papers/import", + headers=_auth_headers(token), + json=payload, + ) + assert import_resp.status_code == 201 + uploaded = import_resp.json()["uploaded"] + paper_id = uploaded["id"] + + # Patch httpx client used in library download to avoid network + from app.api.v1.endpoints import library as library_endpoint + + class DummyResponse: + def __init__(self, content: bytes) -> None: + self.content = content + + def raise_for_status(self) -> None: + return None + + class DummyClient: + def __init__(self, *args, **kwargs) -> None: + self.args = args + self.kwargs = kwargs + + async def __aenter__(self) -> "DummyClient": + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: D401 + return None + + async def get(self, url: str): + assert url == "https://example.com/remote.pdf" + return DummyResponse(b"%PDF-1.4\nremote") + + monkeypatch.setattr(library_endpoint.httpx, "AsyncClient", DummyClient) + + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + diff --git a/backend/tests/test_mineru_comprehensive.py b/backend/tests/test_mineru_comprehensive.py index 437fdf6..613d117 100644 --- a/backend/tests/test_mineru_comprehensive.py +++ b/backend/tests/test_mineru_comprehensive.py @@ -14,13 +14,35 @@ allow_module_level=True, ) +pytestmark = pytest.mark.asyncio + logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) -from backend.app.services.mineru_cli import MineruCLIError, 
parse_pdf_async +from app.services.mineru_cli import MineruCLIError, parse_pdf_async + + +TEST_MEDIA_DIR = Path(__file__).resolve().parent / "tmp_media" +TEST_FILES: List[Path] = [ + TEST_MEDIA_DIR / "sample_table.pdf", + TEST_MEDIA_DIR / "complex_test.pdf", +] + + +@pytest.fixture(params=TEST_FILES) +def pdf_path(request: pytest.FixtureRequest) -> Path: + """Provide test PDF paths for MinerU E2E tests. + + Skips the test gracefully if the corresponding file does not exist. + """ + + path: Path = request.param + if not path.exists(): + pytest.skip(f"Test PDF not found: {path}") + return path def analyze_content_types(content_list: List[Dict[str, Any]]) -> Dict[str, int]: @@ -250,15 +272,9 @@ async def main(): logger.info("MinerU Parser - Comprehensive Test Suite") logger.info("="*80) - # Test files - test_files = [ - Path('backend/tests/tmp_media/sample_table.pdf'), - Path('backend/tests/tmp_media/complex_test.pdf'), - ] - # Check if files exist - available_files = [f for f in test_files if f.exists()] - missing_files = [f for f in test_files if not f.exists()] + available_files = [f for f in TEST_FILES if f.exists()] + missing_files = [f for f in TEST_FILES if not f.exists()] if missing_files: logger.warning(f"\n⚠️ Missing test files:") diff --git a/backend/tests/test_notes.py b/backend/tests/test_notes.py new file mode 100644 index 0000000..38011ff --- /dev/null +++ b/backend/tests/test_notes.py @@ -0,0 +1,170 @@ +"""Integration tests for notes endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + 
"/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _create_note( + async_client: AsyncClient, + token: str, + title: str = "My Note", + content: str | None = "content", + uploaded_paper_id: int | None = None, +) -> int: + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": title, + "content": content, + "uploaded_paper_id": uploaded_paper_id, + "tags": ["tag1", "tag2"], + }, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_notes_crud_flow(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes@example.com", "StrongPass1") + token = await _login(async_client, "notes@example.com", "StrongPass1") + + # Initially no notes + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["items"] == [] + assert data["total"] == 0 + + # Create a note + note_id = await _create_note(async_client, token, title="First", content="hello world") + + # List again + list_resp2 = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + data2 = list_resp2.json() + assert data2["total"] == 1 + assert len(data2["items"]) == 1 + assert data2["items"][0]["title"] == "First" + + # Get detail + detail_resp = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert detail_resp.status_code == 200 + detail = detail_resp.json() + assert detail["id"] == note_id + assert detail["title"] == "First" + assert detail["content"] == "hello world" + assert detail["tags"] == ["tag1", "tag2"] + + # Update note + update_resp = await 
async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"title": "Updated", "content": "updated content"}, + ) + assert update_resp.status_code == 200 + updated = update_resp.json() + assert updated["title"] == "Updated" + assert updated["content"] == "updated content" + + # Delete note + delete_resp = await async_client.delete( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + # Fetching deleted note should 404 + not_found = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert not_found.status_code == 404 + assert not_found.json()["detail"] == "笔记不存在" + + +async def test_create_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes2@example.com", "StrongPass2") + token = await _login(async_client, "notes2@example.com", "StrongPass2") + + # Referencing a non-existent uploaded paper should 404 + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": "Linked", + "content": "content", + "uploaded_paper_id": 999, + "tags": [], + }, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_update_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes3@example.com", "StrongPass3") + token = await _login(async_client, "notes3@example.com", "StrongPass3") + + note_id = await _create_note(async_client, token, title="To update", content=None) + + resp = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"uploaded_paper_id": 12345}, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_list_notes_with_filters(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes4@example.com", "StrongPass4") + token = await 
_login(async_client, "notes4@example.com", "StrongPass4") + + # Create two notes + await _create_note(async_client, token, title="Deep Learning", content="about transformers") + await _create_note(async_client, token, title="Classical Mechanics", content="physics") + + # Search filter + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + params={"search": "Deep"}, + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["total"] == 1 + assert len(data["items"]) == 1 + assert data["items"][0]["title"] == "Deep Learning" + diff --git a/backend/tests/test_papers.py b/backend/tests/test_papers.py new file mode 100644 index 0000000..2ff7f29 --- /dev/null +++ b/backend/tests/test_papers.py @@ -0,0 +1,189 @@ +"""Integration tests for paper library (upload/list/import) endpoints.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from httpx import AsyncClient + +from app.api.v1.endpoints import papers as papers_endpoint + +pytestmark = pytest.mark.asyncio + +MEDIA_ROOT = Path(os.environ["MEDIA_ROOT"]) + + +async def _register_user(client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(client: AsyncClient, email: str, password: str) -> str: + response = await client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def test_search_papers_returns_not_implemented(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/papers", params={"q": "test"}) + + assert 
response.status_code == 501 + assert response.json()["detail"] == "Search service not yet available" + + +async def test_get_paper_returns_not_implemented(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/papers/1") + + assert response.status_code == 501 + assert response.json()["detail"] == "Paper retrieval not yet available" + + +async def test_list_uploaded_papers_empty(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-empty@example.com", "StrongPass1") + token = await _login(async_client, "papers-empty@example.com", "StrongPass1") + + response = await async_client.get( + "/api/v1/papers/uploads", + headers=_auth_headers(token), + ) + + assert response.status_code == 200 + data = response.json() + assert data["items"] == [] + assert data["total"] == 0 + assert data["page"] == 1 + assert data["page_size"] == 12 + assert data["total_pages"] == 0 + + +async def test_upload_pdf_and_list_back(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-upload@example.com", "StrongPass2") + token = await _login(async_client, "papers-upload@example.com", "StrongPass2") + + pdf_bytes = b"%PDF-1.4\ncontent" + upload_response = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("sample.pdf", pdf_bytes, "application/pdf")}, + ) + + assert upload_response.status_code == 201 + uploaded = upload_response.json() + assert uploaded["original_filename"] == "sample.pdf" + assert uploaded["content_type"] == "application/pdf" + assert uploaded["file_size"] == len(pdf_bytes) + assert isinstance(uploaded["id"], int) + assert isinstance(uploaded["file_url"], str) and uploaded["file_url"].startswith("/media/uploads/") + assert uploaded["file_hash"] + + stored_filename = uploaded["file_url"].rsplit("/", 1)[-1] + stored_path = MEDIA_ROOT / "uploads" / stored_filename + assert stored_path.is_file() + + list_response = await async_client.get( + 
"/api/v1/papers/uploads", + headers=_auth_headers(token), + ) + + assert list_response.status_code == 200 + listing = list_response.json() + assert listing["total"] == 1 + assert listing["page"] == 1 + assert listing["total_pages"] == 1 + assert len(listing["items"]) == 1 + assert listing["items"][0]["id"] == uploaded["id"] + + +async def test_upload_rejects_non_pdf(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-non-pdf@example.com", "StrongPass3") + token = await _login(async_client, "papers-non-pdf@example.com", "StrongPass3") + + response = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("notes.txt", b"hello", "text/plain")}, + ) + + assert response.status_code == 400 + assert response.json()["detail"] == "仅支持上传 PDF 文件" + + +async def test_upload_rejects_oversized_pdf(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-large@example.com", "StrongPass4") + token = await _login(async_client, "papers-large@example.com", "StrongPass4") + + max_bytes = papers_endpoint.MAX_UPLOAD_BYTES + oversized = b"%PDF-1.4\n" + b"0" * (max_bytes + 1) + + response = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("large.pdf", oversized, "application/pdf")}, + ) + + assert response.status_code == 413 + assert response.json()["detail"] == "文件体积超过 25MB 限制" + + +async def test_get_uploaded_paper_detail_requires_auth(async_client: AsyncClient) -> None: + response = await async_client.get("/api/v1/papers/uploads/1") + + assert response.status_code == 401 + + +async def test_get_uploaded_paper_detail_not_found(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-detail@example.com", "StrongPass5") + token = await _login(async_client, "papers-detail@example.com", "StrongPass5") + + response = await async_client.get( + "/api/v1/papers/uploads/999", + headers=_auth_headers(token), + ) + + assert 
response.status_code == 404 + assert response.json()["detail"] == "未找到对应的上传文件" + + +async def test_import_paper_creates_uploaded_placeholder(async_client: AsyncClient) -> None: + await _register_user(async_client, "papers-import@example.com", "StrongPass6") + token = await _login(async_client, "papers-import@example.com", "StrongPass6") + + payload = { + "title": "Test Import Paper", + "abstract": "A paper about testing import.", + "doi": "10.1234/example", + "arxiv_id": None, + "pdf_url": None, + "folder_id": None, + } + + response = await async_client.post( + "/api/v1/papers/import", + headers=_auth_headers(token), + json=payload, + ) + + assert response.status_code == 201 + data = response.json() + + paper = data["paper"] + uploaded = data["uploaded"] + assert paper["title"] == payload["title"] + assert paper["abstract"] == payload["abstract"] + assert uploaded is not None + assert uploaded["original_filename"] == "Test Import Paper.pdf" + assert uploaded["file_size"] == 0 + assert uploaded["file_hash"] is None + assert isinstance(uploaded["file_url"], str) + assert uploaded["file_url"].startswith("#") + diff --git a/redis.conf b/redis.conf index f4dc752..42152fd 100644 --- a/redis.conf +++ b/redis.conf @@ -1,4 +1,5 @@ # Redis configuration for InsightReading + # Bind to local interfaces so services in Docker network # 和宿主机本地开发进程(uvicorn/celery)都能访问 #bind 127.0.0.1 diff --git a/tests/TESTING.md b/tests/TESTING.md new file mode 100644 index 0000000..04c4910 --- /dev/null +++ b/tests/TESTING.md @@ -0,0 +1,186 @@ +# InsightReading 测试指南(后端为主) + +本项目目前主要是后端 FastAPI 的测试,分布在: + +- `backend/tests/`:后端单元 / 集成测试(官方主测试目录) +- `tests/`:基于后端 API 的额外集成测试 + +前端目前只有 ESLint 检查(无单元测试)。 + +--- + +## 一、测试环境要求 + +- Python:建议使用 **3.11**(与后端说明保持一致) +- 依赖: + - `pytest` + - `pytest-asyncio` + - `pytest-cov` + - 以及 `backend/pyproject.toml` / `backend/requirements-dev.txt` 中列出的依赖 + +### 1. 
创建虚拟环境并安装依赖 + +在项目根目录: + +```bash +cd backend +python3.11 -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate + +# 推荐:安装带 dev 额外依赖的后端包 +pip install -e ".[dev]" +# 或者使用 requirements-dev.txt +# pip install -r requirements-dev.txt +``` + +--- + +## 二、运行全部后端测试 + 覆盖率 + +在项目根目录或 `backend/` 下都可以运行(建议在项目根目录,方便同时覆盖 `backend/tests` 和根目录 `tests`): + +```bash +cd /path/to/InsightReading +source backend/.venv/bin/activate # 确保已激活虚拟环境 + +pytest --cov=app --cov-report=term-missing +``` + +说明: + +- `--cov=app`:统计 `backend/app` 包的代码覆盖率。 +- `--cov-report=term-missing`:在终端展示每个文件未覆盖的行号,便于补测试。 + +### 生成 HTML 覆盖率报告 + +如果希望用浏览器直观查看覆盖率: + +```bash +pytest --cov=app --cov-report=html +``` + +生成的报告默认在 `htmlcov/` 目录下(当前工作目录),用浏览器打开: + +```bash +open htmlcov/index.html # macOS +# 或者手动用浏览器打开该文件 +``` + +--- + +## 三、只运行部分测试 + +### 1. 只跑后端某个测试文件 + +示例:只跑后端库相关测试: + +```bash +pytest backend/tests/test_library.py +``` + +### 2. 只跑根目录 tests/ 下的集成测试 + +```bash +pytest tests +``` + +### 3. 只跑某个具体测试用例 + +```bash +pytest backend/tests/test_notes.py::test_notes_crud_flow +pytest tests/test_library.py::test_download_local_uploaded_paper +``` + +--- + +## 四、前端检查(Lint) + +前端当前只配置了 ESLint,用于静态检查: + +```bash +cd frontend +npm install # 第一次或依赖变更时执行 +npm run lint +``` + +如需增加前端单元测试,可以自行引入 Jest / Vitest 等测试框架,并在 `frontend/package.json` 中配置测试脚本。 + +--- + +## 五、常见问题与解决方法 + +### 1. 找不到 `async_client` fixture + +错误示例: + +```text +E fixture 'async_client' not found +``` + +原因: + +- 顶层 `tests/` 和 `backend/tests/` 里的测试都依赖一个名为 `async_client` 的异步客户端 fixture,它在对应目录的 `conftest.py` 中定义: + - `backend/tests/conftest.py` + - `tests/conftest.py` + +检查: + +- 确认你是从项目根目录运行 `pytest`,而不是在某个子目录下。 +- 确认本地未误改 / 删除上述两个 `conftest.py` 文件。 + +### 2. 
`ModuleNotFoundError: No module named 'pytest_asyncio'` + +错误示例: + +```text +ImportError while loading conftest +ModuleNotFoundError: No module named 'pytest_asyncio' +``` + +解决: + +```bash +source backend/.venv/bin/activate +pip install pytest-asyncio +``` + +建议始终使用 `pip install -e ".[dev]"` 或 `pip install -r requirements-dev.txt` 安装依赖,确保测试相关包完整安装。 + +### 3. 数据库 / MEDIA_ROOT 相关问题 + +- 测试使用 **独立的内存 SQLite** 数据库(不依赖真实 Postgres),在 `conftest.py` 中通过: + + ```python + TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:?cache=shared" + ``` + +- 媒体文件目录在测试中会被重定向到一个临时目录 `tmp_media`,并通过环境变量 `MEDIA_ROOT` 注入,无需手动创建。 + +如果看到与数据库连接或媒体目录相关的异常,优先检查: + +- 是否正确激活了虚拟环境; +- 是否误修改了 `conftest.py` 中的 `TEST_DATABASE_URL` 或 `MEDIA_ROOT` 设置。 + +--- + +## 六、推荐的日常使用方式 + +开发中常用命令总结(在项目根目录执行): + +```bash +# 1. 激活后端虚拟环境 +source backend/.venv/bin/activate + +# 2. 跑全部后端测试 + 覆盖率 +pytest --cov=app --cov-report=term-missing + +# 3. 调试某个模块时,单独跑对应文件或用例 +pytest backend/tests/test_notes.py::test_notes_crud_flow + +# 4. 前端代码改动后做静态检查 +cd frontend +npm run lint +``` + +根据需要,你可以在本文件中继续补充项目特有的测试约定或案例。 + diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..a95c502 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,63 @@ +"""Test fixtures shared by top-level integration tests.""" +from __future__ import annotations + +import asyncio +import os +import shutil +from pathlib import Path +from typing import AsyncIterator, Generator + +import pytest +import pytest_asyncio # type: ignore[import-not-found] +from httpx import ASGITransport, AsyncClient +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +from app.db.session import get_db +from app.models import user # noqa: F401 +from app.models.base import Base + +TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:?cache=shared" +TEST_MEDIA_ROOT = Path(__file__).resolve().parent / "tmp_media" +os.environ.setdefault("MEDIA_ROOT", str(TEST_MEDIA_ROOT)) + +from app.main import app + + 
+@pytest.fixture(scope="session") +def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]: + """Create a single event loop for the async tests.""" + + loop = asyncio.new_event_loop() + yield loop + loop.close() + + +@pytest_asyncio.fixture() +async def async_client() -> AsyncIterator[AsyncClient]: + """Yield an AsyncClient wired to an isolated SQLite database.""" + + engine = create_async_engine(TEST_DATABASE_URL, future=True, connect_args={"uri": True}) + + if TEST_MEDIA_ROOT.exists(): + shutil.rmtree(TEST_MEDIA_ROOT) + TEST_MEDIA_ROOT.mkdir(parents=True, exist_ok=True) + + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + session_factory = async_sessionmaker(engine, expire_on_commit=False, class_=AsyncSession) + + async def override_get_db() -> AsyncIterator[AsyncSession]: + async with session_factory() as session: + yield session + + app.dependency_overrides[get_db] = override_get_db + + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + + app.dependency_overrides.clear() + await engine.dispose() + shutil.rmtree(TEST_MEDIA_ROOT, ignore_errors=True) + diff --git a/tests/test_library.py b/tests/test_library.py new file mode 100644 index 0000000..cb19ffa --- /dev/null +++ b/tests/test_library.py @@ -0,0 +1,253 @@ +"""Integration tests for library folder and upload management endpoints.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from httpx import AsyncClient + +MEDIA_ROOT = Path(os.environ["MEDIA_ROOT"]) + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + 
response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _upload_pdf(async_client: AsyncClient, token: str, name: str = "sample.pdf") -> int: + pdf_bytes = b"%PDF-1.4\ncontent" + resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": (name, pdf_bytes, "application/pdf")}, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_library_folders_crud(async_client: AsyncClient) -> None: + await _register_user(async_client, "library@example.com", "StrongPass1") + token = await _login(async_client, "library@example.com", "StrongPass1") + + # Initially no folders + list_resp = await async_client.get( + "/api/v1/library/folders", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["folders"] == [] + assert data["unfiled_count"] == 0 + + # Create a folder + create_resp = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "Reading", "color": "#fff"}, + ) + assert create_resp.status_code == 201 + folder = create_resp.json() + assert folder["name"] == "Reading" + assert folder["paper_count"] == 0 + folder_id = folder["id"] + + # Rename folder + rename_resp = await async_client.patch( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + json={"name": "Updated", "color": "#000"}, + ) + assert rename_resp.status_code == 200 + renamed = rename_resp.json() + assert renamed["name"] == "Updated" + assert renamed["paper_count"] == 0 + + # Delete folder (still empty) + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + 
headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + +async def test_delete_library_folder_blocked_when_not_empty(async_client: AsyncClient) -> None: + await _register_user(async_client, "library2@example.com", "StrongPass2") + token = await _login(async_client, "library2@example.com", "StrongPass2") + + # Create folder + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "WithPapers", "color": None}, + ) + folder_id = create_folder.json()["id"] + + # Upload a PDF into this folder + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("paper.pdf", pdf_bytes, "application/pdf")}, + data={"folder_id": str(folder_id)}, + ) + assert upload_resp.status_code == 201 + + # Attempt to delete folder should fail + delete_resp = await async_client.delete( + f"/api/v1/library/folders/{folder_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 400 + assert delete_resp.json()["detail"] == "请先移动或删除文件夹中的文献" + + +async def test_assign_uploaded_paper_folder(async_client: AsyncClient) -> None: + await _register_user(async_client, "library3@example.com", "StrongPass3") + token = await _login(async_client, "library3@example.com", "StrongPass3") + + paper_id = await _upload_pdf(async_client, token) + + # Create folder and assign paper into it + create_folder = await async_client.post( + "/api/v1/library/folders", + headers=_auth_headers(token), + json={"name": "FolderA", "color": None}, + ) + folder_id = create_folder.json()["id"] + + assign_resp = await async_client.patch( + f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": folder_id}, + ) + assert assign_resp.status_code == 200 + assigned = assign_resp.json() + assert assigned["folder_id"] == folder_id + + # Move back to unfiled + unassign_resp = await async_client.patch( + 
f"/api/v1/library/uploads/{paper_id}/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert unassign_resp.status_code == 200 + assert unassign_resp.json()["folder_id"] is None + + +async def test_assign_uploaded_paper_folder_not_found(async_client: AsyncClient) -> None: + await _register_user(async_client, "library4@example.com", "StrongPass4") + token = await _login(async_client, "library4@example.com", "StrongPass4") + + # Paper does not exist for this user + resp = await async_client.patch( + "/api/v1/library/uploads/999/folder", + headers=_auth_headers(token), + json={"folder_id": None}, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到对应的上传文件" + + +async def test_download_local_uploaded_paper(async_client: AsyncClient) -> None: + await _register_user(async_client, "library5@example.com", "StrongPass5") + token = await _login(async_client, "library5@example.com", "StrongPass5") + + pdf_bytes = b"%PDF-1.4\ncontent" + upload_resp = await async_client.post( + "/api/v1/papers/upload", + headers=_auth_headers(token), + files={"file": ("local.pdf", pdf_bytes, "application/pdf")}, + ) + assert upload_resp.status_code == 201 + paper_id = upload_resp.json()["id"] + + # GET download should return the file + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + + # HEAD should also succeed + head_resp = await async_client.head( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert head_resp.status_code == 200 + + +async def test_download_remote_uploaded_paper(monkeypatch: pytest.MonkeyPatch, async_client: AsyncClient) -> None: + await _register_user(async_client, "library6@example.com", "StrongPass6") + token = await _login(async_client, "library6@example.com", "StrongPass6") + + # 
Create a placeholder upload with remote URL via import endpoint + payload = { + "title": "Remote Paper", + "abstract": "Remote", + "doi": None, + "arxiv_id": None, + "pdf_url": "https://example.com/remote.pdf", + "folder_id": None, + } + import_resp = await async_client.post( + "/api/v1/papers/import", + headers=_auth_headers(token), + json=payload, + ) + assert import_resp.status_code == 201 + uploaded = import_resp.json()["uploaded"] + paper_id = uploaded["id"] + + # Patch httpx client used in library download to avoid network + from app.api.v1.endpoints import library as library_endpoint + + class DummyResponse: + def __init__(self, content: bytes) -> None: + self.content = content + + def raise_for_status(self) -> None: + return None + + class DummyClient: + def __init__(self, *args, **kwargs) -> None: + self.args = args + self.kwargs = kwargs + + async def __aenter__(self) -> "DummyClient": + return self + + async def __aexit__(self, exc_type, exc, tb) -> None: # noqa: D401 + return None + + async def get(self, url: str): + assert url == "https://example.com/remote.pdf" + return DummyResponse(b"%PDF-1.4\nremote") + + monkeypatch.setattr(library_endpoint.httpx, "AsyncClient", DummyClient) + + download_resp = await async_client.get( + f"/api/v1/library/uploads/{paper_id}/download", + headers=_auth_headers(token), + ) + assert download_resp.status_code == 200 + assert download_resp.headers.get("content-type", "").startswith("application/pdf") + diff --git a/tests/test_notes.py b/tests/test_notes.py new file mode 100644 index 0000000..2c5bcd9 --- /dev/null +++ b/tests/test_notes.py @@ -0,0 +1,170 @@ +"""Integration tests for notes endpoints.""" +from __future__ import annotations + +import pytest +from httpx import AsyncClient + +pytestmark = pytest.mark.asyncio + + +async def _register_user(async_client: AsyncClient, email: str, password: str) -> None: + payload: dict[str, str] = {"email": email, "password": password} + response = await 
async_client.post("/api/v1/users", json=payload) + assert response.status_code == 201 + + +async def _login(async_client: AsyncClient, email: str, password: str) -> str: + response = await async_client.post( + "/api/v1/auth/token", + data={"username": email, "password": password}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + assert response.status_code == 200 + return response.json()["access_token"] + + +def _auth_headers(token: str) -> dict[str, str]: + return {"Authorization": f"Bearer {token}"} + + +async def _create_note( + async_client: AsyncClient, + token: str, + title: str = "My Note", + content: str | None = "content", + uploaded_paper_id: int | None = None, +) -> int: + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": title, + "content": content, + "uploaded_paper_id": uploaded_paper_id, + "tags": ["tag1", "tag2"], + }, + ) + assert resp.status_code == 201 + return resp.json()["id"] + + +async def test_notes_crud_flow(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes@example.com", "StrongPass1") + token = await _login(async_client, "notes@example.com", "StrongPass1") + + # Initially no notes + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["items"] == [] + assert data["total"] == 0 + + # Create a note + note_id = await _create_note(async_client, token, title="First", content="hello world") + + # List again + list_resp2 = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + ) + data2 = list_resp2.json() + assert data2["total"] == 1 + assert len(data2["items"]) == 1 + assert data2["items"][0]["title"] == "First" + + # Get detail + detail_resp = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert detail_resp.status_code == 200 + detail = detail_resp.json() + 
assert detail["id"] == note_id + assert detail["title"] == "First" + assert detail["content"] == "hello world" + assert detail["tags"] == ["tag1", "tag2"] + + # Update note + update_resp = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"title": "Updated", "content": "updated content"}, + ) + assert update_resp.status_code == 200 + updated = update_resp.json() + assert updated["title"] == "Updated" + assert updated["content"] == "updated content" + + # Delete note + delete_resp = await async_client.delete( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert delete_resp.status_code == 204 + + # Fetching deleted note should 404 + not_found = await async_client.get( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + ) + assert not_found.status_code == 404 + assert not_found.json()["detail"] == "笔记不存在" + + +async def test_create_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes2@example.com", "StrongPass2") + token = await _login(async_client, "notes2@example.com", "StrongPass2") + + # Referencing a non-existent uploaded paper should 404 + resp = await async_client.post( + "/api/v1/notes", + headers=_auth_headers(token), + json={ + "title": "Linked", + "content": "content", + "uploaded_paper_id": 999, + "tags": [], + }, + ) + assert resp.status_code == 404 + assert resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_update_note_with_invalid_uploaded_paper_id(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes3@example.com", "StrongPass3") + token = await _login(async_client, "notes3@example.com", "StrongPass3") + + note_id = await _create_note(async_client, token, title="To update", content=None) + + resp = await async_client.patch( + f"/api/v1/notes/{note_id}", + headers=_auth_headers(token), + json={"uploaded_paper_id": 12345}, + ) + assert resp.status_code == 404 + assert 
resp.json()["detail"] == "未找到关联的上传论文" + + +async def test_list_notes_with_filters(async_client: AsyncClient) -> None: + await _register_user(async_client, "notes4@example.com", "StrongPass4") + token = await _login(async_client, "notes4@example.com", "StrongPass4") + + # Create two notes + await _create_note(async_client, token, title="Deep Learning", content="about transformers") + await _create_note(async_client, token, title="Classical Mechanics", content="physics") + + # Search filter + list_resp = await async_client.get( + "/api/v1/notes", + headers=_auth_headers(token), + params={"search": "Deep"}, + ) + assert list_resp.status_code == 200 + data = list_resp.json() + assert data["total"] == 1 + assert len(data["items"]) == 1 + assert data["items"][0]["title"] == "Deep Learning" +