diff --git a/aiohttp/_cookie_helpers.py b/aiohttp/_cookie_helpers.py
index 80b24068db9..d545df3ef6c 100644
--- a/aiohttp/_cookie_helpers.py
+++ b/aiohttp/_cookie_helpers.py
@@ -185,6 +185,7 @@ def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:
     i = 0
     n = len(header)
+    invalid_names = []

     while i < n:
         # Use the same pattern as parse_set_cookie_headers to find cookies
         match = _COOKIE_PATTERN.match(header, i)
@@ -202,9 +203,7 @@ def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:

             # Validate the name (same as regex path)
             if not _COOKIE_NAME_RE.match(key):
-                internal_logger.warning(
-                    "Can not load cookie: Illegal cookie name %r", key
-                )
+                invalid_names.append(key)
             else:
                 morsel = Morsel()
                 morsel.__setstate__(  # type: ignore[attr-defined]
@@ -222,7 +221,7 @@ def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:

         # Validate the name
         if not key or not _COOKIE_NAME_RE.match(key):
-            internal_logger.warning("Can not load cookie: Illegal cookie name %r", key)
+            invalid_names.append(key)
             continue

         # Create new morsel
@@ -238,6 +237,11 @@ def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:

         cookies.append((key, morsel))

+    if invalid_names:
+        internal_logger.debug(
+            "Cannot load cookie. Illegal cookie names: %r", invalid_names
+        )
+
     return cookies
diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py
index a2848f5a486..c8f216956d7 100644
--- a/aiohttp/multipart.py
+++ b/aiohttp/multipart.py
@@ -350,11 +350,8 @@ async def read_chunk(self, size: int = chunk_size) -> bytes:
             self._read_bytes += len(chunk)
             if self._read_bytes == self._length:
                 self._at_eof = True
-        if self._at_eof:
-            clrf = await self._content.readline()
-            assert (
-                b"\r\n" == clrf
-            ), "reader did not read all the data or it is malformed"
+        if self._at_eof and await self._content.readline() != b"\r\n":
+            raise ValueError("Reader did not read all the data or it is malformed")
         return chunk

     async def _read_chunk_from_length(self, size: int) -> bytes:
@@ -384,7 +381,8 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
         while len(chunk) < self._boundary_len:
             chunk += await self._content.read(size)
             self._content_eof += int(self._content.at_eof())
-            assert self._content_eof < 3, "Reading after EOF"
+            if self._content_eof > 2:
+                raise ValueError("Reading after EOF")
             if self._content_eof:
                 break
         if len(chunk) > size:
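
Context for the `multipart.py` hunks above: `assert` statements are compiled out under `python -O`, so the old guards disappeared in optimized mode and malformed bodies could slip through silently; raising `ValueError` keeps the check unconditional. A minimal illustration — the two helpers are hypothetical, not part of the patch:

```python
def check_crlf_assert(line: bytes) -> None:
    # Stripped entirely under `python -O` (__debug__ is False).
    assert line == b"\r\n", "reader did not read all the data or it is malformed"


def check_crlf_raise(line: bytes) -> None:
    # Survives optimized mode, matching the new read_chunk() behaviour.
    if line != b"\r\n":
        raise ValueError("Reader did not read all the data or it is malformed")


check_crlf_assert(b"XX")  # no-op with -O; AssertionError otherwise
try:
    check_crlf_raise(b"XX")  # ValueError in every mode
except ValueError as exc:
    print(exc)
```
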
diff --git a/aiohttp/streams.py b/aiohttp/streams.py
index 9d1858a3285..8c902d6b1ce 100644
--- a/aiohttp/streams.py
+++ b/aiohttp/streams.py
@@ -138,7 +138,7 @@ def __init__(
         self._loop = loop
         self._size = 0
         self._cursor = 0
-        self._http_chunk_splits: list[int] | None = None
+        self._http_chunk_splits: collections.deque[int] | None = None
         self._buffer: collections.deque[bytes] = collections.deque()
         self._buffer_offset = 0
         self._eof = False
@@ -291,7 +291,7 @@ def begin_http_chunk_receiving(self) -> None:
             raise RuntimeError(
                 "Called begin_http_chunk_receiving when some data was already fed"
             )
-        self._http_chunk_splits = []
+        self._http_chunk_splits = collections.deque()

     def end_http_chunk_receiving(self) -> None:
         if self._http_chunk_splits is None:
@@ -436,7 +436,7 @@ async def readchunk(self) -> tuple[bytes, bool]:
             raise self._exception

         while self._http_chunk_splits:
-            pos = self._http_chunk_splits.pop(0)
+            pos = self._http_chunk_splits.popleft()
             if pos == self._cursor:
                 return (b"", True)
             if pos > self._cursor:
@@ -509,7 +509,7 @@ def _read_nowait_chunk(self, n: int) -> bytes:
         chunk_splits = self._http_chunk_splits
         # Prevent memory leak: drop useless chunk splits
         while chunk_splits and chunk_splits[0] < self._cursor:
-            chunk_splits.pop(0)
+            chunk_splits.popleft()

         if self._size < self._low_water and self._protocol._reading_paused:
             self._protocol.resume_reading()
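
The `streams.py` change is a data-structure swap rather than a behaviour change: `list.pop(0)` shifts every remaining element, so draining chunk splits from the front is quadratic, while `deque.popleft()` is O(1). A rough, machine-dependent illustration:

```python
import collections
import timeit

N = 50_000

def drain_list() -> None:
    splits = list(range(N))
    while splits:
        splits.pop(0)  # O(n) per pop: remaining items shift left

def drain_deque() -> None:
    splits = collections.deque(range(N))
    while splits:
        splits.popleft()  # O(1) per pop

print("list.pop(0):    ", timeit.timeit(drain_list, number=1))
print("deque.popleft():", timeit.timeit(drain_deque, number=1))
```
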
diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py
index f04bba8fa7f..2300becb3a1 100644
--- a/aiohttp/web_request.py
+++ b/aiohttp/web_request.py
@@ -574,7 +574,7 @@ def http_range(self) -> "slice[int, int, int]":
         if rng is not None:
             try:
                 pattern = r"^bytes=(\d*)-(\d*)$"
-                start, end = re.findall(pattern, rng)[0]
+                start, end = re.findall(pattern, rng, re.ASCII)[0]
             except IndexError:  # pattern was not found in header
                 raise ValueError("range not in acceptable format")

@@ -697,13 +697,13 @@ async def post(self) -> "MultiDictProxy[str | bytes | FileField]":
             multipart = await self.multipart()
             max_size = self._client_max_size

-            field = await multipart.next()
-            while field is not None:
-                size = 0
+            size = 0
+            while (field := await multipart.next()) is not None:
                 field_ct = field.headers.get(hdrs.CONTENT_TYPE)

                 if isinstance(field, BodyPartReader):
-                    assert field.name is not None
+                    if field.name is None:
+                        raise ValueError("Multipart field missing name.")

                     # Note that according to RFC 7578, the Content-Type header
                     # is optional, even for files, so we can't assume it's
@@ -755,8 +755,6 @@ async def post(self) -> "MultiDictProxy[str | bytes | FileField]":
                         raise ValueError(
                             "To decode nested multipart you need to use custom reader",
                         )
-
-                    field = await multipart.next()
                 else:
                     data = await self.read()
                     if data:
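
On the `re.ASCII` flag: in Python 3, `\d` defaults to matching any Unicode decimal digit, and `int()` parses those digits too, so a non-RFC-compliant header like `bytes=4-५` used to slip through `http_range` unnoticed. A quick check:

```python
import re

pattern = r"^bytes=(\d*)-(\d*)$"
rng = "bytes=4-५"  # ५ = DEVANAGARI DIGIT FIVE

print(re.findall(pattern, rng))            # [('4', '५')] — accepted before
print(int("५"))                            # 5 — int() parses Unicode digits too

print(re.findall(pattern, rng, re.ASCII))  # [] — now rejected, so http_range
                                           # raises "range not in acceptable format"
```
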
Illegal cookie name" in caplog.text + assert "''" in caplog.text @pytest.mark.parametrize( diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 2269d4b0fda..ef597ccca46 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -1294,7 +1294,8 @@ def test_http_request_chunked_payload(parser: HttpRequestParser) -> None: parser.feed_data(b"4\r\ndata\r\n4\r\nline\r\n0\r\n\r\n") assert b"dataline" == b"".join(d for d in payload._buffer) - assert [4, 8] == payload._http_chunk_splits + assert payload._http_chunk_splits is not None + assert [4, 8] == list(payload._http_chunk_splits) assert payload.is_eof() @@ -1311,7 +1312,8 @@ def test_http_request_chunked_payload_and_next_message( ) assert b"dataline" == b"".join(d for d in payload._buffer) - assert [4, 8] == payload._http_chunk_splits + assert payload._http_chunk_splits is not None + assert [4, 8] == list(payload._http_chunk_splits) assert payload.is_eof() assert len(messages) == 1 @@ -1335,12 +1337,13 @@ def test_http_request_chunked_payload_chunks(parser: HttpRequestParser) -> None: parser.feed_data(b"test: test\r\n") assert b"dataline" == b"".join(d for d in payload._buffer) - assert [4, 8] == payload._http_chunk_splits + assert payload._http_chunk_splits is not None + assert [4, 8] == list(payload._http_chunk_splits) assert not payload.is_eof() parser.feed_data(b"\r\n") assert b"dataline" == b"".join(d for d in payload._buffer) - assert [4, 8] == payload._http_chunk_splits + assert [4, 8] == list(payload._http_chunk_splits) assert payload.is_eof() @@ -1351,7 +1354,8 @@ def test_parse_chunked_payload_chunk_extension(parser: HttpRequestParser) -> Non parser.feed_data(b"4;test\r\ndata\r\n4\r\nline\r\n0\r\ntest: test\r\n\r\n") assert b"dataline" == b"".join(d for d in payload._buffer) - assert [4, 8] == payload._http_chunk_splits + assert payload._http_chunk_splits is not None + assert [4, 8] == list(payload._http_chunk_splits) assert payload.is_eof() diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 14896667e52..fdc0389c361 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -240,11 +240,21 @@ async def test_read_incomplete_body_chunked(self) -> None: d = CIMultiDictProxy[str](CIMultiDict()) obj = aiohttp.BodyPartReader(BOUNDARY, d, stream) result = b"" - with pytest.raises(AssertionError): + with pytest.raises(ValueError): for _ in range(4): result += await obj.read_chunk(7) assert b"Hello, World!\r\n-" == result + async def test_read_with_content_length_malformed_crlf(self) -> None: + # Content-Length is correct but data after content is not \r\n + content = b"Hello" + h = CIMultiDictProxy(CIMultiDict({"CONTENT-LENGTH": str(len(content))})) + # Malformed: "XX" instead of "\r\n" after content + with Stream(content + b"XX--:--") as stream: + obj = aiohttp.BodyPartReader(BOUNDARY, h, stream) + with pytest.raises(ValueError, match="malformed"): + await obj.read() + async def test_read_boundary_with_incomplete_chunk(self) -> None: with Stream(b"") as stream: diff --git a/tests/test_urldispatch.py b/tests/test_urldispatch.py index abe13726353..1bc7a7b638e 100644 --- a/tests/test_urldispatch.py +++ b/tests/test_urldispatch.py @@ -1,5 +1,6 @@ import asyncio import pathlib +import platform import re from collections.abc import ( Awaitable, @@ -1083,6 +1084,19 @@ async def test_405_for_resource_adapter(router: web.UrlDispatcher) -> None: assert (None, {"HEAD", "GET"}) == ret +@pytest.mark.skipif(platform.system() == "Windows", reason="Different path formats") +async def 
diff --git a/tests/test_cookie_helpers.py b/tests/test_cookie_helpers.py
index 8dbdd5ccb3d..38a44972c09 100644
--- a/tests/test_cookie_helpers.py
+++ b/tests/test_cookie_helpers.py
@@ -1,5 +1,6 @@
 """Tests for internal cookie helper functions."""

+import logging
 import sys
 import time
 from http.cookies import (
@@ -1444,14 +1445,16 @@ def test_parse_cookie_header_illegal_names(caplog: pytest.LogCaptureFixture) ->
     """Test parse_cookie_header warns about illegal cookie names."""
     # Cookie name with comma (not allowed in _COOKIE_NAME_RE)
     header = "good=value; invalid,cookie=bad; another=test"
-    result = parse_cookie_header(header)
+    with caplog.at_level(logging.DEBUG):
+        result = parse_cookie_header(header)
     # Should skip the invalid cookie but continue parsing
     assert len(result) == 2
     assert result[0][0] == "good"
     assert result[0][1].value == "value"
     assert result[1][0] == "another"
     assert result[1][1].value == "test"
-    assert "Can not load cookie: Illegal cookie name 'invalid,cookie'" in caplog.text
+    assert "Cannot load cookie. Illegal cookie name" in caplog.text
+    assert "'invalid,cookie'" in caplog.text


 def test_parse_cookie_header_large_value() -> None:
@@ -1554,7 +1557,8 @@ def test_parse_cookie_header_invalid_name_in_fallback(
     """Test that fallback parser rejects cookies with invalid names."""
     header = 'normal=value; invalid,name={"x":"y"}; another=test'

-    result = parse_cookie_header(header)
+    with caplog.at_level(logging.DEBUG):
+        result = parse_cookie_header(header)

     assert len(result) == 2

@@ -1566,7 +1570,8 @@ def test_parse_cookie_header_invalid_name_in_fallback(
     assert name2 == "another"
     assert morsel2.value == "test"

-    assert "Can not load cookie: Illegal cookie name 'invalid,name'" in caplog.text
+    assert "Cannot load cookie. Illegal cookie name" in caplog.text
+    assert "'invalid,name'" in caplog.text


 def test_parse_cookie_header_empty_key_in_fallback(
@@ -1574,8 +1579,8 @@
 ) -> None:
     """Test that fallback parser logs warning for empty cookie names."""
     header = 'normal=value; ={"malformed":"json"}; another=test'
-
-    result = parse_cookie_header(header)
+    with caplog.at_level(logging.DEBUG):
+        result = parse_cookie_header(header)

     assert len(result) == 2

@@ -1587,7 +1592,8 @@ def test_parse_cookie_header_empty_key_in_fallback(
     assert name2 == "another"
     assert morsel2.value == "test"

-    assert "Can not load cookie: Illegal cookie name ''" in caplog.text
+    assert "Cannot load cookie. Illegal cookie name" in caplog.text
+    assert "''" in caplog.text


 @pytest.mark.parametrize(
diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
index 2269d4b0fda..ef597ccca46 100644
--- a/tests/test_http_parser.py
+++ b/tests/test_http_parser.py
@@ -1294,7 +1294,8 @@ def test_http_request_chunked_payload(parser: HttpRequestParser) -> None:
     parser.feed_data(b"4\r\ndata\r\n4\r\nline\r\n0\r\n\r\n")

     assert b"dataline" == b"".join(d for d in payload._buffer)
-    assert [4, 8] == payload._http_chunk_splits
+    assert payload._http_chunk_splits is not None
+    assert [4, 8] == list(payload._http_chunk_splits)
     assert payload.is_eof()


@@ -1311,7 +1312,8 @@ def test_http_request_chunked_payload_and_next_message(
     )

     assert b"dataline" == b"".join(d for d in payload._buffer)
-    assert [4, 8] == payload._http_chunk_splits
+    assert payload._http_chunk_splits is not None
+    assert [4, 8] == list(payload._http_chunk_splits)
     assert payload.is_eof()

     assert len(messages) == 1
@@ -1335,12 +1337,13 @@ def test_http_request_chunked_payload_chunks(parser: HttpRequestParser) -> None:
     parser.feed_data(b"test: test\r\n")

     assert b"dataline" == b"".join(d for d in payload._buffer)
-    assert [4, 8] == payload._http_chunk_splits
+    assert payload._http_chunk_splits is not None
+    assert [4, 8] == list(payload._http_chunk_splits)
     assert not payload.is_eof()

     parser.feed_data(b"\r\n")

     assert b"dataline" == b"".join(d for d in payload._buffer)
-    assert [4, 8] == payload._http_chunk_splits
+    assert [4, 8] == list(payload._http_chunk_splits)
     assert payload.is_eof()


@@ -1351,7 +1354,8 @@ def test_parse_chunked_payload_chunk_extension(parser: HttpRequestParser) -> Non
     parser.feed_data(b"4;test\r\ndata\r\n4\r\nline\r\n0\r\ntest: test\r\n\r\n")

     assert b"dataline" == b"".join(d for d in payload._buffer)
-    assert [4, 8] == payload._http_chunk_splits
+    assert payload._http_chunk_splits is not None
+    assert [4, 8] == list(payload._http_chunk_splits)
     assert payload.is_eof()
diff --git a/tests/test_multipart.py b/tests/test_multipart.py
index 14896667e52..fdc0389c361 100644
--- a/tests/test_multipart.py
+++ b/tests/test_multipart.py
@@ -240,11 +240,21 @@ async def test_read_incomplete_body_chunked(self) -> None:
             d = CIMultiDictProxy[str](CIMultiDict())
             obj = aiohttp.BodyPartReader(BOUNDARY, d, stream)
             result = b""
-            with pytest.raises(AssertionError):
+            with pytest.raises(ValueError):
                 for _ in range(4):
                     result += await obj.read_chunk(7)
             assert b"Hello, World!\r\n-" == result

+    async def test_read_with_content_length_malformed_crlf(self) -> None:
+        # Content-Length is correct but data after content is not \r\n
+        content = b"Hello"
+        h = CIMultiDictProxy(CIMultiDict({"CONTENT-LENGTH": str(len(content))}))
+        # Malformed: "XX" instead of "\r\n" after content
+        with Stream(content + b"XX--:--") as stream:
+            obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
+            with pytest.raises(ValueError, match="malformed"):
+                await obj.read()
+
     async def test_read_boundary_with_incomplete_chunk(self) -> None:
         with Stream(b"") as stream:
diff --git a/tests/test_urldispatch.py b/tests/test_urldispatch.py
index abe13726353..1bc7a7b638e 100644
--- a/tests/test_urldispatch.py
+++ b/tests/test_urldispatch.py
@@ -1,5 +1,6 @@
 import asyncio
 import pathlib
+import platform
 import re
 from collections.abc import (
     Awaitable,
@@ -1083,6 +1084,19 @@ async def test_405_for_resource_adapter(router: web.UrlDispatcher) -> None:
     assert (None, {"HEAD", "GET"}) == ret


+@pytest.mark.skipif(platform.system() == "Windows", reason="Different path formats")
+async def test_static_resource_outside_traversal(router: web.UrlDispatcher) -> None:
+    """Test that a relative path traversing outside the root does not resolve."""
+    static_file = pathlib.Path(aiohttp.__file__)
+    request_path = "/st" + "/.." * (len(static_file.parts) - 2) + str(static_file)
+    assert pathlib.Path(request_path).resolve() == static_file
+
+    resource = router.add_static("/st", static_file.parent)
+    ret = await resource.resolve(make_mocked_request("GET", request_path))
+    # Should not resolve, otherwise filesystem information may be leaked.
+    assert (None, set()) == ret
+
+
 async def test_check_allowed_method_for_found_resource(
     router: web.UrlDispatcher,
 ) -> None:
diff --git a/tests/test_web_functional.py b/tests/test_web_functional.py
index db51a7cf06b..71dc53b500e 100644
--- a/tests/test_web_functional.py
+++ b/tests/test_web_functional.py
@@ -1680,6 +1680,26 @@ async def handler(request: web.Request) -> NoReturn:
     resp.release()


+async def test_app_max_client_size_form(aiohttp_client: AiohttpClient) -> None:
+    async def handler(request: web.Request) -> NoReturn:
+        await request.post()
+        assert False
+
+    app = web.Application()
+    app.router.add_post("/", handler)
+    client = await aiohttp_client(app)
+
+    # Verify that the entire multipart form can't exceed client size (not just each field).
+    form = aiohttp.FormData()
+    for i in range(3):
+        form.add_field(f"f{i}", b"A" * 512000)
+
+    async with client.post("/", data=form) as resp:
+        assert resp.status == 413
+        resp_text = await resp.text()
+        assert "Maximum request body size 1048576 exceeded, actual body size" in resp_text
+
+
 async def test_app_max_client_size_adjusted(aiohttp_client: AiohttpClient) -> None:
     async def handler(request: web.Request) -> web.Response:
         await request.post()
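
The functional test above pins down the accounting change in `web_request.py`: `size = 0` is now initialised once before the field loop instead of once per field, so `client_max_size` caps the whole form. Schematically, with hypothetical field sizes matching the test:

```python
fields = [512_000, 512_000, 512_000]  # three ~500 KB fields
max_size = 1_048_576                  # default client_max_size (1 MiB)

# Old accounting: the counter reset per field, so each passed individually.
assert all(field <= max_size for field in fields)

# New accounting: the counter accumulates, so the third field trips the cap.
size = 0
for field in fields:
    size += field
    if size > max_size:
        print(f"413: body size {size} exceeds {max_size}")
        break
```
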
diff --git a/tests/test_web_request.py b/tests/test_web_request.py
index 7d172e0d901..cd18a90015e 100644
--- a/tests/test_web_request.py
+++ b/tests/test_web_request.py
@@ -1,5 +1,6 @@
 import asyncio
 import datetime
+import logging
 import socket
 import ssl
 import sys
@@ -222,6 +223,13 @@ def test_range_to_slice_tail_stop() -> None:
     assert req.http_range.start == -500 and req.http_range.stop is None


+def test_range_non_ascii() -> None:
+    # ५ = DEVANAGARI DIGIT FIVE
+    req = make_mocked_request("GET", "/", headers=CIMultiDict([("RANGE", "bytes=4-५")]))
+    with pytest.raises(ValueError, match="range not in acceptable format"):
+        req.http_range
+
+
 def test_non_keepalive_on_http10() -> None:
     req = make_mocked_request("GET", "/", version=HttpVersion(1, 0))
     assert not req.keep_alive
@@ -351,6 +359,22 @@ def test_request_cookies_edge_cases() -> None:
     assert req.cookies == {"test": "quoted value", "normal": "unquoted"}


+def test_request_cookies_many_invalid(caplog: pytest.LogCaptureFixture) -> None:
+    """Test that many invalid cookies don't trigger excessive logging."""
+    bad = "bad" + chr(1) + "name"
+    cookie = "; ".join(f"{bad}{i}=1" for i in range(3000))
+    req = make_mocked_request("GET", "/", headers=CIMultiDict(COOKIE=cookie))
+
+    with caplog.at_level(logging.DEBUG):
+        cookies = req.cookies
+
+    assert len(caplog.record_tuples) == 1
+    _, level, msg = caplog.record_tuples[0]
+    assert level == logging.DEBUG
+    assert "Cannot load cookie" in msg
+    assert cookies == {}
+
+
 def test_request_cookies_no_500_error() -> None:
     """Test that cookies with special characters don't cause 500 errors.
@@ -889,6 +913,27 @@ async def test_multipart_formdata(protocol: BaseProtocol) -> None:
     assert dict(result) == {"a": "b", "c": "d"}


+async def test_multipart_formdata_field_missing_name(protocol: BaseProtocol) -> None:
+    # Ensure ValueError is raised when Content-Disposition has no name
+    payload = StreamReader(protocol, 2**16, loop=asyncio.get_event_loop())
+    payload.feed_data(
+        b"-----------------------------326931944431359\r\n"
+        b"Content-Disposition: form-data\r\n"  # Missing name!
+        b"\r\n"
+        b"value\r\n"
+        b"-----------------------------326931944431359--\r\n"
+    )
+    content_type = (
+        "multipart/form-data; boundary=---------------------------326931944431359"
+    )
+    payload.feed_eof()
+    req = make_mocked_request(
+        "POST", "/", headers={"CONTENT-TYPE": content_type}, payload=payload
+    )
+    with pytest.raises(ValueError, match="Multipart field missing name"):
+        await req.post()
+
+
 async def test_multipart_formdata_file(protocol: BaseProtocol) -> None:
     # Make sure file uploads work, even without a content type
     payload = StreamReader(protocol, 2**16, loop=asyncio.get_event_loop())
diff --git a/tests/test_web_sendfile_functional.py b/tests/test_web_sendfile_functional.py
index 5a44207b7c1..21249daa371 100644
--- a/tests/test_web_sendfile_functional.py
+++ b/tests/test_web_sendfile_functional.py
@@ -652,7 +652,7 @@ async def test_static_file_directory_traversal_attack(
     url_abspath = "/static/" + str(full_path.resolve())
     resp = await client.get(url_abspath)
-    assert 403 == resp.status
+    assert resp.status == 404
     resp.release()

     await client.close()
diff --git a/tests/test_web_urldispatcher.py b/tests/test_web_urldispatcher.py
index 176d7468851..468b39a1c52 100644
--- a/tests/test_web_urldispatcher.py
+++ b/tests/test_web_urldispatcher.py
@@ -825,7 +825,7 @@ async def test_static_absolute_url(
     app.router.add_static("/static", here)
     client = await aiohttp_client(app)
     async with client.get("/static/" + str(file_path.resolve())) as resp:
-        assert resp.status == 403
+        assert resp.status == 404


 async def test_for_issue_5250(