Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 37 additions & 25 deletions b2/_internal/_utils/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from __future__ import annotations

import dataclasses
import pathlib
import urllib.parse
import re
from functools import singledispatchmethod
from pathlib import Path
from typing import Sequence
Expand All @@ -24,7 +23,10 @@
)
from b2sdk.v3.exception import B2Error

from b2._internal._utils.python_compat import removeprefix
_B2ID_PATTERN = re.compile(r'^b2id://(?P<file_id>[a-zA-Z0-9:_-]+)$', re.IGNORECASE)
_B2_PATTERN = re.compile(r'^b2://(?P<bucket>[a-z0-9-]*)(?P<path>/.*)?$', re.IGNORECASE)
_SCHEME_PATTERN = re.compile(r'(?P<scheme>[a-z0-9]*)://.*', re.IGNORECASE)
_CONTROL_CHARACTERS_AND_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '


class B2URIBase:
Expand Down Expand Up @@ -92,10 +94,10 @@ def parse_uri(uri: str, *, allow_all_buckets: bool = False) -> Path | B2URI | B2
"""
if not uri:
raise ValueError('URI cannot be empty')
parsed = urllib.parse.urlsplit(uri)
if parsed.scheme == '':
return pathlib.Path(uri)
return _parse_b2_uri(uri, parsed, allow_all_buckets=allow_all_buckets)

if _SCHEME_PATTERN.fullmatch(_clean_uri(uri)):
return _parse_b2_uri(uri, allow_all_buckets=allow_all_buckets)
return Path(uri)


def parse_b2_uri(
Expand All @@ -110,38 +112,48 @@ def parse_b2_uri(
:return: B2 URI
:raises ValueError: if the URI is invalid
"""
parsed = urllib.parse.urlsplit(uri)
return _parse_b2_uri(uri, parsed, allow_all_buckets=allow_all_buckets, allow_b2id=allow_b2id)
return _parse_b2_uri(uri, allow_all_buckets=allow_all_buckets, allow_b2id=allow_b2id)


def _clean_uri(uri: str) -> str:
    """Return *uri* with leading junk and embedded line breaks/tabs removed.

    Two normalisation steps are applied, in order:

    1. Every leading character contained in ``_CONTROL_CHARACTERS_AND_SPACE``
       is stripped from the left-hand side of the string.
    2. All newline, carriage-return and tab characters are deleted, wherever
       they occur in the remaining text.

    :param uri: raw URI string as supplied by the caller
    :return: the sanitised URI string
    """
    without_leading_junk = uri.lstrip(_CONTROL_CHARACTERS_AND_SPACE)
    # Deleting the three characters in a single pass gives the same result as
    # removing them one after another.
    return without_leading_junk.translate(str.maketrans('', '', '\n\r\t'))


def _parse_b2_uri(
uri,
parsed: urllib.parse.SplitResult,
*,
allow_all_buckets: bool = False,
allow_b2id: bool = True,
) -> B2URI | B2FileIdURI:
if parsed.scheme in ('b2', 'b2id'):
path = urllib.parse.urlunsplit(parsed._replace(scheme='', netloc=''))
if not parsed.netloc:
uri = _clean_uri(uri)
if uri.lower().startswith('b2://'):
match = _B2_PATTERN.fullmatch(uri)
if not match:
raise ValueError(f'Invalid B2 URI: {uri!r}')

bucket = match.group('bucket')
path = match.group('path')
if not bucket:
if allow_all_buckets:
if path:
raise ValueError(
f"Invalid B2 URI: all buckets URI doesn't allow non-empty path, but {path!r} was provided"
)
return B2URI(bucket_name='')
raise ValueError(f'Invalid B2 URI: {uri!r}')
elif parsed.password or parsed.username:
raise ValueError(
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI'
)

if parsed.scheme == 'b2':
return B2URI(bucket_name=parsed.netloc, path=removeprefix(path, '/'))
elif parsed.scheme == 'b2id' and allow_b2id:
return B2FileIdURI(file_id=parsed.netloc)
else:
raise ValueError(f'Unsupported URI scheme: {parsed.scheme!r}')
else:
return B2URI(bucket_name=bucket, path=path[1:] if path else '')
elif allow_b2id and uri.lower().startswith('b2id://'):
match = _B2ID_PATTERN.fullmatch(uri)
if match:
return B2FileIdURI(file_id=match.group('file_id'))
elif match := _SCHEME_PATTERN.fullmatch(uri):
raise ValueError(f'Unsupported URI scheme: {match.group("scheme")!r}')

raise ValueError(f'Invalid B2 URI: {uri!r}')


class B2URIAdapter:
Expand Down
1 change: 1 addition & 0 deletions changelog.d/1090.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Handle filenames starting with / or ending with # or ?.
10 changes: 8 additions & 2 deletions test/unit/_utils/test_uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,18 @@ def test_b2fileuri_str():
[
('some/local/path', Path('some/local/path')),
('./some/local/path', Path('some/local/path')),
('.', Path('')),
('b2://bucket', B2URI(bucket_name='bucket')),
(' b2://bucket', B2URI(bucket_name='bucket')),
('b2://bucket/', B2URI(bucket_name='bucket')),
('b2://bucket/path/to/dir/', B2URI(bucket_name='bucket', path='path/to/dir/')),
('b2id://file123', B2FileIdURI(file_id='file123')),
('b2://bucket/wild[card]', B2URI(bucket_name='bucket', path='wild[card]')),
('b2://bucket/wild?card', B2URI(bucket_name='bucket', path='wild?card')),
('b2://bucket/special#char', B2URI(bucket_name='bucket', path='special#char')),
('b2://bucket/special#', B2URI(bucket_name='bucket', path='special#')),
('b2://bucket/special?', B2URI(bucket_name='bucket', path='special?')),
('b2://bucket//special', B2URI(bucket_name='bucket', path='/special')),
],
)
def test_parse_uri(uri, expected):
Expand All @@ -94,14 +99,15 @@ def test_parse_uri__allow_all_buckets():
# Test cases for B2 URIs with credentials
(
'b2://user@password:bucket/path',
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI',
"Invalid B2 URI: 'b2://user@password:bucket/path'",
),
(
'b2id://user@password:file123',
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI',
"Invalid B2 URI: 'b2id://user@password:file123'",
),
# Test cases for unsupported URI schemes
('unknown://bucket/path', "Unsupported URI scheme: 'unknown'"),
(' unknown://bucket/path', "Unsupported URI scheme: 'unknown'"),
],
)
def test_parse_uri_exceptions(uri, expected_exception_message):
Expand Down
2 changes: 1 addition & 1 deletion test/unit/console_tool/test_download_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def test_cat__b2_uri__invalid(b2_cli, capfd):
expected_stderr=None,
expected_status=2,
)
assert "argument B2_URI: Unsupported URI scheme: ''" in capfd.readouterr().err
assert "argument B2_URI: Invalid B2 URI: 'nothing/meaningful'" in capfd.readouterr().err


def test_cat__b2_uri__not_a_file(b2_cli, bucket, capfd):
Expand Down
Loading