Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ The RESTCatalog supports pluggable authentication via the `auth` configuration b
- `oauth2`: OAuth2 client credentials flow.
- `custom`: Custom authentication manager (requires `auth.impl`).
- `google`: Google Authentication support
- `entra`: Microsoft Entra ID (Azure AD) authentication support

###### Configuration Properties

Expand Down Expand Up @@ -422,6 +423,7 @@ catalog:
| `auth.oauth2` | If type is `oauth2` | Block containing OAuth2 configuration (see below). |
| `auth.custom` | If type is `custom` | Block containing configuration for the custom AuthManager. |
| `auth.google` | If type is `google` | Block containing `credentials_path` to a service account file (if using). Will default to using Application Default Credentials. |
| `auth.entra` | If type is `entra` | Block containing Entra ID configuration. Will default to using DefaultAzureCredential. |

###### Examples

Expand Down Expand Up @@ -578,22 +580,38 @@ catalog:

See [OneLake table APIs for Iceberg](https://aka.ms/onelakeircdocs) for detailed documentation.

Using Entra ID authentication (recommended):

```yaml
catalog:
onelake_catalog:
type: rest
uri: https://onelake.table.fabric.microsoft.com/iceberg
warehouse: <fabric_workspace_id>/<fabric_data_item_id>
auth:
type: entra
adls.account-name: onelake
adls.account-host: onelake.blob.fabric.microsoft.com
```

Using static token:

```yaml
catalog:
onelake_catalog:
type: rest
uri: https://onelake.table.fabric.microsoft.com/iceberg
warehouse: <fabric_workspace_id>/<fabric_data_item_id> # Example : DB0CE1EE-B014-47D3-8F0C-9D64C39C0FC2/F470A1D2-6D6D-4C9D-8796-46286C80B7C0
token: <token>,
adls.account-name: onelake,
adls.account-host: onelake.blob.fabric.microsoft.com,
token: <token>
adls.account-name: onelake
adls.account-host: onelake.blob.fabric.microsoft.com
adls.credential: <credential>
```

<!-- prettier-ignore-start -->

!!! Note "OneLake Authentication Models"
For authentication, you can use `DefaultAzureCredential` from the `azure.identity` package, or refer to the other [authentication flows](https://learn.microsoft.com/en-us/entra/identity-platform/authentication-flows-app-scenarios) for detailed documentation.
!!! Note "OneLake Authentication"
Use the `entra` auth type for Entra ID (Azure AD) authentication via [DefaultAzureCredential](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains?tabs=dac#defaultazurecredential-overview), which supports environment variables, managed identity, Azure CLI, and more. Install with `pip install pyiceberg[entra-auth]`.
<!-- prettier-ignore-end -->

### SQL Catalog
Expand Down
3 changes: 3 additions & 0 deletions mkdocs/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ You can mix and match optional dependencies depending on your needs:
| rest-sigv4 | Support for generating AWS SIGv4 authentication headers for REST Catalogs |
| pyiceberg-core | Installs iceberg-rust powered core |
| datafusion | Installs both PyArrow and Apache DataFusion |
| hf | Support for Hugging Face Hub |
| gcp-auth | Support for Google Cloud authentication |
| entra-auth | Support for Microsoft Entra ID (Azure AD) authentication |

You either need to install `s3fs`, `adlfs`, `gcsfs`, or `pyarrow` to be able to fetch files from an object store.

Expand Down
63 changes: 63 additions & 0 deletions pyiceberg/catalog/rest/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,68 @@ def auth_header(self) -> str:
return f"Bearer {self.credentials.token}"


class EntraAuthManager(AuthManager):
    """Auth Manager implementation that supports Microsoft Entra ID (Azure AD) authentication.

    This manager uses the Azure Identity library's DefaultAzureCredential which automatically
    tries multiple authentication methods including environment variables, managed identity,
    and Azure CLI.

    The access token is cached on the instance and only re-requested once it is within
    60 seconds of its reported expiry; the cache is guarded by a lock.

    See https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication/credential-chains
    for more details on DefaultAzureCredential.
    """

    DEFAULT_SCOPE = "https://storage.azure.com/.default"

    def __init__(
        self,
        scopes: list[str] | str | None = None,
        **credential_kwargs: Any,
    ):
        """
        Initialize EntraAuthManager.

        Args:
            scopes: OAuth2 scopes to request. May be a list of scopes or a single scope given as a
                plain string. Defaults to ["https://storage.azure.com/.default"] when omitted or empty.
            **credential_kwargs: Arguments passed to DefaultAzureCredential.
                Supported authentication methods:
                - Environment Variables: Set AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET
                - Managed Identity: Works automatically on Azure; for user-assigned, pass managed_identity_client_id
                - Azure CLI: Works automatically if logged in via `az login`
                - Workload Identity: Works automatically in AKS with workload identity configured  # codespell:ignore aks

        Raises:
            ImportError: If the azure-identity package is not installed.
        """
        try:
            from azure.identity import DefaultAzureCredential
        except ImportError as e:
            raise ImportError("Azure Identity library not found. Please install with: pip install pyiceberg[entra-auth]") from e

        # A bare string would otherwise be unpacked character by character by
        # `get_token(*self._scopes)`, requesting one bogus scope per character.
        if isinstance(scopes, str):
            scopes = [scopes]
        # Copy so that later mutation of the caller's list cannot change the requested scopes.
        self._scopes: list[str] = list(scopes) if scopes else [self.DEFAULT_SCOPE]
        self._lock = threading.Lock()
        self._token: str | None = None
        # 0 means "already expired", which forces a fetch on first use.
        self._expires_at: float = 0
        self._credential = DefaultAzureCredential(**credential_kwargs)

    def _refresh_token(self) -> None:
        """Refresh the access token from Azure.

        Callers are expected to hold ``self._lock``; this method does not acquire it.
        """
        token = self._credential.get_token(*self._scopes)
        self._token = token.token
        # expires_on is a Unix timestamp; add a 60-second margin for safety
        self._expires_at = token.expires_on - 60

    def _get_token(self) -> str:
        """Get a valid access token, refreshing if necessary.

        Returns:
            The cached or freshly fetched access token.

        Raises:
            ValueError: If no token is available after a refresh attempt.
        """
        with self._lock:
            if not self._token or time.time() >= self._expires_at:
                self._refresh_token()
            if self._token is None:
                raise ValueError("Failed to obtain Entra access token")
            return self._token

    def auth_header(self) -> str:
        """Return the Authorization header value with a valid Bearer token."""
        return f"Bearer {self._get_token()}"


class AuthManagerAdapter(AuthBase):
"""A `requests.auth.AuthBase` adapter for integrating an `AuthManager` into a `requests.Session`.

Expand Down Expand Up @@ -330,3 +392,4 @@ def create(cls, class_or_name: str, config: dict[str, Any]) -> AuthManager:
AuthManagerFactory.register("legacyoauth2", LegacyOAuth2AuthManager)
AuthManagerFactory.register("oauth2", OAuth2AuthManager)
AuthManagerFactory.register("google", GoogleAuthManager)
# "entra" matches the `auth.type: entra` value shown in the configuration docs.
AuthManagerFactory.register("entra", EntraAuthManager)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ hf = ["huggingface-hub>=0.24.0"]
pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.9.0"]
datafusion = ["datafusion>=51,<52"]
gcp-auth = ["google-auth>=2.4.0"]
entra-auth = ["azure-identity>=1.25.1"]

[dependency-groups]
dev = [
Expand Down
98 changes: 97 additions & 1 deletion tests/catalog/test_rest_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import requests
from requests_mock import Mocker

from pyiceberg.catalog.rest.auth import AuthManagerAdapter, BasicAuthManager, GoogleAuthManager, NoopAuthManager
from pyiceberg.catalog.rest.auth import AuthManagerAdapter, BasicAuthManager, EntraAuthManager, GoogleAuthManager, NoopAuthManager

TEST_URI = "https://iceberg-test-catalog/"
GOOGLE_CREDS_URI = "https://oauth2.googleapis.com/token"
Expand Down Expand Up @@ -153,3 +153,99 @@ def test_google_auth_manager_import_error() -> None:
with patch.dict("sys.modules", {"google.auth": None, "google.auth.transport.requests": None}):
with pytest.raises(ImportError, match="Google Auth libraries not found. Please install 'google-auth'."):
GoogleAuthManager()


@patch("azure.identity.DefaultAzureCredential")
def test_entra_auth_manager_default_credential(mock_default_cred: MagicMock, rest_mock: Mocker) -> None:
    """A manager built without arguments uses a bare DefaultAzureCredential and the default scope."""
    # Far-future expiry, so the token is treated as still valid.
    fake_token = MagicMock(token="entra_default_token", expires_on=9999999999)
    fake_credential = MagicMock()
    fake_credential.get_token.return_value = fake_token
    mock_default_cred.return_value = fake_credential

    http = requests.Session()
    http.auth = AuthManagerAdapter(EntraAuthManager())
    http.get(TEST_URI)

    # The credential is constructed with no kwargs and asked for the default storage scope.
    mock_default_cred.assert_called_once_with()
    fake_credential.get_token.assert_called_once_with("https://storage.azure.com/.default")

    sent = rest_mock.request_history
    assert len(sent) == 1
    assert sent[0].headers["Authorization"] == "Bearer entra_default_token"


@patch("azure.identity.DefaultAzureCredential")
def test_entra_auth_manager_with_managed_identity_client_id(mock_default_cred: MagicMock, rest_mock: Mocker) -> None:
    """Extra keyword arguments such as managed_identity_client_id are forwarded to DefaultAzureCredential."""
    client_id = "user-assigned-client-id"

    fake_token = MagicMock(token="entra_mi_token", expires_on=9999999999)
    fake_credential = MagicMock()
    fake_credential.get_token.return_value = fake_token
    mock_default_cred.return_value = fake_credential

    http = requests.Session()
    http.auth = AuthManagerAdapter(EntraAuthManager(managed_identity_client_id=client_id))
    http.get(TEST_URI)

    # The kwarg reaches the credential constructor unchanged; the scope stays at the default.
    mock_default_cred.assert_called_once_with(managed_identity_client_id="user-assigned-client-id")
    fake_credential.get_token.assert_called_once_with("https://storage.azure.com/.default")

    sent = rest_mock.request_history
    assert len(sent) == 1
    assert sent[0].headers["Authorization"] == "Bearer entra_mi_token"


@patch("azure.identity.DefaultAzureCredential")
def test_entra_auth_manager_custom_scopes(mock_default_cred: MagicMock, rest_mock: Mocker) -> None:
    """Caller-supplied scopes replace the default scope when requesting a token."""
    fake_token = MagicMock(token="entra_custom_scope_token", expires_on=9999999999)
    fake_credential = MagicMock()
    fake_credential.get_token.return_value = fake_token
    mock_default_cred.return_value = fake_credential

    custom_scopes = ["https://datalake.azure.net/.default", "https://storage.azure.com/.default"]
    manager = EntraAuthManager(scopes=custom_scopes)

    http = requests.Session()
    http.auth = AuthManagerAdapter(manager)
    http.get(TEST_URI)

    mock_default_cred.assert_called_once_with()
    # Each scope is passed to get_token as its own positional argument.
    fake_credential.get_token.assert_called_once_with(*custom_scopes)

    sent = rest_mock.request_history
    assert len(sent) == 1
    assert sent[0].headers["Authorization"] == "Bearer entra_custom_scope_token"


def test_entra_auth_manager_import_error() -> None:
    """Constructing EntraAuthManager without azure-identity available raises a helpful ImportError."""
    # A None entry in sys.modules makes the import of that module fail.
    hidden_modules = {"azure.identity": None}
    with patch.dict("sys.modules", hidden_modules), pytest.raises(ImportError, match="Azure Identity library not found"):
        EntraAuthManager()


@patch("azure.identity.DefaultAzureCredential")
def test_entra_auth_manager_token_failure(mock_default_cred: MagicMock, rest_mock: Mocker) -> None:
    """A failure while acquiring the token propagates and no HTTP request is sent."""
    failing_credential = MagicMock()
    failing_credential.get_token.side_effect = Exception("Failed to acquire token")
    mock_default_cred.return_value = failing_credential

    http = requests.Session()
    http.auth = AuthManagerAdapter(EntraAuthManager())

    with pytest.raises(Exception, match="Failed to acquire token"):
        http.get(TEST_URI)

    # Nothing may have gone out on the wire with a blank/missing auth header.
    sent = rest_mock.request_history
    assert len(sent) == 0
6 changes: 5 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.