diff --git a/airbyte/mcp/connector_registry.py b/airbyte/mcp/connector_registry.py
index 7dbedb341..d641fcf45 100644
--- a/airbyte/mcp/connector_registry.py
+++ b/airbyte/mcp/connector_registry.py
@@ -16,15 +16,15 @@
 from airbyte.mcp._tool_utils import mcp_tool, register_tools
 from airbyte.mcp._util import resolve_list_of_strings
 from airbyte.registry import (
-    _DEFAULT_MANIFEST_URL,
+    _DEFAULT_METADATA_URL,
     ApiDocsUrl,
     ConnectorMetadata,
     ConnectorVersionInfo,
     InstallType,
     get_available_connectors,
-    get_connector_api_docs_urls,
     get_connector_metadata,
 )
+from airbyte.registry import get_connector_docs_urls as _get_connector_docs_urls
 from airbyte.registry import get_connector_version_history as _get_connector_version_history
 from airbyte.sources.util import get_source
 
@@ -161,7 +161,7 @@ def get_connector_info(
         connector.install()
         config_spec_jsonschema = connector.config_spec
 
-    manifest_url = _DEFAULT_MANIFEST_URL.format(
+    manifest_url = _DEFAULT_METADATA_URL.format(
         source_name=connector_name,
         version="latest",
     )
@@ -180,7 +180,7 @@ def get_connector_info(
     read_only=True,
     idempotent=True,
 )
-def get_api_docs_urls(
+def get_connector_docs_urls(
     connector_name: Annotated[
         str,
         Field(
@@ -191,14 +191,14 @@ def get_api_docs_urls(
         ),
     ],
 ) -> list[ApiDocsUrl] | Literal["Connector not found."]:
-    """Get API documentation URLs for a connector.
+    """Get documentation URLs for a connector.
 
-    This tool retrieves documentation URLs for a connector's upstream API from multiple sources:
+    This tool retrieves documentation URLs for a connector from multiple sources:
     - Registry metadata (documentationUrl, externalDocumentationUrls)
-    - Connector manifest.yaml file (data.externalDocumentationUrls)
+    - Connector metadata.yaml file (data.externalDocumentationUrls)
     """
     try:
-        return get_connector_api_docs_urls(connector_name)
+        return _get_connector_docs_urls(connector_name)
     except exc.AirbyteConnectorNotRegisteredError:
         return "Connector not found."
 
diff --git a/airbyte/registry.py b/airbyte/registry.py
index adff22755..8fee2c91d 100644
--- a/airbyte/registry.py
+++ b/airbyte/registry.py
@@ -40,8 +40,8 @@
 _PYTHON_LANGUAGE_TAG = f"language:{_PYTHON_LANGUAGE}"
 _MANIFEST_ONLY_TAG = f"language:{_MANIFEST_ONLY_LANGUAGE}"
 
-_DEFAULT_MANIFEST_URL = (
-    "https://connectors.airbyte.com/files/metadata/airbyte/{source_name}/{version}/manifest.yaml"
+_DEFAULT_METADATA_URL = (
+    "https://connectors.airbyte.com/files/metadata/airbyte/{source_name}/{version}/metadata.yaml"
 )
 
 
@@ -312,58 +312,88 @@ class ApiDocsUrl(BaseModel):
     model_config = {"populate_by_name": True}
 
     @classmethod
-    def from_manifest_dict(cls, manifest_data: dict[str, Any]) -> list[Self]:
-        """Extract documentation URLs from parsed manifest data.
+    def from_metadata_docs_list(
+        cls, docs: list[dict[str, Any]], *, source: str, context: str
+    ) -> list[Self]:
+        """Extract documentation URLs from a list of metadata documentation dictionaries.
 
         Args:
-            manifest_data: The parsed manifest.yaml data as a dictionary
+            docs: List of documentation dictionaries with 'title' and 'url' fields
+            source: The source identifier for these documentation URLs
+            context: Context string for error messages (e.g., "Metadata", "Registry")
 
         Returns:
-            List of ApiDocsUrl objects extracted from the manifest
+            List of ApiDocsUrl objects extracted from the docs list
+
+        Raises:
+            PyAirbyteInternalError: If a documentation entry is missing required 'title' or
+                'url' field
         """
         results: list[Self] = []
-
-        data_section = manifest_data.get("data")
-        if isinstance(data_section, dict):
-            external_docs = data_section.get("externalDocumentationUrls")
-            if isinstance(external_docs, list):
-                results = [
+        for doc in docs:
+            try:
+                results.append(
                     cls(
                         title=doc["title"],
                         url=doc["url"],
-                        source="data_external_docs",
+                        source=source,
                         doc_type=doc.get("type", "other"),
                         requires_login=doc.get("requiresLogin", False),
                     )
-                    for doc in external_docs
-                ]
-
+                )
+            except KeyError as e:
+                raise exc.PyAirbyteInternalError(
+                    message=f"{context} parsing error: missing required field in {doc}: {e}"
+                ) from e
         return results
 
+    @classmethod
+    def from_metadata_dict(cls, metadata_data: dict[str, Any]) -> list[Self]:
+        """Extract documentation URLs from parsed metadata.
+
+        Args:
+            metadata_data: The parsed metadata.yaml data as a dictionary
 
-def _manifest_url_for(connector_name: str) -> str:
-    """Get the expected URL of the manifest.yaml file for a connector.
+        Returns:
+            List of ApiDocsUrl objects extracted from the metadata
+
+        Raises:
+            PyAirbyteInternalError: If a documentation entry is missing required 'title' or
+                'url' field
+        """
+        data_section = metadata_data.get("data")
+        if isinstance(data_section, dict):
+            external_docs = data_section.get("externalDocumentationUrls")
+            if isinstance(external_docs, list):
+                return cls.from_metadata_docs_list(
+                    external_docs, source="metadata_external_docs", context="Metadata"
+                )
+        return []
+
+
+def _metadata_url_for(connector_name: str) -> str:
+    """Get the expected URL of the metadata.yaml file for a connector.
 
     Args:
         connector_name: The canonical connector name (e.g., "source-facebook-marketing")
 
     Returns:
-        The URL to the connector's manifest.yaml file
+        The URL to the connector's metadata.yaml file
     """
-    return _DEFAULT_MANIFEST_URL.format(
+    return _DEFAULT_METADATA_URL.format(
         source_name=connector_name,
         version="latest",
     )
 
 
-def _fetch_manifest_dict(url: str) -> dict[str, Any]:
-    """Fetch and parse a manifest.yaml file from a URL.
+def _fetch_metadata_dict(url: str) -> dict[str, Any]:
+    """Fetch and parse a metadata.yaml file from a URL.
 
     Args:
-        url: The URL to fetch the manifest from
+        url: The URL to fetch the metadata from
 
     Returns:
-        The parsed manifest data as a dictionary, or empty dict if manifest not found (404)
+        The parsed metadata as a dictionary, or empty dict if metadata not found (404)
 
     Raises:
         HTTPError: If the request fails with a non-404 status code
@@ -386,6 +416,9 @@ def _extract_docs_from_registry(connector_name: str) -> list[ApiDocsUrl]:
 
     Returns:
         List of ApiDocsUrl objects extracted from the registry
+
+    Raises:
+        PyAirbyteInternalError: If a documentation entry is missing required 'title' or 'url' field
     """
     registry_url = _get_registry_url()
     response = requests.get(registry_url, timeout=10)
@@ -407,6 +440,7 @@ def _extract_docs_from_registry(connector_name: str) -> list[ApiDocsUrl]:
                 title="Airbyte Documentation",
                 url=connector_entry["documentationUrl"],
                 source="registry",
+                doc_type="internal",
             )
         )
 
@@ -414,33 +448,26 @@ def _extract_docs_from_registry(connector_name: str) -> list[ApiDocsUrl]:
         external_docs = connector_entry["externalDocumentationUrls"]
         if isinstance(external_docs, list):
             docs_urls.extend(
-                [
-                    ApiDocsUrl(
-                        title=doc["title"],
-                        url=doc["url"],
-                        source="registry_external_docs",
-                        doc_type=doc.get("type", "other"),
-                        requires_login=doc.get("requiresLogin", False),
-                    )
-                    for doc in external_docs
-                ]
+                ApiDocsUrl.from_metadata_docs_list(
+                    external_docs, source="registry_external_docs", context="Registry"
+                )
             )
 
     return docs_urls
 
 
-def get_connector_api_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
-    """Get API documentation URLs for a connector.
+def get_connector_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
+    """Get documentation URLs for a connector.
 
-    This function retrieves documentation URLs for a connector's upstream API from multiple sources:
+    This function retrieves documentation URLs for a connector from multiple sources:
     - Registry metadata (documentationUrl, externalDocumentationUrls)
-    - Connector manifest.yaml file (data.externalDocumentationUrls)
+    - Connector metadata.yaml file (data.externalDocumentationUrls)
 
     Args:
         connector_name: The canonical connector name (e.g., "source-facebook-marketing")
 
     Returns:
-        List of ApiDocsUrl objects with documentation URLs, deduplicated by URL.
+        List of ApiDocsUrl objects with documentation URLs.
 
     Raises:
         AirbyteConnectorNotRegisteredError: If the connector is not found in the registry.
@@ -459,19 +486,12 @@ def get_connector_api_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
     registry_urls = _extract_docs_from_registry(connector_name)
     docs_urls.extend(registry_urls)
 
-    manifest_url = _manifest_url_for(connector_name)
-    manifest_data = _fetch_manifest_dict(manifest_url)
-    manifest_urls = ApiDocsUrl.from_manifest_dict(manifest_data)
-    docs_urls.extend(manifest_urls)
-
-    seen_urls = set()
-    unique_docs_urls = []
-    for doc_url in docs_urls:
-        if doc_url.url not in seen_urls:
-            seen_urls.add(doc_url.url)
-            unique_docs_urls.append(doc_url)
+    metadata_url = _metadata_url_for(connector_name)
+    metadata_data = _fetch_metadata_dict(metadata_url)
+    metadata_urls = ApiDocsUrl.from_metadata_dict(metadata_data)
+    docs_urls.extend(metadata_urls)
 
-    return unique_docs_urls
+    return docs_urls
 
 
 def get_connector_version_history(
diff --git a/tests/unit_tests/test_mcp_connector_registry.py b/tests/unit_tests/test_mcp_connector_registry.py
index 5f70ea167..3b19fd646 100644
--- a/tests/unit_tests/test_mcp_connector_registry.py
+++ b/tests/unit_tests/test_mcp_connector_registry.py
@@ -5,42 +5,44 @@
 
 from unittest.mock import MagicMock, patch
 
+import pytest
+
 from airbyte import exceptions as exc
-from airbyte.mcp.connector_registry import get_api_docs_urls
+from airbyte.mcp.connector_registry import get_connector_docs_urls
 from airbyte.registry import (
     ApiDocsUrl,
-    _fetch_manifest_dict,
-    _manifest_url_for,
+    _fetch_metadata_dict,
+    _metadata_url_for,
 )
 
 
-class TestManifestUrlFor:
-    """Tests for _manifest_url_for function."""
+class TestMetadataUrlFor:
+    """Tests for _metadata_url_for function."""
 
-    def test_manifest_url_for(self) -> None:
-        """Test generating manifest URL for a connector."""
-        url = _manifest_url_for("source-example")
+    def test_metadata_url_for(self) -> None:
+        """Test generating metadata URL for a connector."""
+        url = _metadata_url_for("source-example")
         assert "source-example" in url
-        assert "manifest.yaml" in url
+        assert "metadata.yaml" in url
         assert "latest" in url
 
 
-class TestFetchManifestDict:
-    """Tests for _fetch_manifest_dict function."""
+class TestFetchMetadataDict:
+    """Tests for _fetch_metadata_dict function."""
 
-    def test_manifest_not_found(self) -> None:
-        """Test handling when manifest.yaml doesn't exist (404)."""
+    def test_metadata_not_found(self) -> None:
+        """Test handling when metadata.yaml doesn't exist (404)."""
         with patch("airbyte.registry.requests.get") as mock_get:
             mock_response = MagicMock()
             mock_response.status_code = 404
             mock_get.return_value = mock_response
 
-            manifest_dict = _fetch_manifest_dict("https://example.com/manifest.yaml")
-            assert manifest_dict == {}
+            metadata_dict = _fetch_metadata_dict("https://example.com/metadata.yaml")
+            assert metadata_dict == {}
 
-    def test_fetch_manifest_dict(self) -> None:
-        """Test fetching and parsing manifest.yaml."""
-        manifest_yaml = """
+    def test_fetch_metadata_dict(self) -> None:
+        """Test fetching and parsing metadata.yaml."""
+        metadata_yaml = """
 version: 1.0.0
 type: DeclarativeSource
 data:
@@ -49,21 +51,21 @@ def test_fetch_manifest_dict(self) -> None:
         with patch("airbyte.registry.requests.get") as mock_get:
             mock_response = MagicMock()
             mock_response.status_code = 200
-            mock_response.text = manifest_yaml
+            mock_response.text = metadata_yaml
             mock_get.return_value = mock_response
 
-            manifest_dict = _fetch_manifest_dict("https://example.com/manifest.yaml")
-            assert manifest_dict["version"] == "1.0.0"
-            assert manifest_dict["type"] == "DeclarativeSource"
-            assert manifest_dict["data"]["name"] == "Example"
+            metadata_dict = _fetch_metadata_dict("https://example.com/metadata.yaml")
+            assert metadata_dict["version"] == "1.0.0"
+            assert metadata_dict["type"] == "DeclarativeSource"
+            assert metadata_dict["data"]["name"] == "Example"
 
 
-class TestApiDocsUrlFromManifestDict:
-    """Tests for ApiDocsUrl.from_manifest_dict classmethod."""
+class TestApiDocsUrlFromMetadataDict:
+    """Tests for ApiDocsUrl.from_metadata_dict classmethod."""
 
-    def test_manifest_with_external_docs_urls(self) -> None:
+    def test_metadata_with_external_docs_urls(self) -> None:
         """Test extracting URLs from data.externalDocumentationUrls field."""
-        manifest_dict = {
+        metadata_dict = {
             "version": "1.0.0",
             "type": "DeclarativeSource",
             "data": {
@@ -88,7 +90,7 @@ def test_manifest_with_external_docs_urls(self) -> None:
             },
         }
 
-        urls = ApiDocsUrl.from_manifest_dict(manifest_dict)
+        urls = ApiDocsUrl.from_metadata_dict(metadata_dict)
         assert len(urls) == 3
         assert urls[0].title == "Versioning docs"
         assert urls[0].url == "https://api.example.com/versioning"
@@ -100,9 +102,9 @@ def test_manifest_with_external_docs_urls(self) -> None:
         assert urls[2].doc_type == "api_deprecations"
         assert urls[2].requires_login is True
 
-    def test_manifest_with_external_docs_no_type(self) -> None:
+    def test_metadata_with_external_docs_no_type(self) -> None:
         """Test extracting URLs from data.externalDocumentationUrls without type field."""
-        manifest_dict = {
+        metadata_dict = {
             "version": "1.0.0",
             "type": "DeclarativeSource",
             "data": {
@@ -115,49 +117,70 @@ def test_manifest_with_external_docs_no_type(self) -> None:
             },
         }
 
-        urls = ApiDocsUrl.from_manifest_dict(manifest_dict)
+        urls = ApiDocsUrl.from_metadata_dict(metadata_dict)
         assert len(urls) == 1
         assert urls[0].title == "General docs"
         assert urls[0].doc_type == "other"
         assert urls[0].requires_login is False
 
-    def test_empty_manifest(self) -> None:
-        """Test handling empty manifest dict."""
-        urls = ApiDocsUrl.from_manifest_dict({})
+    def test_empty_metadata(self) -> None:
+        """Test handling empty metadata dict."""
+        urls = ApiDocsUrl.from_metadata_dict({})
         assert len(urls) == 0
 
+    def test_metadata_missing_title_raises_error(self) -> None:
+        """Test that missing 'title' field raises PyAirbyteInternalError."""
+        metadata_dict = {
+            "version": "1.0.0",
+            "type": "DeclarativeSource",
+            "data": {
+                "externalDocumentationUrls": [
+                    {
+                        "url": "https://api.example.com/docs",
+                    }
+                ]
+            },
+        }
 
-class TestGetApiDocsUrls:
-    """Tests for get_api_docs_urls function."""
+        with pytest.raises(
+            exc.PyAirbyteInternalError, match="Metadata parsing error.*'title'"
+        ):
+            ApiDocsUrl.from_metadata_dict(metadata_dict)
+
+    def test_metadata_missing_url_raises_error(self) -> None:
+        """Test that missing 'url' field raises PyAirbyteInternalError."""
+        metadata_dict = {
+            "version": "1.0.0",
+            "type": "DeclarativeSource",
+            "data": {
+                "externalDocumentationUrls": [
+                    {
+                        "title": "API Documentation",
+                    }
+                ]
+            },
+        }
+
+        with pytest.raises(
+            exc.PyAirbyteInternalError, match="Metadata parsing error.*'url'"
+        ):
+            ApiDocsUrl.from_metadata_dict(metadata_dict)
+
+
+class TestGetConnectorDocsUrls:
+    """Tests for get_connector_docs_urls function."""
 
     def test_connector_not_found(self) -> None:
         """Test handling when connector is not found."""
+        # Patch the alias the MCP tool calls. Patching the public name would only swap
+        # the module attribute; the tool imported above would still hit the real registry.
         with patch(
-            "airbyte.mcp.connector_registry.get_connector_api_docs_urls"
+            "airbyte.mcp.connector_registry._get_connector_docs_urls"
         ) as mock_get_docs:
             mock_get_docs.side_effect = exc.AirbyteConnectorNotRegisteredError(
                 connector_name="nonexistent-connector",
                 context={},
             )
 
-            result = get_api_docs_urls("nonexistent-connector")
+            result = get_connector_docs_urls("nonexistent-connector")
             assert result == "Connector not found."
-
-    def test_deduplication_of_urls(self) -> None:
-        """Test that duplicate URLs are deduplicated."""
-        with patch(
-            "airbyte.mcp.connector_registry.get_connector_api_docs_urls"
-        ) as mock_get_docs:
-            mock_get_docs.return_value = [
-                ApiDocsUrl(
-                    title="Airbyte Documentation",
-                    url="https://docs.airbyte.com/integrations/sources/example",
-                    source="registry",
-                )
-            ]
-
-            result = get_api_docs_urls("source-example")
-
-            assert isinstance(result, list)
-            assert len(result) == 1
-            assert result[0].title == "Airbyte Documentation"