Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions airbyte/mcp/connector_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
from airbyte.mcp._tool_utils import mcp_tool, register_tools
from airbyte.mcp._util import resolve_list_of_strings
from airbyte.registry import (
_DEFAULT_MANIFEST_URL,
_DEFAULT_METADATA_URL,
ApiDocsUrl,
ConnectorMetadata,
ConnectorVersionInfo,
InstallType,
get_available_connectors,
get_connector_api_docs_urls,
get_connector_metadata,
)
from airbyte.registry import get_connector_docs_urls as _get_connector_docs_urls
from airbyte.registry import get_connector_version_history as _get_connector_version_history
from airbyte.sources.util import get_source

Expand Down Expand Up @@ -161,7 +161,7 @@ def get_connector_info(
connector.install()
config_spec_jsonschema = connector.config_spec

manifest_url = _DEFAULT_MANIFEST_URL.format(
manifest_url = _DEFAULT_METADATA_URL.format(
source_name=connector_name,
version="latest",
)
Expand All @@ -180,7 +180,7 @@ def get_connector_info(
read_only=True,
idempotent=True,
)
def get_api_docs_urls(
def get_connector_docs_urls(
connector_name: Annotated[
str,
Field(
Expand All @@ -191,14 +191,14 @@ def get_api_docs_urls(
),
],
) -> list[ApiDocsUrl] | Literal["Connector not found."]:
"""Get API documentation URLs for a connector.
"""Get documentation URLs for a connector.

This tool retrieves documentation URLs for a connector's upstream API from multiple sources:
This tool retrieves documentation URLs for a connector from multiple sources:
- Registry metadata (documentationUrl, externalDocumentationUrls)
- Connector manifest.yaml file (data.externalDocumentationUrls)
- Connector metadata.yaml file (data.externalDocumentationUrls)
"""
try:
return get_connector_api_docs_urls(connector_name)
return _get_connector_docs_urls(connector_name)
except exc.AirbyteConnectorNotRegisteredError:
return "Connector not found."

Expand Down
122 changes: 71 additions & 51 deletions airbyte/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
_PYTHON_LANGUAGE_TAG = f"language:{_PYTHON_LANGUAGE}"
_MANIFEST_ONLY_TAG = f"language:{_MANIFEST_ONLY_LANGUAGE}"

_DEFAULT_MANIFEST_URL = (
"https://connectors.airbyte.com/files/metadata/airbyte/{source_name}/{version}/manifest.yaml"
_DEFAULT_METADATA_URL = (
"https://connectors.airbyte.com/files/metadata/airbyte/{source_name}/{version}/metadata.yaml"
)


Expand Down Expand Up @@ -312,58 +312,88 @@ class ApiDocsUrl(BaseModel):
model_config = {"populate_by_name": True}

@classmethod
def from_manifest_dict(cls, manifest_data: dict[str, Any]) -> list[Self]:
"""Extract documentation URLs from parsed manifest data.
def from_metadata_docs_list(
cls, docs: list[dict[str, Any]], *, source: str, context: str
) -> list[Self]:
"""Extract documentation URLs from a list of metadata documentation dictionaries.

Args:
manifest_data: The parsed manifest.yaml data as a dictionary
docs: List of documentation dictionaries with 'title' and 'url' fields
source: The source identifier for these documentation URLs
context: Context string for error messages (e.g., "Metadata", "Registry")

Returns:
List of ApiDocsUrl objects extracted from the manifest
List of ApiDocsUrl objects extracted from the docs list

Raises:
PyAirbyteInternalError: If a documentation entry is missing required 'title' or
'url' field
"""
results: list[Self] = []

data_section = manifest_data.get("data")
if isinstance(data_section, dict):
external_docs = data_section.get("externalDocumentationUrls")
if isinstance(external_docs, list):
results = [
for doc in docs:
try:
results.append(
cls(
title=doc["title"],
url=doc["url"],
source="data_external_docs",
source=source,
doc_type=doc.get("type", "other"),
requires_login=doc.get("requiresLogin", False),
)
for doc in external_docs
]

)
except KeyError as e:
raise exc.PyAirbyteInternalError(
message=f"{context} parsing error: missing required field in {doc}: {e}"
) from e
return results

@classmethod
def from_metadata_dict(cls, metadata_data: dict[str, Any]) -> list[Self]:
"""Extract documentation URLs from parsed metadata.

Args:
metadata_data: The parsed metadata.yaml data as a dictionary

def _manifest_url_for(connector_name: str) -> str:
"""Get the expected URL of the manifest.yaml file for a connector.
Returns:
List of ApiDocsUrl objects extracted from the metadata

Raises:
PyAirbyteInternalError: If a documentation entry is missing required 'title' or
'url' field
"""
data_section = metadata_data.get("data")
if isinstance(data_section, dict):
external_docs = data_section.get("externalDocumentationUrls")
if isinstance(external_docs, list):
return cls.from_metadata_docs_list(
external_docs, source="metadata_external_docs", context="Metadata"
)
return []


def _metadata_url_for(connector_name: str) -> str:
"""Get the expected URL of the metadata.yaml file for a connector.

Args:
connector_name: The canonical connector name (e.g., "source-facebook-marketing")

Returns:
The URL to the connector's manifest.yaml file
The URL to the connector's metadata.yaml file
"""
return _DEFAULT_MANIFEST_URL.format(
return _DEFAULT_METADATA_URL.format(
source_name=connector_name,
version="latest",
)


def _fetch_manifest_dict(url: str) -> dict[str, Any]:
"""Fetch and parse a manifest.yaml file from a URL.
def _fetch_metadata_dict(url: str) -> dict[str, Any]:
"""Fetch and parse a metadata.yaml file from a URL.

Args:
url: The URL to fetch the manifest from
url: The URL to fetch the metadata from

Returns:
The parsed manifest data as a dictionary, or empty dict if manifest not found (404)
The parsed metadata as a dictionary, or empty dict if metadata not found (404)

Raises:
HTTPError: If the request fails with a non-404 status code
Expand All @@ -386,6 +416,9 @@ def _extract_docs_from_registry(connector_name: str) -> list[ApiDocsUrl]:

Returns:
List of ApiDocsUrl objects extracted from the registry

Raises:
PyAirbyteInternalError: If a documentation entry is missing required 'title' or 'url' field
"""
registry_url = _get_registry_url()
response = requests.get(registry_url, timeout=10)
Expand All @@ -407,40 +440,34 @@ def _extract_docs_from_registry(connector_name: str) -> list[ApiDocsUrl]:
title="Airbyte Documentation",
url=connector_entry["documentationUrl"],
source="registry",
doc_type="internal",
)
)

if connector_entry and "externalDocumentationUrls" in connector_entry:
external_docs = connector_entry["externalDocumentationUrls"]
if isinstance(external_docs, list):
docs_urls.extend(
[
ApiDocsUrl(
title=doc["title"],
url=doc["url"],
source="registry_external_docs",
doc_type=doc.get("type", "other"),
requires_login=doc.get("requiresLogin", False),
)
for doc in external_docs
]
ApiDocsUrl.from_metadata_docs_list(
external_docs, source="registry_external_docs", context="Registry"
)
)

return docs_urls


def get_connector_api_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
"""Get API documentation URLs for a connector.
def get_connector_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
"""Get documentation URLs for a connector.

This function retrieves documentation URLs for a connector's upstream API from multiple sources:
This function retrieves documentation URLs for a connector from multiple sources:
- Registry metadata (documentationUrl, externalDocumentationUrls)
- Connector manifest.yaml file (data.externalDocumentationUrls)
- Connector metadata.yaml file (data.externalDocumentationUrls)

Args:
connector_name: The canonical connector name (e.g., "source-facebook-marketing")

Returns:
List of ApiDocsUrl objects with documentation URLs, deduplicated by URL.
List of ApiDocsUrl objects with documentation URLs.

Raises:
AirbyteConnectorNotRegisteredError: If the connector is not found in the registry.
Expand All @@ -459,19 +486,12 @@ def get_connector_api_docs_urls(connector_name: str) -> list[ApiDocsUrl]:
registry_urls = _extract_docs_from_registry(connector_name)
docs_urls.extend(registry_urls)

manifest_url = _manifest_url_for(connector_name)
manifest_data = _fetch_manifest_dict(manifest_url)
manifest_urls = ApiDocsUrl.from_manifest_dict(manifest_data)
docs_urls.extend(manifest_urls)

seen_urls = set()
unique_docs_urls = []
for doc_url in docs_urls:
if doc_url.url not in seen_urls:
seen_urls.add(doc_url.url)
unique_docs_urls.append(doc_url)
metadata_url = _metadata_url_for(connector_name)
metadata_data = _fetch_metadata_dict(metadata_url)
metadata_urls = ApiDocsUrl.from_metadata_dict(metadata_data)
docs_urls.extend(metadata_urls)

return unique_docs_urls
return docs_urls


def get_connector_version_history(
Expand Down
Loading