Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .github/workflows/claude.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ jobs:
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
github_token: ${{ secrets.GITHUB_TOKEN }}
allowed_non_write_users: Copilot
allowed_bots: "github-actions[bot],copilot[bot],dependabot[bot],copilot,github-actions,gemini[bot],claude[bot]"
allowed_non_write_users: Copilot,copilot,jules[bot],jules
allowed_bots: "github-actions[bot],copilot[bot],dependabot[bot],copilot,github-actions,gemini[bot],claude[bot],jules[bot]"
trigger_phrase: "@claude"
assignee_trigger: claude[bot]
label_trigger: claude
Expand Down Expand Up @@ -105,6 +105,8 @@ jobs:
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
github_token: ${{ secrets.GITHUB_TOKEN }}
allowed_non_write_users: Copilot,copilot,jules[bot],jules
allowed_bots: "github-actions[bot],copilot[bot],dependabot[bot],copilot,github-actions,gemini[bot],claude[bot],jules[bot]"
trigger_phrase: "@claude"
assignee_trigger: claude
label_trigger: claude
Expand Down Expand Up @@ -140,6 +142,8 @@ jobs:
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
github_token: ${{ secrets.GITHUB_TOKEN }}
allowed_non_write_users: Copilot,copilot,jules[bot],jules
allowed_bots: "github-actions[bot],copilot[bot],dependabot[bot],copilot,github-actions,gemini[bot],claude[bot],jules[bot]"
trigger_phrase: "@claude"
assignee_trigger: claude
label_trigger: claude
Expand Down Expand Up @@ -177,6 +181,8 @@ jobs:
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
github_token: ${{ secrets.GITHUB_TOKEN }}
allowed_non_write_users: Copilot,copilot,jules[bot],jules
allowed_bots: "github-actions[bot],copilot[bot],dependabot[bot],copilot,github-actions,gemini[bot],claude[bot],jules[bot]"
trigger_phrase: "@claude"
assignee_trigger: claude
label_trigger: claude
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -229,5 +229,5 @@ test-results.xml
mise.local.toml
mise.local.env

.gemini/
gha-creds-*.json
.exportify/
!.exportify/config.toml
20 changes: 10 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ dependencies = [
# So we pin it to make sure we don't break on minor releases
"pydantic==2.12.5",
# for, you know, platform dirs
"platformdirs>=4.9.2",
"platformdirs>=4.9.4",
# psutil used for resource governance/limiting by engine
"psutil>=7.2.2",
"textcase>=0.4.5",
Expand All @@ -140,20 +140,20 @@ dependencies = [
# for local providers (sentence-transformers, fastembed) to detect CPU/GPU features
"py-cpuinfo>=9.0.0",
# * ================ CLI Dependencies ==================*
"cyclopts>=4.5.1",
"rich>=14.3.0",
"cyclopts>=4.10.0",
"rich>=14.3.3",
# * ================ Provider Clients ==================*
# we must pin these to specific versions to ensure compatibility with our ClientOptions subclasses
"boto3==1.42.19",
"cohere==5.20.1",
"cohere==5.20.7",
"fastembed==0.7.4; python_version < '3.14'",
"google-genai==1.56.0",
# NOTE: We're waiting on pydantic-ai to update to 1.0+ before we can upgrade too
"huggingface-hub==0.36.2",
"huggingface-hub>=1.7.1",
"mistralai==1.10.0",
"openai==2.17.0",
"qdrant-client==1.16.2",
"pydantic-ai-slim>=1.56.0",
"openai==2.28.0",
"qdrant-client==1.17.1",
"pydantic-ai-slim>=1.68.0",
"sentence-transformers==5.2.0; python_version <= '3.14'",
"voyageai==0.3.7",
# * ================ Indexing and Engine ==================*
Expand All @@ -171,12 +171,12 @@ dependencies = [
# fastmcp is the core MCP server framework
"fastmcp>=2.14.5",
# just used for types but we need them at runtime for Pydantic models
"mcp>=1.19.0",
"mcp>=1.23.3",
# Runs the core admin/management server
"uvicorn[standard]>=0.40.0",
# * ================ Configuration and Settings ==================*
# pydantic-settings with toml and yaml support for config files
"pydantic-settings[toml,yaml]>=2.12.0", # Pulls: tomli>=2.0.1, pyyaml>=6.0.1
"pydantic-settings[toml,yaml]>=2.13.1", # Pulls: tomli>=2.0.1, pyyaml>=6.0.1
# For writing toml config files
"tomli-w>=1.2.0",
# * ================ Telemetry and Observability ==================*
Expand Down
2 changes: 1 addition & 1 deletion scripts/build/generate-mcp-server-json.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
ConfigLanguage,
EnvFormat,
Provider,
ProviderCategory,
ProviderEnvVarInfo,
ProviderEnvVars,
ProviderCategory,
SemanticSearchLanguage,
)

Expand Down
5 changes: 4 additions & 1 deletion scripts/model_data/hf-models.json
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@
]
}
},
"models": {},
"models": {
"Alibaba-NLP/gte-modernbert-base": {
"adapted_from": null,
Expand Down Expand Up @@ -3924,4 +3925,6 @@
"opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini"
]
}
}
{
"models": {}
}
Empty file modified scripts/model_data/hf-models.json.license
100755 → 100644
Empty file.
31 changes: 14 additions & 17 deletions scripts/model_data/mteb_to_codeweaver.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,11 @@


# make sure codeweaver is importable
sys.path.insert(0, str(Path(__file__).parent.parent))
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))

from codeweaver.core import Provider
from codeweaver.providers import PartialCapabilities
from codeweaver.providers.provider import Provider

from codeweaver.providers.embedding.capabilities.types import PartialCapabilities


# TODO: Finish refactor to use these inline constants and eliminate the hf-models.json
Expand Down Expand Up @@ -99,6 +100,13 @@
Note: FastEmbed also has some aliases, but we handle those dynamically below.
"""

KNOWN_ALIASES: dict[str, dict[ModelName, ModelName]] = {"ollama": OLLAMA_ALIASES}
"""A mapping of provider names to their HF name → provider alias mappings.

Keys are provider name strings (e.g. "ollama") and values are dicts mapping HF model names
to the provider-specific alias. FastEmbed aliases are handled dynamically via get_fastembed_aliases().
"""

KNOWN_SPARSE_MODELS = {
Provider.FASTEMBED: [
"Qdrant/bm25",
Expand Down Expand Up @@ -382,13 +390,7 @@ def attempt_to_get_version(name: str) -> str | int | float | None:

type DataMap = dict[ModelName, SimplifiedModelMeta]

type ModelMap = dict[
ModelMaker,
dict[
ModelName,
tuple[Annotated[HFModelProviders, BeforeValidator(lambda v: Provider.from_string(v))], ...],
],
]
type ModelMap = dict[ModelMaker, dict[ModelName, tuple[HFModelProviders, ...]]]
"""A mapping of model makers to their models and the providers that support each model."""


Expand Down Expand Up @@ -520,29 +522,24 @@ def load(cls) -> RootJson:
return cls.model_validate_json(cls._json_path.read_text())


"""
if JSON_CACHE.exists():
_ROOT = RootJson.load()
DATA = _ROOT.models
MODEL_MAP_DATA = _ROOT.model_map
ALIAS_MAP_DATA = _ROOT.aliases
SPARSE_MODELS = _ROOT.sparse_models

FLATTENED_ALIASES = _ROOT.flattened_aliases
else:
_ROOT = RootJson(models={})
DATA = {}
MODEL_MAP_DATA = {}
ALIAS_MAP_DATA = {}
SPARSE_MODELS = {}
FLATTENED_ALIASES = {}
"""


def mteb_to_capabilities(model: SimplifiedModelMeta) -> PartialCapabilities:
"""
Convert an MTEB model metadata dictionary to a PartialCapabilities object.
"""
loader = getattr(model, "loader", {})
loader = loader if isinstance(loader, dict) else {}
caps = {
"name": model["name"],
"default_dimension": model.get("embed_dim"),
Expand Down
Empty file modified scripts/model_data/secondary_providers.json
100755 → 100644
Empty file.
Empty file modified scripts/model_data/secondary_providers.json.license
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion src/codeweaver/cli/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ async def _perform_clear_operation(
response = display.console.input(
"[yellow]Are you sure you want to continue? (yes/no):[/yellow] "
)
if response.lower() not in ["yes", "y"]:
if response.lower() not in {"yes", "y"}:
display.print_info("Operation cancelled")
sys.exit(0)

Expand Down
114 changes: 72 additions & 42 deletions src/codeweaver/core/di/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,20 +109,13 @@
unwrapped = annotated_type(annotation)
return unwrapped if unwrapped is not None else annotation

def _resolve_string_type(

Check failure on line 112 in src/codeweaver/core/di/container.py

View workflow job for this annotation

GitHub Actions / Lint / Lint and Format

ruff (C901)

src/codeweaver/core/di/container.py:112:9: C901 `_resolve_string_type` is too complex (21 > 10)
self, type_str: str, globalns: dict[str, Any] | None = None
) -> Any | None:
"""Resolve a string type annotation to an actual type.
"""Resolve a string type annotation to an actual type manually using AST.

Note: We use eval() here (not ast.literal_eval()) because we're resolving
type names like "EmbeddingProvider" to actual type objects, not evaluating
literal values. ast.literal_eval() only works for Python literals (strings,
numbers, lists, dicts, etc.) and cannot resolve type names.

This is safe because:
1. We only eval in the controlled globalns namespace from the module
2. This is the standard approach for resolving string type annotations
3. typing.get_type_hints() uses eval internally for the same purpose
Note: We explicitly avoid using eval() to prevent arbitrary code execution
from malicious string-based type annotations.

Args:
type_str: The string representation of a type.
Expand All @@ -131,40 +124,75 @@
Returns:
The resolved type, or None if resolution fails.
"""
if not globalns:
if not type_str or not globalns:
return None

# 1. Exact match in globalns
if type_str in globalns:
return globalns[type_str]

# 2. Parse the string into an AST safely without evaluating
import ast
try:
tree = ast.parse(type_str, mode="eval")
except SyntaxError:
return None

# First, try to evaluate the string as a type reference
# ruff: noqa: S307 - eval is necessary for type resolution, not literal evaluation
with suppress(Exception):
return eval(type_str, globalns)
# If direct eval failed, check if it's an Annotated pattern like "Annotated[SomeType, ...]"
# In this case, try to resolve the base type from registered factories
if type_str.startswith("Annotated["):
# Extract the base type name (first argument to Annotated)
# Simple parsing: "Annotated[TypeName, ...]" -> "TypeName"
with suppress(Exception):
# Find the first comma or closing bracket
start = len("Annotated[")
end = type_str.find(",", start)
if end == -1:
end = type_str.find("]", start)

if end > start:
base_type_str = type_str[start:end].strip()

# Try to find this type in registered factories
for factory_type in self._factories:
if _get_name(factory_type) == base_type_str:
# Reconstruct the Annotated type using the resolved base type
# We need Annotated from typing

# Try to eval the full annotation with the base type injected
enhanced_globalns = globalns.copy()
enhanced_globalns[base_type_str] = factory_type

with suppress(Exception):
return eval(type_str, enhanced_globalns)
# Helper function to evaluate safe AST nodes manually
def _safe_eval_node(node: ast.AST) -> Any:
    """Evaluate a whitelisted subset of annotation AST nodes without ``eval()``.

    Supported nodes: names, attribute access, subscripts, constants,
    list/tuple literals, ``X | Y`` unions, and calls to ``Depends`` only.

    Args:
        node: The AST node to evaluate.

    Returns:
        The object the node resolves to.

    Raises:
        ValueError: If the node type, name, operator, attribute, or call
            target is not permitted. Lookups and subscripting may also
            raise their own errors (e.g. ``AttributeError``, ``TypeError``).
    """
    if isinstance(node, ast.Name):
        # Resolution order: caller namespace, then builtins, then registered factories.
        if node.id in globalns:
            return globalns[node.id]
        import builtins

        if hasattr(builtins, node.id):
            return getattr(builtins, node.id)
        for factory_type in self._factories:
            if _get_name(factory_type) == node.id:
                return factory_type
        raise ValueError(f"Unknown name: {node.id}")

    if isinstance(node, ast.Attribute):
        # Dunder attributes (__class__, __globals__, ...) expose interpreter
        # internals and are never needed to spell a type annotation.
        if node.attr.startswith("__"):
            raise ValueError(f"Access to dunder attributes is not allowed: {node.attr}")
        return getattr(_safe_eval_node(node.value), node.attr)

    if isinstance(node, ast.Subscript):
        value = _safe_eval_node(node.value)
        # A tuple slice (e.g. ``dict[str, int]``) is handled by the ast.Tuple
        # branch below, so no special case is needed here.
        # Some types don't support being subscripted; that error propagates.
        return value[_safe_eval_node(node.slice)]

    if isinstance(node, ast.Constant):
        return node.value

    if isinstance(node, ast.List):
        return [_safe_eval_node(n) for n in node.elts]

    if isinstance(node, ast.Tuple):
        return tuple(_safe_eval_node(n) for n in node.elts)

    if isinstance(node, ast.BinOp):
        # Only the union operator is meaningful in a type annotation.
        if not isinstance(node.op, ast.BitOr):
            raise ValueError(f"Unsupported binary operator: {type(node.op).__name__}")
        return _safe_eval_node(node.left) | _safe_eval_node(node.right)

    if isinstance(node, ast.Call):
        func = _safe_eval_node(node.func)
        # ONLY allow Depends(...) function calls
        from codeweaver.core.di.dependency import Depends

        if func is not Depends:
            raise ValueError(f"Function calls are not allowed in type annotations, except Depends. Got {func}")

        args = [_safe_eval_node(arg) for arg in node.args]
        # ``**mapping`` expansions have ``kw.arg is None`` and are ignored.
        kwargs = {kw.arg: _safe_eval_node(kw.value) for kw in node.keywords if kw.arg is not None}
        return Depends(*args, **kwargs)

    raise ValueError(f"Unsupported AST node type: {type(node).__name__}")

try:
return _safe_eval_node(tree.body)
except Exception:

Check failure on line 193 in src/codeweaver/core/di/container.py

View workflow job for this annotation

GitHub Actions / Lint / Lint and Format

ruff (S110)

src/codeweaver/core/di/container.py:192:9: S110 `try`-`except`-`pass` detected, consider logging the exception
pass

# Fallback: try to find a factory by matching type name
return next(
(
Expand Down Expand Up @@ -562,6 +590,8 @@
if self._is_union_type(interface):
instance = await self._resolve_union_interface(interface, cache_key, _resolution_stack)
return cast(T, instance)
elif interface is type(None):
return cast(T, None)

# 1. Check overrides first
# We check overrides before tags and singletons because overrides
Expand Down
5 changes: 4 additions & 1 deletion src/codeweaver/core/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,10 @@ async def _check_profile(container: Container) -> bool | None:
):
from codeweaver.providers.config.profiles import ProviderProfile

return profile in [ProviderProfile.RECOMMENDED_CLOUD, ProviderProfile.RECOMMENDED]
return (
profile is ProviderProfile.RECOMMENDED_CLOUD
or profile is ProviderProfile.RECOMMENDED
)
return None


Expand Down
Loading
Loading