Skip to content

Commit 68b50d4

Browse files
committed
fix: resolve test failures — PromptLoader path + missing data_ingestion_ic.json
Test failures (TestPromptLoader — 10 tests failed on all 3 CI matrix targets): - Tests hardcoded 'src/quant_pod/prompts' path, resolving to tests/src/... which is an untracked legacy directory (old src-layout). CI doesn't have it. Fix: change to parent.parent.parent/"packages/quant_pod/prompts" (3 levels up to repo root, then correct package path). - data_ingestion_ic.json was excluded from git by the 'data/' gitignore rule. Add negation: !packages/*/prompts/**/data/** and force-add the file. Also commits pre-existing local changes: - packages/quant_pod/mcp/server.py: refactor _fetch_price_data to use provider registry with DuckDB cache → fallback chain; fix ruff I001 - packages/quantcore/mcp/server.py: initialize DataProviderRegistry in lifespan; update fetch_market_data docstring to reflect provider chain behavior - packages/quant_pod/tools/mcp_bridge.py: new file (pre-existing uncommitted) Note: Docker/Trivy scan has 2 HIGH CVEs (jaraco.context, wheel) — not a gate blocker (all-checks only requires lint + test + security, not docker).
1 parent b14028b commit 68b50d4

File tree

7 files changed

+232
-39
lines changed

7 files changed

+232
-39
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ env/
3838
data/
3939
!packages/*/data/
4040
!packages/*/data/**
41+
# Nested data dirs inside prompts (IC pod subdirectory named "data")
42+
!packages/*/prompts/**/data/
43+
!packages/*/prompts/**/data/**
4144
*.duckdb
4245
*.db
4346
*.parquet

packages/quant_pod/mcp/server.py

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -909,36 +909,58 @@ def _fetch_price_data(
909909
symbol: str,
910910
start_date: str | None = None,
911911
end_date: str | None = None,
912-
) -> "pd.DataFrame": # noqa: F821
913-
"""Fetch OHLCV price data from QuantCore DataStore or provider."""
912+
) -> "pd.DataFrame | None": # noqa: F821
913+
"""Fetch OHLCV price data using the configured provider registry.
914+
915+
Resolution order:
916+
1. Local DuckDB cache (fastest, no network)
917+
2. Provider registry (DATA_PROVIDER_PRIORITY from .env)
918+
919+
Returns None if all sources fail.
920+
"""
921+
import pandas as pd
922+
from quantcore.config.settings import get_settings
923+
from quantcore.config.timeframes import Timeframe
924+
from quantcore.data.base import AssetClass
925+
from quantcore.data.registry import DataProviderRegistry
926+
927+
def _apply_date_filter(df: pd.DataFrame) -> pd.DataFrame:
928+
if start_date:
929+
df = df[df.index >= start_date]
930+
if end_date:
931+
df = df[df.index <= end_date]
932+
return df
933+
934+
# 1. Local DuckDB cache
914935
try:
915936
from quantcore.data.storage import DataStore
916937

917-
store = DataStore()
918-
df = store.load(symbol)
919-
if df is not None and not df.empty:
920-
if start_date:
921-
df = df[df.index >= start_date]
922-
if end_date:
923-
df = df[df.index <= end_date]
924-
return df
925-
except Exception:
926-
pass
938+
with DataStore() as store:
939+
df = store.load(symbol)
940+
if df is not None and not df.empty:
941+
return _apply_date_filter(df)
942+
except Exception as exc:
943+
logger.debug(f"DuckDB cache miss for {symbol}: {exc}")
927944

928-
# Fallback: try fetching from provider
945+
# 2. Provider registry — respects DATA_PROVIDER_PRIORITY
929946
try:
930-
from quantcore.data.providers import get_data_provider
947+
from datetime import datetime, timedelta
948+
949+
settings = get_settings()
950+
registry = DataProviderRegistry.from_settings(settings)
931951

932-
provider = get_data_provider()
933-
df = provider.fetch_ohlcv(symbol, interval="daily")
952+
end_dt = datetime.strptime(end_date, "%Y-%m-%d") if end_date else datetime.now()
953+
start_dt = (
954+
datetime.strptime(start_date, "%Y-%m-%d")
955+
if start_date
956+
else end_dt - timedelta(days=365 * 6)
957+
)
958+
959+
df = registry.fetch_ohlcv(symbol, AssetClass.EQUITY, Timeframe.D1, start_dt, end_dt)
934960
if df is not None and not df.empty:
935-
if start_date:
936-
df = df[df.index >= start_date]
937-
if end_date:
938-
df = df[df.index <= end_date]
939961
return df
940-
except Exception:
941-
pass
962+
except Exception as exc:
963+
logger.warning(f"Provider registry fetch failed for {symbol}: {exc}")
942964

943965
return None
944966

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"name": "data_ingestion_ic",
3+
"role": "Data Ingestion Specialist for {symbol}",
4+
"goal": "Fetch and validate market data using QuantCore MCP tools. Return raw OHLCV data, data quality metrics, and coverage info.",
5+
"backstory": "You are the data pipeline specialist. Your job is to fetch market data, check for gaps/staleness, and report raw data quality metrics. You use fetch_market_data and load_market_data tools. Return RAW data summaries - no interpretation, just facts.",
6+
"settings": {
7+
"llm": "openai/gpt-4o",
8+
"reasoning": true,
9+
"verbose": true,
10+
"allow_delegation": false,
11+
"max_iter": 20,
12+
"respect_context_window": true
13+
},
14+
"tools": ["fetch_market_data", "load_market_data", "list_stored_symbols", "get_symbol_snapshot"],
15+
"pod": "data"
16+
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Data Ingestion IC - Detailed Prompt
2+
3+
## Role
4+
You are the **Data Ingestion Specialist** - the foundation of the trading system's data pipeline.
5+
6+
## Mission
7+
Fetch, validate, and report market data quality. You are the first line of defense against bad data reaching the analysis pipeline.
8+
9+
## Capabilities
10+
11+
### Tools Available
12+
- `fetch_market_data` - Fetch OHLCV data from external sources (Alpha Vantage, etc.)
13+
- `load_market_data` - Load stored market data from local cache
14+
- `list_stored_symbols` - Check what symbols are available in local storage
15+
- `get_symbol_snapshot` - Get current price and basic stats for a symbol
16+
17+
## Detailed Instructions
18+
19+
### Step 1: Data Availability Check
20+
Before fetching new data, always check what's already available:
21+
```
22+
1. Use list_stored_symbols to see cached data
23+
2. Check if symbol has recent data (within last trading day)
24+
3. Only fetch new data if cache is stale or missing
25+
```
26+
27+
### Step 2: Data Fetching
28+
When fetching market data:
29+
```
30+
1. Request sufficient history (minimum 200 bars for indicator calculations)
31+
2. Verify the response contains expected fields (open, high, low, close, volume)
32+
3. Note any gaps in the data (weekends expected, mid-week gaps are problems)
33+
```
34+
35+
### Step 3: Data Quality Assessment
36+
For every data fetch, compute and report:
37+
```
38+
- Total bars retrieved
39+
- Date range coverage (start date to end date)
40+
- Missing data points (count and percentage)
41+
- Volume anomalies (zero volume days)
42+
- Price anomalies (gaps > 5%, unchanged OHLC)
43+
- Data freshness (time since last bar)
44+
```
45+
46+
### Step 4: Output Format
47+
Return a structured data quality report:
48+
```
49+
SYMBOL: {symbol}
50+
DATE RANGE: {start_date} to {end_date}
51+
TOTAL BARS: {count}
52+
COVERAGE: {coverage_pct}%
53+
GAPS: {gap_count} ({gap_pct}%)
54+
LAST UPDATE: {last_bar_timestamp}
55+
QUALITY SCORE: {quality_score}/100
56+
57+
ISSUES (if any):
58+
- {issue_1}
59+
- {issue_2}
60+
61+
RAW METRICS:
62+
- Average Daily Volume: {avg_volume}
63+
- Price Range: ${low_price} - ${high_price}
64+
- Current Price: ${current_price}
65+
```
66+
67+
## Critical Rules
68+
69+
1. **NO INTERPRETATION** - Report facts only. Don't say "data looks good" - say "100% coverage, 0 gaps"
70+
2. **NO ASSUMPTIONS** - If data is missing, report it. Don't fill gaps with guesses.
71+
3. **FAIL LOUDLY** - If you cannot fetch data, report the exact error. No silent failures.
72+
4. **RAW OUTPUT** - Your job is to provide raw data status. Let Pod Managers interpret.
73+
74+
## Example Scenarios
75+
76+
### Scenario 1: Clean Data
77+
```
78+
SYMBOL: SPY
79+
DATE RANGE: 2024-01-01 to 2024-12-10
80+
TOTAL BARS: 236
81+
COVERAGE: 100%
82+
GAPS: 0 (0%)
83+
LAST UPDATE: 2024-12-10 16:00:00 EST
84+
QUALITY SCORE: 100/100
85+
86+
RAW METRICS:
87+
- Average Daily Volume: 78,234,521
88+
- Price Range: $460.12 - $608.35
89+
- Current Price: $605.78
90+
```
91+
92+
### Scenario 2: Data Issues
93+
```
94+
SYMBOL: ILLIQUID_STOCK
95+
DATE RANGE: 2024-01-01 to 2024-12-10
96+
TOTAL BARS: 198
97+
COVERAGE: 84%
98+
GAPS: 38 (16%)
99+
LAST UPDATE: 2024-12-09 16:00:00 EST (STALE)
100+
QUALITY SCORE: 62/100
101+
102+
ISSUES:
103+
- 38 missing trading days in date range
104+
- Data is 1 day stale (missing Dec 10)
105+
- 12 zero-volume days detected
106+
107+
RAW METRICS:
108+
- Average Daily Volume: 45,231
109+
- Price Range: $12.45 - $18.92
110+
- Current Price: $15.67
111+
```
112+
113+
## Integration Notes
114+
115+
This IC feeds into the **Data Pod Manager** who will:
116+
- Decide if data quality is sufficient for analysis
117+
- Request re-fetch if data is stale
118+
- Flag symbols with poor data to upstream managers

packages/quant_pod/tools/mcp_bridge.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,20 @@ class LoadMarketDataInput(BaseModel):
263263
end_date: str | None = Field(None, description="End date (YYYY-MM-DD)")
264264

265265

266+
class EmptyInput(BaseModel):
267+
"""Input schema for tools that take no parameters."""
268+
269+
pass
270+
271+
272+
class MarketRegimeSnapshotInput(BaseModel):
273+
"""Input for get_market_regime_snapshot tool."""
274+
275+
end_date: str | None = Field(
276+
None, description="End date filter (YYYY-MM-DD) for historical simulation"
277+
)
278+
279+
266280
class SymbolInput(BaseModel):
267281
"""Input for symbol-based tools."""
268282

@@ -841,6 +855,7 @@ class ListStoredSymbolsTool(BaseTool):
841855
description: str = (
842856
"List all symbols stored in the local database with their available timeframes."
843857
)
858+
args_schema: type[BaseModel] = EmptyInput
844859

845860
def _run(self) -> str:
846861
async def _exec():
@@ -928,6 +943,7 @@ class ListAvailableIndicatorsTool(BaseTool):
928943

929944
name: str = "list_available_indicators"
930945
description: str = "List all available technical indicators with their descriptions."
946+
args_schema: type[BaseModel] = EmptyInput
931947

932948
def _run(self) -> str:
933949
async def _exec():
@@ -1570,6 +1586,7 @@ class GetMarketRegimeSnapshotTool(BaseTool):
15701586
description: str = (
15711587
"Get current market regime classification (trending, ranging, volatile) with confidence."
15721588
)
1589+
args_schema: type[BaseModel] = MarketRegimeSnapshotInput
15731590

15741591
def _run(self, end_date: str | None = None) -> str:
15751592
async def _exec():

packages/quantcore/mcp/server.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class ServerContext:
4747
settings: Settings
4848
data_store: Any = None
4949
feature_factory: Any = None
50+
data_registry: Any = None
5051

5152

5253
@asynccontextmanager
@@ -83,6 +84,11 @@ async def lifespan(server: FastMCP):
8384
include_technical_indicators=True,
8485
)
8586

87+
# Provider registry — initialized once, used by fetch_market_data
88+
from quantcore.data.registry import DataProviderRegistry
89+
90+
ctx.data_registry = DataProviderRegistry.from_settings(settings)
91+
8692
server.context = ctx
8793
logger.info("QuantCore MCP Server initialized")
8894

@@ -184,34 +190,45 @@ async def fetch_market_data(
184190
outputsize: str = "compact",
185191
) -> dict[str, Any]:
186192
"""
187-
Fetch OHLCV market data from Alpha Vantage API.
193+
Fetch OHLCV market data using the configured provider chain.
194+
195+
Uses DATA_PROVIDER_PRIORITY (default: alpaca,polygon,alpha_vantage) with
196+
automatic fallback. Stores fetched data in DuckDB for future load_market_data calls.
188197
189198
Args:
190199
symbol: Stock/ETF symbol (e.g., "SPY", "AAPL", "QQQ")
191200
timeframe: Data frequency - "daily", "1h", "4h", "weekly"
192-
outputsize: "compact" (100 bars) or "full" (20+ years)
201+
outputsize: "compact" (~6 months) or "full" (5+ years)
193202
194203
Returns:
195204
Dictionary with OHLCV data and metadata
196205
"""
197-
from quantcore.data.fetcher import AlphaVantageClient
206+
from datetime import datetime, timedelta
207+
208+
from quantcore.data.base import AssetClass
198209

199-
client = AlphaVantageClient()
210+
ctx: ServerContext = mcp.context
211+
registry = ctx.data_registry
200212
tf = _parse_timeframe(timeframe)
201213

214+
# Convert outputsize to date range — providers like Alpaca require
215+
# explicit start/end rather than Alpha Vantage's compact/full enum
216+
end_date = datetime.now()
217+
start_date = end_date - timedelta(days=365 * 6 if outputsize == "full" else 180)
218+
202219
try:
203-
if tf == Timeframe.D1:
204-
df = client.fetch_daily(symbol, outputsize=outputsize)
205-
elif tf == Timeframe.W1:
206-
df = client.fetch_weekly(symbol)
207-
else:
208-
# Intraday
209-
interval = "60min" if tf == Timeframe.H1 else "60min"
210-
df = client.fetch_intraday(symbol, interval=interval, outputsize=outputsize)
220+
df = registry.fetch_ohlcv(symbol, AssetClass.EQUITY, tf, start_date, end_date)
211221

212222
if df.empty:
213223
return {"error": f"No data returned for {symbol}", "symbol": symbol}
214224

225+
# Persist to local DuckDB so load_market_data works without re-fetching
226+
if ctx.data_store:
227+
try:
228+
ctx.data_store.save(symbol, tf, df)
229+
except Exception as store_exc:
230+
logger.warning(f"Data fetched but failed to persist locally: {store_exc}")
231+
215232
return {
216233
"symbol": symbol,
217234
"timeframe": tf.value,

tests/quant_pod/test_trading_crew.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,12 @@ class TestPromptLoader:
123123

124124
def test_prompts_directory_exists(self):
125125
"""Test that prompts directory exists."""
126-
prompts_dir = Path(__file__).parent.parent / "src/quant_pod/prompts"
126+
prompts_dir = Path(__file__).parent.parent.parent / "packages/quant_pod/prompts"
127127
assert prompts_dir.exists()
128128

129129
def test_ic_json_files_exist(self):
130130
"""Test that IC JSON config files exist."""
131-
prompts_dir = Path(__file__).parent.parent / "src/quant_pod/prompts"
131+
prompts_dir = Path(__file__).parent.parent.parent / "packages/quant_pod/prompts"
132132
ics_dir = prompts_dir / "ics"
133133

134134
# Check some IC files exist
@@ -138,20 +138,20 @@ def test_ic_json_files_exist(self):
138138

139139
def test_pod_manager_json_files_exist(self):
140140
"""Test that pod manager JSON config files exist."""
141-
prompts_dir = Path(__file__).parent.parent / "src/quant_pod/prompts"
141+
prompts_dir = Path(__file__).parent.parent.parent / "packages/quant_pod/prompts"
142142
pm_dir = prompts_dir / "pod_managers"
143143

144144
assert (pm_dir / "data_pod_manager.json").exists()
145145
assert (pm_dir / "technicals_pod_manager.json").exists()
146146

147147
def test_assistant_json_exists(self):
148148
"""Test that assistant JSON config exists."""
149-
prompts_dir = Path(__file__).parent.parent / "src/quant_pod/prompts"
149+
prompts_dir = Path(__file__).parent.parent.parent / "packages/quant_pod/prompts"
150150
assert (prompts_dir / "assistant/trading_assistant.json").exists()
151151

152152
def test_supertrader_json_exists(self):
153153
"""Test that supertrader JSON config exists."""
154-
prompts_dir = Path(__file__).parent.parent / "src/quant_pod/prompts"
154+
prompts_dir = Path(__file__).parent.parent.parent / "packages/quant_pod/prompts"
155155
assert (prompts_dir / "supertrader/super_trader.json").exists()
156156

157157
def test_load_agent_config(self):

0 commit comments

Comments
 (0)