diff --git a/README.md b/README.md
index 4b9c7b8..ebe3935 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 
 
-[Dashboard](https://app.hex.tech/1644b22a-abe5-4113-9d5f-3ad05e4a8de7/app/Numinous-031erYRYSssIrH3W3KcyHg/latest) • [Website](https://numinouslabs.io/) • [Twitter](https://x.com/numinous_ai) •
+[Discord](https://discord.gg/qKPeYPc3) • [Dashboard](https://app.hex.tech/1644b22a-abe5-4113-9d5f-3ad05e4a8de7/app/Numinous-031erYRYSssIrH3W3KcyHg/latest) • [Website](https://numinouslabs.io/) • [Twitter](https://x.com/numinous_ai) •
 [Network](https://taostats.io/subnets/6/chart)
 
 ---
diff --git a/docs/miner-setup.md b/docs/miner-setup.md
index 1da8f1b..347891f 100644
--- a/docs/miner-setup.md
+++ b/docs/miner-setup.md
@@ -465,4 +465,4 @@ numi fetch-logs # Fetch validator execution logs
 **Next Steps:**
 1. Read [subnet-rules.md](./subnet-rules.md) for competition rules and constraints
 2. Review [architecture.md](./architecture.md) for system details
-3. Check the example [agents](https://github.com/numinouslabs/numinous/blob/main/neurons/miner/agents/example.py)
+3. Check example agents in `neurons/miner/agents/`
diff --git a/neurons/validator/alembic/versions/M_2025_11_26__16_53_09_add_agent_runs_table.py b/neurons/validator/alembic/versions/M_2025_11_26__16_53_09_add_agent_runs_table.py
new file mode 100644
index 0000000..9ed53cf
--- /dev/null
+++ b/neurons/validator/alembic/versions/M_2025_11_26__16_53_09_add_agent_runs_table.py
@@ -0,0 +1,84 @@
+"""Add agent_runs table
+
+Revision ID: 40606aaa49f9
+Revises: d679a148c4f2
+Create Date: 2025-11-26 16:53:09.000000
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "40606aaa49f9"
+down_revision: Union[str, None] = "d679a148c4f2"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS agent_runs (
+            run_id TEXT PRIMARY KEY,
+            unique_event_id TEXT NOT NULL,
+            agent_version_id TEXT NOT NULL,
+            miner_uid INTEGER NOT NULL,
+            miner_hotkey TEXT NOT NULL,
+            status TEXT NOT NULL,
+            exported INTEGER DEFAULT 0,
+            is_final INTEGER DEFAULT 1,
+            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+            updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+            FOREIGN KEY (unique_event_id) REFERENCES events(unique_event_id),
+            FOREIGN KEY (agent_version_id) REFERENCES miner_agents(version_id)
+        )
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX idx_agent_runs_event ON agent_runs(unique_event_id)
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX idx_agent_runs_agent ON agent_runs(agent_version_id)
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX idx_agent_runs_miner ON agent_runs(miner_uid, miner_hotkey)
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX idx_agent_runs_status ON agent_runs(status)
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX idx_agent_runs_exported ON agent_runs(exported) WHERE exported = 0
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX idx_agent_runs_is_final ON agent_runs(is_final)
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX idx_agent_runs_created_at ON agent_runs(created_at)
+        """
+    )
+
+
+def downgrade() -> None:
+    pass
diff --git a/neurons/validator/db/operations.py b/neurons/validator/db/operations.py
index e96ae07..ce73964 100644
--- a/neurons/validator/db/operations.py
+++ b/neurons/validator/db/operations.py
@@ -361,7 +361,7 @@ async def get_miners_count(self) -> int:
 
         return row[0]
 
-    async def get_predictions_to_export(self, current_interval_minutes: int, batch_size: int):
+    async def get_predictions_to_export(self, batch_size: int):
         return await self.__db_client.many(
             """
                 SELECT
@@ -383,13 +383,12 @@ async def get_predictions_to_export(self, current_interval_minutes: int, batch_s
                     events e ON e.unique_event_id = p.unique_event_id
                 WHERE
                     p.exported = ?
-                    AND p.interval_start_minutes < ?
                 ORDER BY
                     p.ROWID ASC
                 LIMIT
                     ?
             """,
-            [PredictionExportedStatus.NOT_EXPORTED, current_interval_minutes, batch_size],
+            [PredictionExportedStatus.NOT_EXPORTED, batch_size],
         )
 
     async def mark_predictions_as_exported(self, ids: list[str]):
diff --git a/neurons/validator/db/tests/test_db_operations_part_1.py b/neurons/validator/db/tests/test_db_operations_part_1.py
index 357272e..8b3121d 100644
--- a/neurons/validator/db/tests/test_db_operations_part_1.py
+++ b/neurons/validator/db/tests/test_db_operations_part_1.py
@@ -1082,20 +1082,15 @@ async def test_get_predictions_to_export(
             [PredictionExportedStatus.EXPORTED, "unique_event_id_2"],
         )
 
-        current_interval_minutes = 11
-
-        result = await db_operations.get_predictions_to_export(
-            current_interval_minutes=current_interval_minutes, batch_size=1
-        )
+        result = await db_operations.get_predictions_to_export(batch_size=1)
 
         assert len(result) == 1
         assert result[0][1] == "unique_event_id_1"
 
-        result = await db_operations.get_predictions_to_export(
-            current_interval_minutes=current_interval_minutes, batch_size=20
-        )
+        result = await db_operations.get_predictions_to_export(batch_size=20)
 
-        assert len(result) == 2
+        # (unique_event_id_2 is already exported)
+        assert len(result) == 3
 
     async def test_mark_predictions_as_exported(
         self, db_client: DatabaseClient, db_operations: DatabaseOperations
diff --git a/neurons/validator/models/agent_runs.py b/neurons/validator/models/agent_runs.py
new file mode 100644
index 0000000..9312b8f
--- /dev/null
+++ b/neurons/validator/models/agent_runs.py
@@ -0,0 +1,57 @@
+from datetime import datetime
+from enum import Enum, IntEnum
+from typing import Any, Optional
+
+from pydantic import BaseModel, field_validator
+
+
+class AgentRunStatus(str, Enum):
+    SUCCESS = "success"
+    INTERNAL_AGENT_ERROR = "internal_agent_error"
+    INVALID_SANDBOX_OUTPUT = "invalid_sandbox_output"
+    SANDBOX_TIMEOUT = "sandbox_timeout"
+
+
+class AgentRunExportedStatus(IntEnum):
+    NOT_EXPORTED = 0
+    EXPORTED = 1
+
+
+class IsFinalStatus(IntEnum):
+    NOT_FINAL = 0
+    IS_FINAL = 1
+
+
+class AgentRunsModel(BaseModel):
+    run_id: str
+    unique_event_id: str
+    agent_version_id: str
+    miner_uid: int
+    miner_hotkey: str
+    status: AgentRunStatus
+    exported: Optional[bool] = False
+    is_final: Optional[bool] = True
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    model_config = {"arbitrary_types_allowed": True}
+
+    @property
+    def primary_key(self):
+        return ["run_id"]
+
+    @field_validator("exported", mode="before")
+    def parse_exported_as_bool(cls, v: Any) -> bool:
+        # If the DB returns an integer, convert it to boolean
+        if isinstance(v, int):
+            return bool(v)
+        return v
+
+    @field_validator("is_final", mode="before")
+    def parse_is_final_as_bool(cls, v: Any) -> bool:
+        # If the DB returns an integer, convert it to boolean
+        if isinstance(v, int):
+            return bool(v)
+        return v
+
+
+RUN_FIELDS = AgentRunsModel.model_fields.keys()
diff --git a/neurons/validator/models/desearch.py b/neurons/validator/models/desearch.py
index 7631fc7..1a8e674 100644
--- a/neurons/validator/models/desearch.py
+++ b/neurons/validator/models/desearch.py
@@ -101,12 +101,12 @@ class DesearchEndpoint(StrEnum):
 # Cost per 100 searches
 DESEARCH_PRICING: typing.Dict[DesearchEndpoint, typing.Any] = {
     DesearchEndpoint.AI_SEARCH: {
-        ModelEnum.NOVA: 0.6,
+        ModelEnum.NOVA: 0.4,
         ModelEnum.ORBIT: 2.2,
         ModelEnum.HORIZON: 2.6,
     },
     DesearchEndpoint.AI_WEB_SEARCH: {
-        ModelEnum.NOVA: 0.6,
+        ModelEnum.NOVA: 0.4,
         ModelEnum.ORBIT: 1.7,
         ModelEnum.HORIZON: 2.1,
     },
diff --git a/neurons/validator/models/tests/test_agent_runs_models.py b/neurons/validator/models/tests/test_agent_runs_models.py
new file mode 100644
index 0000000..59f2428
--- /dev/null
+++ b/neurons/validator/models/tests/test_agent_runs_models.py
@@ -0,0 +1,250 @@
+from datetime import datetime
+
+import pytest
+from pydantic import ValidationError
+
+from neurons.validator.models.agent_runs import (
+    AgentRunExportedStatus,
+    AgentRunsModel,
+    AgentRunStatus,
+    IsFinalStatus,
+)
+
+
+class TestAgentRunsModel:
+    def test_create_minimal(self):
+        # Minimal required fields
+        model = AgentRunsModel(
+            run_id="run_123",
+            unique_event_id="event_456",
+            agent_version_id="agent_v1",
+            miner_uid=42,
+            miner_hotkey="5GTest...",
+            status=AgentRunStatus.SUCCESS,
+        )
+
+        assert model.run_id == "run_123"
+        assert model.unique_event_id == "event_456"
+        assert model.agent_version_id == "agent_v1"
+        assert model.miner_uid == 42
+        assert model.miner_hotkey == "5GTest..."
+        assert model.status == AgentRunStatus.SUCCESS
+
+        # Defaults
+        assert model.exported is False
+        assert model.is_final is True
+        assert model.created_at is None
+        assert model.updated_at is None
+
+    def test_create_full_success(self):
+        created = datetime(2024, 1, 1, 12, 0, 0)
+        updated = datetime(2024, 1, 1, 12, 30, 0)
+
+        model = AgentRunsModel(
+            run_id="run_abc_123",
+            unique_event_id="event_xyz_456",
+            agent_version_id="agent_version_1",
+            miner_uid=99,
+            miner_hotkey="hotkey_xyz",
+            status=AgentRunStatus.SUCCESS,
+            exported=True,
+            is_final=True,
+            created_at=created,
+            updated_at=updated,
+        )
+
+        assert model.run_id == "run_abc_123"
+        assert model.unique_event_id == "event_xyz_456"
+        assert model.agent_version_id == "agent_version_1"
+        assert model.miner_uid == 99
+        assert model.miner_hotkey == "hotkey_xyz"
+        assert model.status == AgentRunStatus.SUCCESS
+        assert model.exported is True
+        assert model.is_final is True
+        assert model.created_at == created
+        assert model.updated_at == updated
+
+    def test_create_with_error_status(self):
+        model = AgentRunsModel(
+            run_id="run_error_1",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=10,
+            miner_hotkey="hotkey_10",
+            status=AgentRunStatus.INTERNAL_AGENT_ERROR,
+        )
+
+        assert model.status == AgentRunStatus.INTERNAL_AGENT_ERROR
+
+    def test_exported_int_to_bool(self):
+        # exported as integer should convert to bool
+        model = AgentRunsModel(
+            run_id="run_1",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=1,
+            miner_hotkey="hotkey_1",
+            status=AgentRunStatus.SUCCESS,
+            exported=1,
+        )
+        assert model.exported is True
+
+        model2 = AgentRunsModel(
+            run_id="run_2",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=1,
+            miner_hotkey="hotkey_1",
+            status=AgentRunStatus.SUCCESS,
+            exported=0,
+        )
+        assert model2.exported is False
+
+    def test_exported_bool_passthrough(self):
+        # exported as boolean should pass through
+        model = AgentRunsModel(
+            run_id="run_1",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=1,
+            miner_hotkey="hotkey_1",
+            status=AgentRunStatus.SUCCESS,
+            exported=True,
+        )
+        assert model.exported is True
+
+    def test_is_final_int_to_bool(self):
+        # is_final as integer should convert to bool
+        model = AgentRunsModel(
+            run_id="run_1",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=1,
+            miner_hotkey="hotkey_1",
+            status=AgentRunStatus.SUCCESS,
+            is_final=1,
+        )
+        assert model.is_final is True
+
+        model2 = AgentRunsModel(
+            run_id="run_2",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=1,
+            miner_hotkey="hotkey_1",
+            status=AgentRunStatus.SANDBOX_TIMEOUT,
+            is_final=0,
+        )
+        assert model2.is_final is False
+
+    def test_all_status_types(self):
+        # Test all status enum values
+        statuses_to_test = [
+            AgentRunStatus.SUCCESS,
+            AgentRunStatus.INTERNAL_AGENT_ERROR,
+            AgentRunStatus.INVALID_SANDBOX_OUTPUT,
+            AgentRunStatus.SANDBOX_TIMEOUT,
+        ]
+
+        for status in statuses_to_test:
+            model = AgentRunsModel(
+                run_id=f"run_{status.value}",
+                unique_event_id="event_1",
+                agent_version_id="agent_v1",
+                miner_uid=1,
+                miner_hotkey="hotkey_1",
+                status=status,
+            )
+            assert model.status == status
+
+    def test_invalid_status_type(self):
+        # status must be AgentRunStatus enum
+        with pytest.raises(ValidationError):
+            AgentRunsModel(
+                run_id="run_1",
+                unique_event_id="event_1",
+                agent_version_id="agent_v1",
+                miner_uid=1,
+                miner_hotkey="hotkey_1",
+                status="invalid_status",
+            )
+
+    def test_invalid_miner_uid_type(self):
+        # miner_uid must be integer
+        with pytest.raises(ValidationError):
+            AgentRunsModel(
+                run_id="run_1",
+                unique_event_id="event_1",
+                agent_version_id="agent_v1",
+                miner_uid="not_an_int",
+                miner_hotkey="hotkey_1",
+                status=AgentRunStatus.SUCCESS,
+            )
+
+    def test_primary_key_property(self):
+        model = AgentRunsModel(
+            run_id="run_123",
+            unique_event_id="event_456",
+            agent_version_id="agent_v1",
+            miner_uid=42,
+            miner_hotkey="5GTest...",
+            status=AgentRunStatus.SUCCESS,
+        )
+        assert model.primary_key == ["run_id"]
+
+    def test_retry_scenario(self):
+        # Simulate retry scenario: first two attempts not final, third is final
+        run1 = AgentRunsModel(
+            run_id="run_attempt_1",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=42,
+            miner_hotkey="hotkey_42",
+            status=AgentRunStatus.SANDBOX_TIMEOUT,
+            is_final=False,
+        )
+        assert run1.is_final is False
+        assert run1.status == AgentRunStatus.SANDBOX_TIMEOUT
+
+        run2 = AgentRunsModel(
+            run_id="run_attempt_2",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=42,
+            miner_hotkey="hotkey_42",
+            status=AgentRunStatus.SANDBOX_TIMEOUT,
+            is_final=False,
+        )
+        assert run2.is_final is False
+
+        run3 = AgentRunsModel(
+            run_id="run_attempt_3",
+            unique_event_id="event_1",
+            agent_version_id="agent_v1",
+            miner_uid=42,
+            miner_hotkey="hotkey_42",
+            status=AgentRunStatus.SUCCESS,
+            is_final=True,
+        )
+        assert run3.is_final is True
+        assert run3.status == AgentRunStatus.SUCCESS
+
+
+class TestAgentRunStatus:
+    def test_status_enum_values(self):
+        assert AgentRunStatus.SUCCESS.value == "success"
+        assert AgentRunStatus.INTERNAL_AGENT_ERROR.value == "internal_agent_error"
+        assert AgentRunStatus.INVALID_SANDBOX_OUTPUT.value == "invalid_sandbox_output"
+        assert AgentRunStatus.SANDBOX_TIMEOUT.value == "sandbox_timeout"
+
+
+class TestAgentRunExportedStatus:
+    def test_exported_status_enum_values(self):
+        assert AgentRunExportedStatus.NOT_EXPORTED == 0
+        assert AgentRunExportedStatus.EXPORTED == 1
+
+
+class TestIsFinalStatus:
+    def test_is_final_status_enum_values(self):
+        assert IsFinalStatus.NOT_FINAL == 0
+        assert IsFinalStatus.IS_FINAL == 1
diff --git a/neurons/validator/sandbox/manager.py b/neurons/validator/sandbox/manager.py
index 0d6c9cf..d91e0cb 100644
--- a/neurons/validator/sandbox/manager.py
+++ b/neurons/validator/sandbox/manager.py
@@ -415,6 +415,20 @@ def finish_with_error(error_msg: str, result: SandboxResult) -> None:
             sandbox.container.wait(timeout=sandbox.timeout)
         except (TimeoutError, requests.exceptions.ReadTimeout):
             sandbox.container.kill()
+
+            try:
+                result.logs = sandbox.container.logs(stderr=False).decode("utf-8")
+                self.logger.debug(
+                    "Captured partial logs on timeout",
+                    extra={"sandbox_id": sandbox_id, "lines": len(result.logs.splitlines())},
+                )
+            except Exception as e:
+                self.logger.warning(
+                    "Failed to capture logs on timeout",
+                    extra={"sandbox_id": sandbox_id, "error": str(e)},
+                )
+                result.logs = f"Failed to capture partial logs on timeout: {e}"
+
             finish_with_error("Timeout exceeded", result)
             return
 
diff --git a/neurons/validator/tasks/export_predictions.py b/neurons/validator/tasks/export_predictions.py
index 95b8e73..b10225b 100644
--- a/neurons/validator/tasks/export_predictions.py
+++ b/neurons/validator/tasks/export_predictions.py
@@ -2,10 +2,7 @@
 from neurons.validator.models.numinous_client import MinerPrediction, PostPredictionsRequestBody
 from neurons.validator.numinous_client.client import NuminousClient
 from neurons.validator.scheduler.task import AbstractTask
-from neurons.validator.utils.common.interval import (
-    get_interval_iso_datetime,
-    get_interval_start_minutes,
-)
+from neurons.validator.utils.common.interval import get_interval_iso_datetime
 from neurons.validator.utils.logger.logger import NuminousLogger
 
 
@@ -73,11 +70,9 @@ def interval_seconds(self):
 
     async def run(self):
         while True:
-            current_interval_minutes = get_interval_start_minutes()
-
             # Get predictions to export
             predictions = await self.db_operations.get_predictions_to_export(
-                current_interval_minutes=current_interval_minutes, batch_size=self.batch_size
+                batch_size=self.batch_size
             )
 
             if len(predictions) == 0:
diff --git a/neurons/validator/tasks/run_agents.py b/neurons/validator/tasks/run_agents.py
index 2a3ba3e..5dc2d1e 100644
--- a/neurons/validator/tasks/run_agents.py
+++ b/neurons/validator/tasks/run_agents.py
@@ -263,6 +263,17 @@ async def store_prediction(
                 extra={"event_id": event_id, "agent_version_id": agent.version_id, "error": str(e)},
             )
 
+    def _build_error_logs(self, logs: str, error_msg: str, traceback: Optional[str] = None) -> str:
+        if "Timeout" in error_msg:
+            logs += f"\n\n{'='*50}\nTIMEOUT\n{'='*50}\n"
+            logs += "Execution exceeded timeout limit\n"
+        else:
+            logs += f"\n\n{'='*50}\nERROR DETAILS\n{'='*50}\n"
+            logs += f"Error: {error_msg}\n"
+            if traceback:
+                logs += f"\nTraceback:\n{traceback}"
+        return logs
+
     async def post_agent_logs(self, run_id: str, logs: str) -> None:
         try:
             original_length = len(logs)
@@ -399,6 +410,17 @@ async def execute_agent_for_event(
 
         result = await self.run_sandbox(agent_code, event_data, run_id)
 
+        if result is None:
+            logs = "Sandbox timeout - no logs"
+        else:
+            logs = result.get("logs", "No logs available")
+            if result.get("status") == "error":
+                logs = self._build_error_logs(
+                    logs, result.get("error", "Unknown error"), result.get("traceback")
+                )
+
+        await self.post_agent_logs(run_id, logs)
+
         if result is None:
             self.logger.error(
                 "Sandbox execution failed or timed out",
@@ -414,7 +436,6 @@
         if prediction_value is None:
             return
 
-        await self.post_agent_logs(run_id, result.get("logs", "No logs available"))
         await self.store_prediction(
             event_id, agent, prediction_value, run_id, interval_start_minutes
         )
diff --git a/neurons/validator/tasks/tests/test_export_predictions.py b/neurons/validator/tasks/tests/test_export_predictions.py
index 5246709..9cd43da 100644
--- a/neurons/validator/tasks/tests/test_export_predictions.py
+++ b/neurons/validator/tasks/tests/test_export_predictions.py
@@ -229,11 +229,12 @@ async def test_run(
         await export_predictions_task.run()
 
         # Assert
-        assert export_predictions_task.api_client.post_predictions.call_count == 2
+        assert export_predictions_task.api_client.post_predictions.call_count == 3
 
         mock_calls = export_predictions_task.api_client.post_predictions.mock_calls
         first_call = mock_calls[0]
         second_call = mock_calls[1]
+        third_call = mock_calls[2]
 
         # fetching it early to get submitted = CURRENT_TIMESTAMP from the database
         result = await db_client.many(
@@ -300,11 +301,40 @@
                 )
             },
         )
+        assert third_call == (
+            "__call__",
+            {
+                "body": PostPredictionsRequestBody(
+                    submissions=[
+                        {
+                            "unique_event_id": "unique_event_id_3",
+                            "provider_type": "market_3",
+                            "prediction": 1.0,
+                            "interval_start_minutes": current_interval_minutes,
+                            "interval_agg_prediction": 1.0,
+                            "interval_agg_count": 1,
+                            "interval_datetime": get_interval_iso_datetime(
+                                current_interval_minutes
+                            ),
+                            "miner_hotkey": "neuronHotkey_3",
+                            "miner_uid": 3,
+                            "validator_hotkey": "validator_hotkey_test",
+                            "validator_uid": 0,
+                            "title": None,
+                            "outcome": None,
+                            "submitted_at": result[2][1],
+                            "run_id": "b23e4567-e89b-12d3-a456-42661417400a",
+                            "version_id": "c23e4567-e89b-12d3-a456-42661417400b",
+                        }
+                    ]
+                )
+            },
+        )
 
         assert len(result) == 3
         assert result[0][0] == PredictionExportedStatus.EXPORTED
         assert result[1][0] == PredictionExportedStatus.EXPORTED
-        assert result[2][0] == PredictionExportedStatus.NOT_EXPORTED
+        assert result[2][0] == PredictionExportedStatus.EXPORTED
 
     async def test_run_no_predictions(self, export_predictions_task: ExportPredictions):
         # Mock API client
diff --git a/neurons/validator/tasks/tests/test_run_agents.py b/neurons/validator/tasks/tests/test_run_agents.py
index a7956fc..e748ac8 100644
--- a/neurons/validator/tasks/tests/test_run_agents.py
+++ b/neurons/validator/tasks/tests/test_run_agents.py
@@ -997,3 +997,172 @@ async def test_post_agent_logs_truncates_long_logs(
         assert str(call_args.run_id) == run_id
         assert "LOG TRUNCATED" in call_args.log_content
         assert len(call_args.log_content) < 30000
+
+
+@pytest.mark.asyncio
+class TestRunAgentsErrorLogging:
+    async def test_logs_exported_on_agent_execution_error(
+        self,
+        mock_db_operations,
+        mock_sandbox_manager,
+        mock_metagraph,
+        mock_api_client,
+        mock_logger,
+        sample_agent,
+        sample_event_tuple,
+    ):
+        task = RunAgents(
+            interval_seconds=600.0,
+            db_operations=mock_db_operations,
+            sandbox_manager=mock_sandbox_manager,
+            metagraph=mock_metagraph,
+            api_client=mock_api_client,
+            logger=mock_logger,
+            timeout_seconds=120,
+        )
+
+        task.load_agent_code = AsyncMock(return_value="def agent_main(): pass")
+        error_result = {
+            "status": "error",
+            "error": "agent_main() must return a dict, got NoneType.",
+            "traceback": "Traceback (most recent call last):\n File ...\nException: ...",
+            "logs": "[AGENT_RUNNER] Starting\n[AGENT_RUNNER] Error occurred",
+        }
+        task.run_sandbox = AsyncMock(return_value=error_result)
+
+        await task.execute_agent_for_event(
+            event_id="event_123",
+            agent=sample_agent,
+            event_tuple=sample_event_tuple,
+            interval_start_minutes=1000,
+        )
+
+        mock_api_client.post_agent_logs.assert_called_once()
+        body = mock_api_client.post_agent_logs.call_args[0][0]
+        logs = body.log_content
+
+        assert "[AGENT_RUNNER] Starting" in logs
+        assert "ERROR DETAILS" in logs
+        assert "agent_main() must return a dict" in logs
+        assert "Traceback" in logs
+
+    async def test_logs_exported_on_timeout(
+        self,
+        mock_db_operations,
+        mock_sandbox_manager,
+        mock_metagraph,
+        mock_api_client,
+        mock_logger,
+        sample_agent,
+        sample_event_tuple,
+    ):
+        task = RunAgents(
+            interval_seconds=600.0,
+            db_operations=mock_db_operations,
+            sandbox_manager=mock_sandbox_manager,
+            metagraph=mock_metagraph,
+            api_client=mock_api_client,
+            logger=mock_logger,
+            timeout_seconds=120,
+        )
+
+        task.load_agent_code = AsyncMock(return_value="def agent_main(): pass")
+        timeout_result = {
+            "status": "error",
+            "error": "Timeout exceeded",
+            "logs": "[AGENT_RUNNER] Starting\n[AGENT_RUNNER] Processing...\n",
+        }
+        task.run_sandbox = AsyncMock(return_value=timeout_result)
+
+        await task.execute_agent_for_event(
+            event_id="event_123",
+            agent=sample_agent,
+            event_tuple=sample_event_tuple,
+            interval_start_minutes=1000,
+        )
+
+        mock_api_client.post_agent_logs.assert_called_once()
+        body = mock_api_client.post_agent_logs.call_args[0][0]
+        logs = body.log_content
+
+        assert "[AGENT_RUNNER] Starting" in logs
+        assert "TIMEOUT" in logs
+        assert "Execution exceeded timeout limit" in logs
+
+    async def test_logs_exported_on_validation_error(
+        self,
+        mock_db_operations,
+        mock_sandbox_manager,
+        mock_metagraph,
+        mock_api_client,
+        mock_logger,
+        sample_agent,
+        sample_event_tuple,
+    ):
+        task = RunAgents(
+            interval_seconds=600.0,
+            db_operations=mock_db_operations,
+            sandbox_manager=mock_sandbox_manager,
+            metagraph=mock_metagraph,
+            api_client=mock_api_client,
+            logger=mock_logger,
+            timeout_seconds=120,
+        )
+
+        task.load_agent_code = AsyncMock(return_value="def agent_main(): pass")
+        invalid_result = {
+            "status": "success",
+            "output": {"event_id": "event_123"},
+            "logs": "[AGENT_RUNNER] Starting\n[AGENT_RUNNER] Completed",
+        }
+        task.run_sandbox = AsyncMock(return_value=invalid_result)
+
+        await task.execute_agent_for_event(
+            event_id="event_123",
+            agent=sample_agent,
+            event_tuple=sample_event_tuple,
+            interval_start_minutes=1000,
+        )
+
+        mock_api_client.post_agent_logs.assert_called_once()
+        body = mock_api_client.post_agent_logs.call_args[0][0]
+        logs = body.log_content
+
+        assert "[AGENT_RUNNER] Starting" in logs
+        assert "[AGENT_RUNNER] Completed" in logs
+
+    async def test_logs_exported_on_result_none(
+        self,
+        mock_db_operations,
+        mock_sandbox_manager,
+        mock_metagraph,
+        mock_api_client,
+        mock_logger,
+        sample_agent,
+        sample_event_tuple,
+    ):
+        task = RunAgents(
+            interval_seconds=600.0,
+            db_operations=mock_db_operations,
+            sandbox_manager=mock_sandbox_manager,
+            metagraph=mock_metagraph,
+            api_client=mock_api_client,
+            logger=mock_logger,
+            timeout_seconds=120,
+        )
+
+        task.load_agent_code = AsyncMock(return_value="def agent_main(): pass")
+        task.run_sandbox = AsyncMock(return_value=None)
+
+        await task.execute_agent_for_event(
+            event_id="event_123",
+            agent=sample_agent,
+            event_tuple=sample_event_tuple,
+            interval_start_minutes=1000,
+        )
+
+        mock_api_client.post_agent_logs.assert_called_once()
+        body = mock_api_client.post_agent_logs.call_args[0][0]
+        logs = body.log_content
+
+        assert "Sandbox timeout - no logs" in logs