From 48ba00dc67c7a905729558d3caf877255989e5ea Mon Sep 17 00:00:00 2001
From: Gus Fraser
Date: Sat, 2 Aug 2025 16:12:19 +0100
Subject: [PATCH 1/2] Added --parallel for faster test execution

---
 .gitignore           |   2 +
 README.md            | 112 ++++++++++++++++++++
 replicantx/cli.py    | 246 ++++++++++++++++++++++++++++++++++++-------
 replicantx/models.py |   1 +
 4 files changed, 322 insertions(+), 39 deletions(-)

diff --git a/.gitignore b/.gitignore
index c4a1528..85fd0bd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -209,5 +209,7 @@ helix_tests/
 *.secret.yaml
 *_private.yaml
 *_secret.yaml
+tests/*.md
+tests/*.json
 reports/
\ No newline at end of file
diff --git a/README.md b/README.md
index e2bc491..2dccd52 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@
 - **Technical Debugging**: Debug mode (`--debug`) with detailed HTTP, validation, and AI processing logs
 - **Multiple Authentication**: Supabase email+password, custom JWT, or no-auth
 - **CLI Interface**: Easy-to-use command-line interface with `replicantx run`
+- **Parallel Execution**: Run multiple test scenarios concurrently for faster execution
 - **Automatic .env Loading**: No manual environment variable sourcing required
 - **GitHub Actions Ready**: Built-in workflow for PR testing with Render preview URLs
 - **Rich Reporting**: Markdown and JSON reports with timing and assertion results
@@ -394,6 +395,12 @@
 replicantx run tests/*.yaml --debug
 
 # Combined monitoring and debugging
 replicantx run tests/*.yaml --debug --watch
 
+# Run tests in parallel for faster execution
+replicantx run tests/*.yaml --parallel
+
+# Run with limited concurrency to prevent API overload
+replicantx run tests/*.yaml --parallel --max-concurrent 3
+
 # Validate test files without running
 replicantx validate tests/*.yaml --verbose
 ```
@@ -510,6 +517,111 @@
 replicantx run tests/*.yaml --debug --ci
 replicantx run tests/*.yaml --debug --verbose --report performance.json
 ```
 
+### āš” Parallel Test Execution
+
+ReplicantX supports parallel execution of test scenarios for significantly faster test runs, especially when testing multiple scenarios against the same API.
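+
+Under the hood, parallel mode follows the standard asyncio fan-out pattern: each scenario becomes a coroutine, an optional semaphore caps concurrency, and `asyncio.gather` collects the results. A minimal sketch of that pattern (illustrative only; `scenario_coros`, a list of zero-argument async callables, stands in for the real scenario runners and is not part of the ReplicantX API):
+
+```python
+import asyncio
+
+async def run_all(scenario_coros, max_concurrent=None):
+    # Optional semaphore caps how many scenarios run at once
+    semaphore = asyncio.Semaphore(max_concurrent) if max_concurrent else None
+
+    async def run_one(coro_fn):
+        if semaphore:
+            async with semaphore:
+                return await coro_fn()
+        return await coro_fn()
+
+    # Start every scenario concurrently; failures come back as exception objects
+    return await asyncio.gather(
+        *(run_one(fn) for fn in scenario_coros), return_exceptions=True
+    )
+```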
+
+#### šŸš€ **Basic Parallel Execution**
+
+Run all scenarios in parallel (overrides individual scenario settings):
+
+```bash
+# Run all tests in parallel
+replicantx run tests/*.yaml --parallel
+
+# Run with limited concurrency to prevent API overload
+replicantx run tests/*.yaml --parallel --max-concurrent 3
+```
+
+#### šŸ“‹ **Per-Scenario Configuration**
+
+Control parallel execution at the scenario level:
+
+```yaml
+# tests/parallel_scenario.yaml
+name: "Parallel Test Scenario"
+base_url: "https://api.example.com/chat"
+auth:
+  provider: noop
+level: basic
+parallel: true  # Enable parallel execution for this scenario
+steps:
+  - user: "Hello, test message"
+    expect_contains: ["response"]
+```
+
+```yaml
+# tests/sequential_scenario.yaml
+name: "Sequential Test Scenario"
+base_url: "https://api.example.com/chat"
+auth:
+  provider: noop
+level: basic
+parallel: false  # Run sequentially (default)
+steps:
+  - user: "Hello, test message"
+    expect_contains: ["response"]
+```
+
+#### šŸ”„ **Execution Modes**
+
+**Automatic Detection:**
+- If any scenario has `parallel: true`, all scenarios run in parallel
+- If the `--parallel` flag is used, all scenarios run in parallel (overrides individual settings)
+- Otherwise, scenarios run sequentially
+
+**Mixed Execution:**
+```bash
+# Some scenarios parallel, some sequential - all run in parallel
+replicantx run tests/parallel_*.yaml tests/sequential_*.yaml
+```
+
+#### āš™ļø **Concurrency Control**
+
+**Unlimited Concurrency (Default):**
+```bash
+replicantx run tests/*.yaml --parallel
+```
+
+**Limited Concurrency:**
+```bash
+# Limit to 3 concurrent scenarios
+replicantx run tests/*.yaml --parallel --max-concurrent 3
+
+# Limit to 1 (effectively sequential but with parallel infrastructure)
+replicantx run tests/*.yaml --parallel --max-concurrent 1
+```
+
+#### šŸ“Š **Performance Benefits**
+
+**Example: 10 scenarios, each taking 5 seconds**
+
+| Mode | Duration | Speed Improvement |
+|------|----------|-------------------|
+| Sequential | ~50 seconds | 1x |
+| Parallel (unlimited) | ~5 seconds | 10x |
+| Parallel (max 3) | ~20 seconds | 2.5x |
+
+With `--max-concurrent 3`, the ten scenarios complete in four waves (3 + 3 + 3 + 1) of ~5 seconds each, hence roughly 20 seconds overall.
+
+#### āš ļø **Considerations**
+
+**API Rate Limits:**
+- Use `--max-concurrent` to avoid overwhelming your API
+- Monitor API response times during parallel execution
+- Consider your API's rate limiting policies
+
+**Resource Usage:**
+- Parallel execution uses more memory and network connections
+- Monitor system resources during large parallel test runs
+
+**Test Dependencies:**
+- Tests that depend on execution order should use `parallel: false`
+- Consider using sequential execution for tests that modify shared state
+
+**Debugging:**
+- Parallel execution may make debugging more complex
+- Use `--verbose` to see detailed output from all scenarios
+- Consider running problematic tests sequentially for debugging
+
 ### Authentication Providers
 
 #### Supabase
diff --git a/replicantx/cli.py b/replicantx/cli.py
index abd1abb..cb6a76b 100644
--- a/replicantx/cli.py
+++ b/replicantx/cli.py
@@ -87,6 +87,12 @@ def run(
     max_retries: Optional[int] = typer.Option(
         None, "--max-retries", help="Override default max retries"
     ),
+    parallel: bool = typer.Option(
+        False, "--parallel", help="Run scenarios in parallel (overrides individual scenario settings)"
+    ),
+    max_concurrent: Optional[int] = typer.Option(
+        None, "--max-concurrent", help="Maximum number of scenarios to run concurrently (default: unlimited)"
+    ),
 ):
     """Run test scenarios from YAML files.
@@ -99,6 +105,7 @@ def run(
         replicantx run tests/*.yaml --report report.md
         replicantx run tests/agent_test.yaml --watch --debug
         replicantx run tests/*.yaml --ci --report results.json
+        replicantx run tests/*.yaml --parallel --max-concurrent 3
     """
     console.print(f"šŸš€ ReplicantX {__version__} - Starting test execution")
@@ -135,6 +142,8 @@ def run(
         watch=watch,
         timeout_override=timeout,
         max_retries_override=max_retries,
+        parallel=parallel,
+        max_concurrent=max_concurrent,
     ))
@@ -147,6 +156,8 @@ async def run_scenarios_async(
     watch: bool = False,
     timeout_override: Optional[int] = None,
     max_retries_override: Optional[int] = None,
+    parallel: bool = False,
+    max_concurrent: Optional[int] = None,
 ):
     """Run scenarios asynchronously."""
     # Initialize test suite report
@@ -190,45 +201,34 @@ async def run_scenarios_async(
         console=console,
     ) as progress:
 
-        for file_path, config in scenarios:
-            task = progress.add_task(f"Running {Path(file_path).name}...", total=None)
-
-            try:
-                # Create appropriate runner based on level
-                if config.level == TestLevel.BASIC:
-                    runner = BasicScenarioRunner(config, debug=debug, watch=watch)
-                elif config.level == TestLevel.AGENT:
-                    runner = AgentScenarioRunner(config, debug=debug, watch=watch)
-                else:
-                    raise ValueError(f"Unsupported test level: {config.level}")
-
-                # Run the scenario
-                scenario_report = await runner.run()
-                suite_report.scenario_reports.append(scenario_report)
-
-                # Update counters
-                if scenario_report.passed:
-                    suite_report.passed_scenarios += 1
-                    progress.update(task, description=f"āœ… {Path(file_path).name}")
-                else:
-                    suite_report.failed_scenarios += 1
-                    progress.update(task, description=f"āŒ {Path(file_path).name}")
-
-                if verbose:
-                    console.print(f"  Steps: {scenario_report.passed_steps}/{scenario_report.total_steps}")
-                    console.print(f"  Duration: {scenario_report.duration_seconds:.2f}s")
-                    if scenario_report.error:
-                        console.print(f"  Error: {scenario_report.error}")
-
-            except Exception as e:
-                console.print(f"āŒ Error running {file_path}: {e}")
-                suite_report.failed_scenarios += 1
-                progress.update(task, description=f"āŒ {Path(file_path).name} (error)")
-
-                if ci_mode:
-                    raise typer.Exit(1)
-
-            progress.remove_task(task)
+        # Determine execution mode
+        should_run_parallel = parallel or any(config.parallel for _, config in scenarios)
+
+        if should_run_parallel:
+            console.print("šŸ”„ Running scenarios in parallel mode")
+            if max_concurrent:
+                console.print(f"šŸ“Š Max concurrent scenarios: {max_concurrent}")
+            await run_scenarios_parallel(
+                scenarios=scenarios,
+                suite_report=suite_report,
+                progress=progress,
+                ci_mode=ci_mode,
+                verbose=verbose,
+                debug=debug,
+                watch=watch,
+                max_concurrent=max_concurrent,
+            )
+        else:
+            console.print("šŸ”„ Running scenarios sequentially")
+            await run_scenarios_sequential(
+                scenarios=scenarios,
+                suite_report=suite_report,
+                progress=progress,
+                ci_mode=ci_mode,
+                verbose=verbose,
+                debug=debug,
+                watch=watch,
+            )
 
     # Finalize report
     suite_report.completed_at = datetime.now()
@@ -248,6 +248,174 @@ async def run_scenarios_async(
     console.print(f"\nāœ… Test execution completed successfully")
 
 
+async def run_scenarios_sequential(
+    scenarios: List[tuple],
+    suite_report: TestSuiteReport,
+    progress: Progress,
+    ci_mode: bool,
+    verbose: bool,
+    debug: bool,
+    watch: bool,
+):
+    """Run scenarios sequentially."""
+    for file_path, config in scenarios:
+        task = progress.add_task(f"Running {Path(file_path).name}...", total=None)
+
+        try:
+            # Create appropriate runner based on level
+            if config.level == TestLevel.BASIC:
+                runner = BasicScenarioRunner(config, debug=debug, watch=watch)
+            elif config.level == TestLevel.AGENT:
+                runner = AgentScenarioRunner(config, debug=debug, watch=watch)
+            else:
+                raise ValueError(f"Unsupported test level: {config.level}")
+
+            # Run the scenario
+            scenario_report = await runner.run()
+            suite_report.scenario_reports.append(scenario_report)
+
+            # Update counters
+            if scenario_report.passed:
+                suite_report.passed_scenarios += 1
+                progress.update(task, description=f"āœ… {Path(file_path).name}")
+            else:
+                suite_report.failed_scenarios += 1
+                progress.update(task, description=f"āŒ {Path(file_path).name}")
+
+            if verbose:
+                console.print(f"  Steps: {scenario_report.passed_steps}/{scenario_report.total_steps}")
+                console.print(f"  Duration: {scenario_report.duration_seconds:.2f}s")
+                if scenario_report.error:
+                    console.print(f"  Error: {scenario_report.error}")
+
+        except Exception as e:
+            console.print(f"āŒ Error running {file_path}: {e}")
+            suite_report.failed_scenarios += 1
+            progress.update(task, description=f"āŒ {Path(file_path).name} (error)")
+
+            if ci_mode:
+                raise typer.Exit(1)
+
+        progress.remove_task(task)
+
+
+async def run_scenarios_parallel(
+    scenarios: List[tuple],
+    suite_report: TestSuiteReport,
+    progress: Progress,
+    ci_mode: bool,
+    verbose: bool,
+    debug: bool,
+    watch: bool,
+    max_concurrent: Optional[int] = None,
+):
+    """Run scenarios in parallel."""
+    import asyncio
+    from asyncio import Semaphore
+
+    # Create semaphore for concurrency control
+    semaphore = Semaphore(max_concurrent) if max_concurrent else None
+
+    async def run_single_scenario(file_path: str, config: ScenarioConfig):
+        """Run a single scenario with proper error handling."""
+        task_id = None
+        try:
+            # Add progress task
+            task_id = progress.add_task(f"Running {Path(file_path).name}...", total=None)
+
+            # Apply semaphore if concurrency is limited
+            if semaphore:
+                async with semaphore:
+                    return await _execute_scenario(file_path, config, task_id, debug, watch, verbose)
+            else:
+                return await _execute_scenario(file_path, config, task_id, debug, watch, verbose)
+
+        except Exception as e:
+            console.print(f"āŒ Error running {file_path}: {e}")
+            # Note: task IDs start at 0, so check against None rather than truthiness
+            if task_id is not None:
+                progress.update(task_id, description=f"āŒ {Path(file_path).name} (error)")
+            return {
+                'file_path': file_path,
+                'config': config,
+                'report': None,
+                'error': str(e),
+                'task_id': task_id
+            }
+
+    # Create all scenario tasks
+    tasks = [run_single_scenario(file_path, config) for file_path, config in scenarios]
+
+    # Run all scenarios concurrently
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    # Process results
+    for result in results:
+        if isinstance(result, Exception):
+            console.print(f"āŒ Unexpected error: {result}")
+            suite_report.failed_scenarios += 1
+            continue
+
+        file_path = result['file_path']
+        config = result['config']
+        scenario_report = result['report']
+        error = result.get('error')
+        task_id = result['task_id']
+
+        if error:
+            suite_report.failed_scenarios += 1
+            if ci_mode:
+                raise typer.Exit(1)
+        elif scenario_report:
+            suite_report.scenario_reports.append(scenario_report)
+
+            # Update counters
+            if scenario_report.passed:
+                suite_report.passed_scenarios += 1
+                progress.update(task_id, description=f"āœ… {Path(file_path).name}")
+            else:
+                suite_report.failed_scenarios += 1
+                progress.update(task_id, description=f"āŒ {Path(file_path).name}")
+
+            if verbose:
+                console.print(f"  Steps: {scenario_report.passed_steps}/{scenario_report.total_steps}")
+                console.print(f"  Duration: {scenario_report.duration_seconds:.2f}s")
+                if scenario_report.error:
+                    console.print(f"  Error: {scenario_report.error}")
+
+        # Remove progress task (IDs start at 0, so check against None)
+        if task_id is not None:
+            progress.remove_task(task_id)
+
+
+async def _execute_scenario(
+    file_path: str,
+    config: ScenarioConfig,
+    task_id: int,
+    debug: bool,
+    watch: bool,
+    verbose: bool,
+):
+    """Execute a single scenario and return the result."""
+    # Create appropriate runner based on level
+    if config.level == TestLevel.BASIC:
+        runner = BasicScenarioRunner(config, debug=debug, watch=watch)
+    elif config.level == TestLevel.AGENT:
+        runner = AgentScenarioRunner(config, debug=debug, watch=watch)
+    else:
+        raise ValueError(f"Unsupported test level: {config.level}")
+
+    # Run the scenario
+    scenario_report = await runner.run()
+
+    return {
+        'file_path': file_path,
+        'config': config,
+        'report': scenario_report,
+        'error': None,
+        'task_id': task_id
+    }
+
+
 def substitute_env_vars(value):
     """Recursively substitute environment variables in YAML data.
 
diff --git a/replicantx/models.py b/replicantx/models.py
index 59c5697..069e640 100644
--- a/replicantx/models.py
+++ b/replicantx/models.py
@@ -237,6 +237,7 @@ class ScenarioConfig(BaseModel):
     max_retries: int = Field(3, description="Maximum number of retries per step")
     retry_delay_seconds: float = Field(1.0, description="Delay between retries")
     validate_politeness: bool = Field(False, description="Whether to validate politeness/conversational tone in responses")
+    parallel: bool = Field(False, description="Whether to run this scenario in parallel with others")
 
     @model_validator(mode='after')
     def validate_scenario(self) -> 'ScenarioConfig':

From a2658a9c00bebd7cf8d37bfb4d774bee57ce8abc Mon Sep 17 00:00:00 2001
From: Gus Fraser
Date: Sat, 2 Aug 2025 16:31:21 +0100
Subject: [PATCH 2/2] Bump to v0.1.6

---
 .github/workflows/replicantx.yml | 2 +-
 pyproject.toml                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/replicantx.yml b/.github/workflows/replicantx.yml
index 495403c..00e446d 100644
--- a/.github/workflows/replicantx.yml
+++ b/.github/workflows/replicantx.yml
@@ -7,7 +7,7 @@ jobs:
     env:
       SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
       SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }}
-      REPLICANTX_TARGET: pr-${{ github.event.pull_request.number }}-helix-api.onrender.com
+      REPLICANTX_TARGET: pr-${{ github.event.pull_request.number }}-your-api.onrender.com
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
diff --git a/pyproject.toml b/pyproject.toml
index bf807dc..d53b3c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "replicantx"
-version = "0.1.5"
+version = "0.1.6"
 description = "End-to-end testing harness for AI agents via web service API"
 readme = "README.md"
 requires-python = ">=3.11"