From 63500aed1b4738e7a46aa00ae599b7ba9132e61b Mon Sep 17 00:00:00 2001 From: Gus Fraser Date: Sat, 2 Aug 2025 16:47:55 +0100 Subject: [PATCH 1/4] Goal achieved && num steps combined = success --- replicantx/scenarios/agent.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/replicantx/scenarios/agent.py b/replicantx/scenarios/agent.py index 14ee434..e67aa72 100644 --- a/replicantx/scenarios/agent.py +++ b/replicantx/scenarios/agent.py @@ -285,6 +285,10 @@ async def run(self) -> ScenarioReport: conversation_summary = self.replicant_agent.get_conversation_summary() report.error = report.error or self._format_conversation_summary(conversation_summary) + # Update report.passed to consider both step success and goal achievement + goal_achieved = conversation_summary.get('goal_achieved', False) + report.passed = report.passed and goal_achieved + # Add conversation history to the last step result for reporting if report.step_results and self.replicant_agent: conversation_history = self._format_full_conversation() @@ -295,11 +299,14 @@ async def run(self) -> ScenarioReport: if self.watch: self._watch_log("") self._watch_log("📊 [bold green]CONVERSATION COMPLETE[/bold green]") - status = "✅ SUCCESS" if report.passed else "❌ FAILED" + # Consider both step success and goal achievement for overall success + goal_achieved = conversation_summary.get('goal_achieved', False) + overall_success = report.passed and goal_achieved + status = "✅ SUCCESS" if overall_success else "❌ FAILED" self._watch_log(f"🏁 Status: {status}") self._watch_log(f"🔢 Steps: {report.passed_steps}/{report.total_steps} passed") self._watch_log(f"⏱️ Duration: {report.total_duration_ms/1000:.1f}s") - self._watch_log(f"🎯 Goal achieved: {'Yes' if conversation_summary.get('goal_achieved', False) else 'No'}") + self._watch_log(f"🎯 Goal achieved: {'Yes' if goal_achieved else 'No'}") self._watch_log(f"📝 Facts used: {conversation_summary.get('facts_used', 0)}") self._watch_log(f"💬 Total 
turns: {conversation_summary.get('total_turns', 0)}") @@ -314,8 +321,9 @@ async def run(self) -> ScenarioReport: self._watch_log(f"📊 Confidence: {confidence:.2f}") self._watch_log(f"💭 Reasoning: {reasoning}") - self._debug_log("Scenario completed successfully", { + self._debug_log("Scenario completed", { "passed": report.passed, + "goal_achieved": goal_achieved, "total_steps": report.total_steps, "passed_steps": report.passed_steps, "total_duration_ms": report.total_duration_ms, From bf48a5daf855d7e2ff352b07190059c426012bf1 Mon Sep 17 00:00:00 2001 From: Gus Fraser Date: Sat, 2 Aug 2025 17:00:23 +0100 Subject: [PATCH 2/4] Added CONTRIBUTING guideline --- CONTRIBUTING.md | 353 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..1e1f2d3 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,353 @@ +# Contributing to ReplicantX + +Thank you for your interest in contributing to ReplicantX! This document provides guidelines and best practices for contributing to the project. + +## Table of Contents + +- [Getting Started](#getting-started) +- [Development Workflow](#development-workflow) +- [Creating Issues](#creating-issues) +- [Making Changes](#making-changes) +- [Code Style and Standards](#code-style-and-standards) +- [Testing](#testing) +- [Submitting Changes](#submitting-changes) +- [Review Process](#review-process) +- [Release Process](#release-process) + +## Getting Started + +### Prerequisites + +- Python 3.11 or higher +- Git +- A GitHub account + +### Setting Up Your Development Environment + +1. **Fork the Repository** + ```bash + # Navigate to https://github.com/HelixTechnologies/replicantx + # Click the "Fork" button in the top-right corner + ``` + +2. **Clone Your Fork** + ```bash + git clone https://github.com/YOUR_USERNAME/replicantx.git + cd replicantx + ``` + +3. 
**Add the Upstream Remote** + ```bash + git remote add upstream https://github.com/HelixTechnologies/replicantx.git + ``` + +4. **Install Development Dependencies** + ```bash + pip install -e . + pip install -r requirements.txt + ``` + +5. **Verify Installation** + ```bash + python -m replicantx.cli --version + ``` + +## Development Workflow + +### 1. Keep Your Fork Updated + +Before starting any new work, ensure your fork is up to date: + +```bash +git fetch upstream +git checkout main +git merge upstream/main +git push origin main +``` + +### 2. Create a Feature Branch + +Always work on a new branch for your changes: + +```bash +git checkout -b feature/your-feature-name +# or +git checkout -b fix/your-bug-fix +# or +git checkout -b docs/your-documentation-update +``` + +### Branch Naming Conventions + +- `feature/` - New features or enhancements +- `fix/` - Bug fixes +- `docs/` - Documentation updates +- `test/` - Test improvements or additions +- `refactor/` - Code refactoring +- `chore/` - Maintenance tasks + +Examples: +- `feature/add-parallel-execution` +- `fix/goal-evaluation-logic` +- `docs/update-contributing-guidelines` +- `test/add-integration-tests` + +## Creating Issues + +### Before Creating an Issue + +1. **Search existing issues** to avoid duplicates +2. **Check the documentation** to see if your question is already answered +3. **Reproduce the issue** if it's a bug + +### Issue Templates + +When creating an issue, use the appropriate template: + +#### Bug Report +- **Clear description** of the bug +- **Steps to reproduce** the issue +- **Expected vs actual behavior** +- **Environment details** (OS, Python version, etc.) 
+- **Minimal test case** if applicable + +#### Feature Request +- **Clear description** of the feature +- **Use case** and motivation +- **Proposed implementation** (if you have ideas) +- **Alternative solutions** considered + +#### Documentation Issue +- **What's missing or unclear** +- **Where it should be added** +- **Suggested content** + +## Making Changes + +### Code Style and Standards + +#### Python Code Style + +- Follow [PEP 8](https://www.python.org/dev/peps/pep-0008/) style guidelines +- Use type hints for function parameters and return values +- Keep functions focused and under 50 lines when possible +- Use descriptive variable and function names + +#### Commit Message Guidelines + +Use conventional commit format: + +``` +<type>(<scope>): <description> + +[optional body] + +[optional footer] +``` + +Types: +- `feat`: New feature +- `fix`: Bug fix +- `docs`: Documentation changes +- `style`: Code style changes (formatting, etc.) +- `refactor`: Code refactoring +- `test`: Adding or updating tests +- `chore`: Maintenance tasks + +Examples: +``` +feat(cli): add parallel execution support + +fix(agent): correct goal evaluation logic + +docs(readme): update installation instructions + +test(scenarios): add integration tests for auth providers +``` + +### Making Your Changes + +1. **Make focused, atomic commits** + ```bash + git add . + git commit -m "feat(agent): add intelligent goal evaluation" + ``` + +2. **Keep commits small and focused** + - One logical change per commit + - Don't mix different types of changes + - Use meaningful commit messages + +3. 
**Test your changes** + ```bash + # Run the test suite + python -m pytest tests/ + + # Run specific tests + python -m replicantx.cli run tests/your-test.yaml + + # Run with different options + python -m replicantx.cli run tests/ --debug --watch + ``` + +## Testing + +### Running Tests + +```bash +# Run all tests +python -m pytest + +# Run with coverage +python -m pytest --cov=replicantx + +# Run specific test files +python -m pytest tests/test_scenarios.py + +# Run ReplicantX scenarios +python -m replicantx.cli run tests/*.yaml +``` + +### Writing Tests + +- Write tests for new features +- Ensure existing tests still pass +- Add integration tests for complex scenarios +- Test both success and failure cases + +### Test Scenarios + +When adding new test scenarios: + +1. Create YAML files in the `tests/` directory +2. Use descriptive names that explain the test purpose +3. Include both basic and replicant agent scenarios +4. Test different authentication methods +5. Test edge cases and error conditions + +## Submitting Changes + +### 1. Push Your Branch + +```bash +git push origin feature/your-feature-name +``` + +### 2. Create a Pull Request + +1. Go to your fork on GitHub +2. Click "Compare & pull request" for your branch +3. Fill out the pull request template +4. Add appropriate labels +5. Request reviews from maintainers + +### Pull Request Guidelines + +#### Title +Use the same format as commit messages: +``` +feat(cli): add parallel execution support +``` + +#### Description +Include: +- **Summary** of changes +- **Motivation** for the change +- **Testing** performed +- **Breaking changes** (if any) +- **Related issues** (use `Fixes #123` or `Closes #123`) + +#### Example PR Description +```markdown +## Summary +Adds parallel execution support to the CLI, allowing multiple scenarios to run concurrently. + +## Motivation +Running scenarios sequentially can be slow for large test suites. Parallel execution significantly reduces total execution time. 
+ +## Changes +- Added `--parallel` and `--max-concurrent` CLI options +- Implemented `run_scenarios_parallel()` function +- Added `parallel: bool` field to ScenarioConfig +- Updated documentation with usage examples + +## Testing +- Added unit tests for parallel execution logic +- Tested with 10 concurrent scenarios +- Verified proper error handling and cleanup +- Confirmed no regression in sequential execution + +## Breaking Changes +None - all changes are backward compatible. + +Fixes #45 +``` + +## Review Process + +### What Reviewers Look For + +1. **Code Quality** + - Follows style guidelines + - Proper error handling + - Good documentation + +2. **Functionality** + - Works as intended + - Handles edge cases + - Doesn't break existing features + +3. **Testing** + - Adequate test coverage + - Tests pass + - New scenarios work correctly + +### Responding to Review Comments + +1. **Address all comments** before requesting re-review +2. **Make additional commits** if needed +3. **Explain your reasoning** if you disagree +4. 
**Ask for clarification** if something is unclear + +### Getting Your PR Merged + +- All tests must pass +- Code review must be approved +- Documentation must be updated +- No merge conflicts + +## Release Process + +### Version Bumping + +We use semantic versioning (MAJOR.MINOR.PATCH): + +- **MAJOR**: Breaking changes +- **MINOR**: New features (backward compatible) +- **PATCH**: Bug fixes (backward compatible) + +### Release Checklist + +Before a release: +- [ ] All tests pass +- [ ] Documentation is updated +- [ ] Version is bumped in `pyproject.toml` +- [ ] CHANGELOG.md is updated +- [ ] Release notes are prepared + +## Getting Help + +### Communication Channels + +- **GitHub Issues**: For bugs, feature requests, and questions +- **GitHub Discussions**: For general questions and community discussion +- **Pull Requests**: For code reviews and technical discussions + +## Recognition + +Contributors will be recognized in: +- The project's README.md +- Release notes +- GitHub contributors page + +Thank you for contributing to ReplicantX! 
🚀 \ No newline at end of file From e93d2a0d2e2a87bd9849a5cdb3d40e59b9c2d985 Mon Sep 17 00:00:00 2001 From: Gus Fraser Date: Sat, 2 Aug 2025 17:02:19 +0100 Subject: [PATCH 3/4] Updated guidelines & goal achieved --- CODE_OF_CONDUCT.md | 155 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 CODE_OF_CONDUCT.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..ada3422 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,155 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +[INSERT CONTACT METHOD]. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Reporting Guidelines + +If you experience or witness unacceptable behavior, or have any other concerns, +please report it by contacting the project maintainers at [INSERT CONTACT METHOD]. + +When reporting, please include: + +* Your contact information +* Names (real, nicknames, or pseudonyms) of any individuals involved +* Description of what occurred +* Description of when and where the incident occurred +* Any additional context that is helpful for understanding the incident +* Whether you believe this incident is ongoing +* Any other information you believe we should have + +## Addressing Grievances + +If you feel you have been falsely or unfairly accused of violating this Code of +Conduct, you should report it with a concise description of your grievance. +Your grievance will be handled in accordance with our existing governing +policies. + +## Project Maintainers + +The current project maintainers are: + +* [INSERT MAINTAINER NAME] - [INSERT CONTACT METHOD] +* [INSERT MAINTAINER NAME] - [INSERT CONTACT METHOD] + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), +version 2.1, available at +https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. 
\ No newline at end of file From 1cd7766ebbb9cbb40e32b27d642481f806f558df Mon Sep 17 00:00:00 2001 From: Gus Fraser Date: Sat, 2 Aug 2025 17:05:47 +0100 Subject: [PATCH 4/4] Bump to 0.1.7 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d53b3c4..fd991cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "replicantx" -version = "0.1.6" +version = "0.1.7" description = "End-to-end testing harness for AI agents via web service API" readme = "README.md" requires-python = ">=3.11"