# Lecture Translation Migration
#
# Runs when the source lecture repository reports an opened/updated PR (via
# repository_dispatch) or when started manually. It checks which modified
# lectures already have a translation, opens a translation PR for them, and
# files a fallback issue when that is skipped or fails.
name: Lecture Translation Migration

on:
  repository_dispatch:
    types: [pr-opened, pr-synchronized]
  workflow_dispatch:
    inputs:
      source_repo:
        description: 'Source repository (e.g., QuantEcon/lecture-python.myst)'
        required: true
        default: 'QuantEcon/lecture-python.myst'
      pr_number:
        description: 'PR number in source repository'
        required: true
        type: number
      target_repo:
        description: 'Target repository (e.g., QuantEcon/lecture-python.zh-cn)'
        required: true
        default: 'QuantEcon/lecture-python.zh-cn'

env:
  # NOTE(review): secrets.GITHUB_TOKEN is the token GitHub provides
  # automatically for this repository; confirm it is allowed to read the
  # source repository and write to the target repository.
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  TRANSLATION_REVIEWER: nisha617
  # Event-supplied values are exposed as environment variables and referenced
  # as "$VAR" in the scripts below. Interpolating ${{ ... }} directly into a
  # `run:` block would let a crafted payload inject shell commands.
  SOURCE_REPO: ${{ github.event.client_payload.repository || inputs.source_repo }}
  PR_NUMBER: ${{ github.event.client_payload.pr_number || inputs.pr_number }}
  TARGET_REPO: ${{ github.event.client_payload.target_repo || inputs.target_repo }}

jobs:
  check-translation-status:
    runs-on: ubuntu-latest
    outputs:
      translated-files: ${{ steps.check.outputs.translated-files }}
      should-proceed: ${{ steps.check.outputs.should-proceed }}
    steps:
      - name: Checkout meta repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install requests PyYAML openai gitpython

      - name: Get PR details and check translation status
        id: check
        run: |
          python scripts/check_translation_status.py \
            --source-repo "$SOURCE_REPO" \
            --pr-number "$PR_NUMBER" \
            --target-repo "$TARGET_REPO"

  create-translation-pr:
    needs: check-translation-status
    if: needs.check-translation-status.outputs.should-proceed == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout meta repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install requests PyYAML openai gitpython diff-match-patch

      - name: Create translation PR
        env:
          # JSON built from PR file names; must never be pasted into the
          # script text, so it is handed over as an environment variable.
          TRANSLATED_FILES: ${{ needs.check-translation-status.outputs.translated-files }}
        run: |
          python scripts/create_translation_pr.py \
            --source-repo "$SOURCE_REPO" \
            --pr-number "$PR_NUMBER" \
            --target-repo "$TARGET_REPO" \
            --translated-files "$TRANSLATED_FILES"

  create-fallback-issue:
    needs: [check-translation-status, create-translation-pr]
    # always() keeps this job eligible even when an upstream job failed or
    # was skipped; the rest of the condition selects those two cases.
    if: always() && (needs.check-translation-status.outputs.should-proceed == 'false' || failure())
    runs-on: ubuntu-latest
    steps:
      - name: Checkout meta repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install requests PyYAML

      - name: Create fallback issue
        env:
          REASON: ${{ needs.create-translation-pr.result }}
        run: |
          python scripts/create_fallback_issue.py \
            --source-repo "$SOURCE_REPO" \
            --pr-number "$PR_NUMBER" \
            --target-repo "$TARGET_REPO" \
            --reason "$REASON"
Temporary files +*.tmp +*.temp + +# Environment variables +.env +.env.local +.env.production + +# Test outputs +test_output/ +temp_repos/ \ No newline at end of file diff --git a/INTEGRATION_EXAMPLE.md b/INTEGRATION_EXAMPLE.md new file mode 100644 index 0000000..69c4d79 --- /dev/null +++ b/INTEGRATION_EXAMPLE.md @@ -0,0 +1,79 @@ +# Integration with Source Repository + +To fully integrate this translation workflow, add the following workflow to the source repository (`QuantEcon/lecture-python.myst`): + +## `.github/workflows/trigger-translation.yml` + +```yaml +name: Trigger Translation Check + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - 'lectures/*.md' + +jobs: + trigger-translation: + runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + steps: + - name: Trigger translation workflow + run: | + curl -X POST \ + -H "Authorization: token ${{ secrets.META_REPO_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Content-Type: application/json" \ + https://api.github.com/repos/QuantEcon/meta/dispatches \ + -d '{ + "event_type": "pr-opened", + "client_payload": { + "repository": "${{ github.repository }}", + "pr_number": ${{ github.event.number }}, + "target_repo": "QuantEcon/lecture-python.zh-cn" + } + }' + + - name: Comment on PR + uses: actions/github-script@v6 + with: + script: | + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: 'πŸ”„ Translation workflow triggered! If any of the modified lectures have been translated to Chinese, a translation PR will be automatically created.' 
+ }) +``` + +## Required Secrets + +Add the following secret to the source repository: + +- `META_REPO_TOKEN`: A GitHub personal access token with the following permissions: + - `repo` scope for the meta repository + - Permission to trigger workflow dispatches + +## Alternative: Webhook Integration + +Instead of using repository dispatch, you can set up a webhook: + +1. Go to repository Settings → Webhooks +2. Add webhook with URL: `https://api.github.com/repos/QuantEcon/meta/dispatches` +3. Set content type to `application/json` +4. Select "Pull requests" events +5. Add webhook secret if needed + +## Testing the Integration + +1. Create a test PR in the source repository that modifies a lecture file +2. Check that the workflow is triggered in the meta repository +3. Verify that the correct files are identified for translation +4. Check that PRs or issues are created as expected + +## Monitoring + +Monitor the workflow execution in: +- Actions tab of the meta repository +- Check logs for debugging information +- Review created PRs and issues in target repository \ No newline at end of file diff --git a/README.md b/README.md index bf878f7..c0373b4 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,36 @@ # meta For issues and discussion covering more than one repository + +## Translation Automation Workflow + +This repository now includes an automated workflow for translating lecture changes from English to Chinese. When lectures are modified in `QuantEcon/lecture-python.myst`, the system automatically: + +1. **Detects** which lectures have been changed +2. **Checks** if those lectures have been translated to Chinese +3. **Translates** the changes using AI +4. **Creates** a pull request in `QuantEcon/lecture-python.zh-cn` +5. **Tags** reviewers for verification + +### Quick Start + +1. **Setup Secrets**: Configure `GITHUB_TOKEN` and `OPENAI_API_KEY` in repository settings +2. 
**Integration**: Add trigger workflow to source repository (see `INTEGRATION_EXAMPLE.md`) +3. **Test**: Create a PR in the English repository to test the workflow + +### Files + +- πŸ“‹ **`.github/workflows/lecture-translation-migration.yml`** - Main workflow +- 🐍 **`scripts/`** - Python automation scripts +- πŸ“š **`TRANSLATION_WORKFLOW.md`** - Detailed documentation +- πŸ”— **`INTEGRATION_EXAMPLE.md`** - Source repository integration +- βœ… **`scripts/validate_setup.py`** - Setup validation +- 🎬 **`scripts/demo_workflow.py`** - Workflow demonstration + +### Quick Validation + +```bash +python scripts/validate_setup.py # Check setup +python scripts/demo_workflow.py # See workflow demo +``` + +See `TRANSLATION_WORKFLOW.md` for complete documentation. diff --git a/TRANSLATION_WORKFLOW.md b/TRANSLATION_WORKFLOW.md new file mode 100644 index 0000000..79495c3 --- /dev/null +++ b/TRANSLATION_WORKFLOW.md @@ -0,0 +1,153 @@ +# Translation Migration Workflow + +This repository contains a GitHub Actions workflow that automatically detects when lectures are modified in the English repository (`lecture-python.myst`) and migrates those changes to the Chinese repository (`lecture-python.zh-cn`) if the lectures have already been translated. + +## How It Works + +### Workflow Overview + +1. **Trigger**: The workflow is triggered when a PR is opened or updated in the `lecture-python.myst` repository +2. **Detection**: It identifies which lecture files have been modified +3. **Translation Check**: It checks the Chinese repository's `_toc.yml` to see which lectures have been translated +4. **Translation**: For translated lectures, it automatically translates the changes using OpenAI's API +5. **PR Creation**: It creates a new PR in the Chinese repository with the translated changes +6. **Review**: It tags @nisha617 for review of the translation +7. 
**Fallback**: If translation fails, it creates an issue for manual review + +### Files + +- `.github/workflows/lecture-translation-migration.yml`: Main workflow file +- `scripts/check_translation_status.py`: Checks which modified files have been translated +- `scripts/create_translation_pr.py`: Creates a PR with translated changes +- `scripts/create_fallback_issue.py`: Creates an issue when automatic translation fails + +## Setup Requirements + +### Secrets + +The workflow requires the following secrets to be configured in the repository: + +1. **`GITHUB_TOKEN`**: GitHub personal access token with appropriate permissions + - Needs access to read from source repository + - Needs access to create PRs and issues in target repository + +2. **`OPENAI_API_KEY`**: OpenAI API key for translation services + - Used to translate content from English to Chinese + - Requires a valid OpenAI account with API access + +### Permissions + +The `GITHUB_TOKEN` needs the following permissions: +- `contents: read` - To read files from repositories +- `pull-requests: read` - To read PR details and files +- `contents: write` - To create branches and update files +- `pull-requests: write` - To create PRs +- `issues: write` - To create fallback issues + +## Triggering the Workflow + +### Automatic Triggering + +The workflow can be triggered automatically from the source repository by sending a repository dispatch event: + +```bash +curl -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/QuantEcon/meta/dispatches \ + -d '{ + "event_type": "pr-opened", + "client_payload": { + "repository": "QuantEcon/lecture-python.myst", + "pr_number": 123, + "target_repo": "QuantEcon/lecture-python.zh-cn" + } + }' +``` + +### Manual Triggering + +The workflow can also be triggered manually through the GitHub Actions interface: + +1. Go to the Actions tab in the meta repository +2. 
Select "Lecture Translation Migration" workflow +3. Click "Run workflow" +4. Fill in the required parameters: + - Source repository (e.g., `QuantEcon/lecture-python.myst`) + - PR number + - Target repository (e.g., `QuantEcon/lecture-python.zh-cn`) + +## Integration with Source Repository + +To fully automate the process, add a webhook or workflow to the source repository that triggers this workflow when PRs are opened: + +```yaml +# In QuantEcon/lecture-python.myst/.github/workflows/trigger-translation.yml +name: Trigger Translation Check + +on: + pull_request: + types: [opened, synchronize] + paths: + - 'lectures/*.md' + +jobs: + trigger-translation: + runs-on: ubuntu-latest + steps: + - name: Trigger translation workflow + run: | + curl -X POST \ + -H "Authorization: token ${{ secrets.META_REPO_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/QuantEcon/meta/dispatches \ + -d '{ + "event_type": "pr-opened", + "client_payload": { + "repository": "${{ github.repository }}", + "pr_number": ${{ github.event.number }}, + "target_repo": "QuantEcon/lecture-python.zh-cn" + } + }' +``` + +## Translation Quality + +The workflow uses OpenAI's GPT-4 model for translation with specific instructions to: + +- Preserve markdown formatting +- Keep code snippets untranslated +- Maintain mathematical formulas +- Use appropriate academic Chinese terminology +- Preserve technical terms commonly used in English + +## Review Process + +When a translation PR is created: + +1. @nisha617 is automatically tagged for review +2. The PR includes links to the original PR +3. A checklist is provided for reviewers +4. The PR description includes context about the changes + +## Troubleshooting + +### Common Issues + +1. **Translation fails**: Check OpenAI API key and quota +2. **Permission denied**: Verify GitHub token permissions +3. 
def get_github_headers():
    """Build the HTTP headers for authenticated GitHub REST API requests.

    Returns:
        dict: ``Authorization`` and ``Accept`` headers.

    Raises:
        ValueError: If the ``GITHUB_TOKEN`` environment variable is unset or empty.
    """
    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        raise ValueError("GITHUB_TOKEN environment variable is required")

    return {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }


def get_pr_files(source_repo, pr_number):
    """Return the lecture files modified by a pull request.

    Args:
        source_repo: Repository in ``owner/name`` form.
        pr_number: Number of the pull request in ``source_repo``.

    Returns:
        list[dict]: One entry per ``lectures/*.md`` file with the keys
        ``filename``, ``lecture_name`` (file stem), ``status`` and ``patch``.

    Raises:
        requests.HTTPError: If any GitHub API request fails.
    """
    headers = get_github_headers()
    url = f"https://api.github.com/repos/{source_repo}/pulls/{pr_number}/files"

    # Pages that live next to the lectures but are skipped by this workflow.
    excluded = {'intro', 'status', 'troubleshooting', 'zreferences'}
    per_page = 100

    lecture_files = []
    page = 1
    # The endpoint is paginated; a single request would silently drop every
    # file beyond the first page of a large PR.
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={'per_page': per_page, 'page': page},
        )
        response.raise_for_status()
        files = response.json()

        for file in files:
            filename = file['filename']
            if not (filename.startswith('lectures/') and filename.endswith('.md')):
                continue

            # The lecture name is the file name without path and extension.
            lecture_name = Path(filename).stem
            if lecture_name in excluded:
                continue

            lecture_files.append({
                'filename': filename,
                'lecture_name': lecture_name,
                'status': file['status'],
                'patch': file.get('patch', '')
            })

        # A short page is the last page.
        if len(files) < per_page:
            break
        page += 1

    return lecture_files


def _iter_toc_files(entries):
    """Yield each ``file`` value in a list of TOC entries, descending into ``sections``."""
    for entry in entries or []:
        if 'file' in entry:
            yield entry['file']
        yield from _iter_toc_files(entry.get('sections'))


def get_translated_lectures(target_repo):
    """Return the lectures listed in the target repository's table of contents.

    Args:
        target_repo: Repository in ``owner/name`` form.

    Returns:
        set: The ``file`` values found under ``parts[*].chapters`` and the
        root-level ``chapters`` of ``lectures/_toc.yml``, including nested
        ``sections``. Empty if the file has no such entries.

    Raises:
        requests.HTTPError: If the GitHub API request fails.
    """
    import base64

    headers = get_github_headers()

    # A lecture counts as translated when it appears in the target's _toc.yml.
    url = f"https://api.github.com/repos/{target_repo}/contents/lectures/_toc.yml"

    response = requests.get(url, headers=headers)
    response.raise_for_status()

    # The contents API returns the file body base64-encoded.
    content = response.json()
    toc_content = base64.b64decode(content['content']).decode('utf-8')

    # safe_load returns None for an empty document.
    toc_data = yaml.safe_load(toc_content) or {}

    translated_lectures = set()
    for part in toc_data.get('parts') or []:
        translated_lectures.update(_iter_toc_files(part.get('chapters')))
    translated_lectures.update(_iter_toc_files(toc_data.get('chapters')))

    return translated_lectures


def _write_outputs(should_proceed, translated_files):
    """Publish the ``should-proceed`` and ``translated-files`` job outputs.

    The raw ``patch`` text is left out of the JSON: the consuming script only
    reads ``filename`` and ``lecture_name``, and arbitrary diff text does not
    belong in a value that later steps pass on a command line.
    """
    payload = [
        {key: value for key, value in info.items() if key != 'patch'}
        for info in translated_files
    ]
    lines = (
        f"should-proceed={'true' if should_proceed else 'false'}\n"
        f"translated-files={json.dumps(payload)}\n"
    )

    output_path = os.environ.get('GITHUB_OUTPUT')
    if output_path:
        with open(output_path, 'a', encoding='utf-8') as f:
            f.write(lines)
    else:
        # Outside GitHub Actions there is no output file; show the values instead.
        print(lines, end='')


def main():
    """Decide whether a source PR touches lectures that are already translated.

    Writes ``should-proceed`` and ``translated-files`` as job outputs and
    exits with status 1 on any error.
    """
    parser = argparse.ArgumentParser(description='Check translation status for lecture files')
    parser.add_argument('--source-repo', required=True, help='Source repository (e.g., QuantEcon/lecture-python.myst)')
    parser.add_argument('--pr-number', required=True, type=int, help='PR number in source repository')
    parser.add_argument('--target-repo', required=True, help='Target repository (e.g., QuantEcon/lecture-python.zh-cn)')

    args = parser.parse_args()

    try:
        print(f"Getting modified files from PR #{args.pr_number} in {args.source_repo}")
        modified_files = get_pr_files(args.source_repo, args.pr_number)

        if not modified_files:
            print("No lecture files found in PR")
            _write_outputs(False, [])
            return

        print(f"Found {len(modified_files)} modified lecture files")

        print(f"Getting translated lectures from {args.target_repo}")
        translated_lectures = get_translated_lectures(args.target_repo)

        print(f"Found {len(translated_lectures)} translated lectures")

        # Keep only files that are both modified and already translated.
        translated_files = []
        for file_info in modified_files:
            if file_info['lecture_name'] in translated_lectures:
                translated_files.append(file_info)
                print(f"Found translated lecture to update: {file_info['lecture_name']}")

        if translated_files:
            print(f"Will process {len(translated_files)} translated lecture files")
        else:
            print("No translated lectures found that need updating")
        _write_outputs(bool(translated_files), translated_files)

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
def get_github_headers():
    """Return the headers used for authenticated GitHub API calls.

    Raises:
        ValueError: If ``GITHUB_TOKEN`` is missing or empty in the environment.
    """
    token = os.environ.get('GITHUB_TOKEN')
    if token:
        return {
            'Authorization': f'token {token}',
            'Accept': 'application/vnd.github.v3+json'
        }
    raise ValueError("GITHUB_TOKEN environment variable is required")


def get_pr_details(repo, pr_number):
    """Fetch the JSON description of pull request ``pr_number`` in ``repo``.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    auth_headers = get_github_headers()
    pr_response = requests.get(
        f"https://api.github.com/repos/{repo}/pulls/{pr_number}",
        headers=auth_headers,
    )
    pr_response.raise_for_status()
    return pr_response.json()


def create_issue(repo, title, body, labels=None, assignees=None):
    """Open an issue in ``repo`` and return the API's JSON response.

    ``labels`` and ``assignees`` are only sent when they are non-empty.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    auth_headers = get_github_headers()
    issues_url = f"https://api.github.com/repos/{repo}/issues"

    payload = {'title': title, 'body': body}
    # Optional fields are added only when the caller supplied a value.
    for field, value in (('labels', labels), ('assignees', assignees)):
        if value:
            payload[field] = value

    issue_response = requests.post(issues_url, headers=auth_headers, json=payload)
    issue_response.raise_for_status()
    return issue_response.json()
+ elif args.reason == 'failure': + reason_text = "Automatic translation failed due to technical issues." + else: + reason_text = "Automatic translation could not be completed." + + # Create issue title and body + issue_title = f"Manual translation review needed for {args.source_repo} PR #{args.pr_number}" + + issue_body = f"""## Manual Translation Review Required + +A pull request in the English lecture repository requires manual review for potential translation updates. + +### Source PR Details +- **Repository**: {args.source_repo} +- **PR Number**: #{args.pr_number} +- **Title**: {pr_details['title']} +- **Author**: @{pr_details['user']['login']} +- **URL**: {pr_details['html_url']} + +### Reason for Manual Review +{reason_text} + +### Action Required +Please review the changes in the source PR and determine if any translation updates are needed: + +1. Review the modified files in the [source PR]({pr_details['html_url']}) +2. Check if any of the modified lectures have been translated to Chinese +3. If translation updates are needed, create a new PR with the translated changes +4. If no updates are needed, close this issue + +### Files Modified +The following files were modified in the source PR: +""" + + # Get the list of modified files + headers = get_github_headers() + files_url = f"https://api.github.com/repos/{args.source_repo}/pulls/{args.pr_number}/files" + files_response = requests.get(files_url, headers=headers) + files_response.raise_for_status() + + files = files_response.json() + for file in files: + if file['filename'].startswith('lectures/') and file['filename'].endswith('.md'): + issue_body += f"\n- `{file['filename']}` ({file['status']})" + + issue_body += f""" + +### Review Checklist +- [ ] Reviewed source PR changes +- [ ] Checked which lectures are translated +- [ ] Determined if translation updates are needed +- [ ] Created translation PR or closed this issue + +@{os.environ.get('TRANSLATION_REVIEWER', 'nisha617')} please review this request. 
def get_github_headers():
    """Build the HTTP headers for authenticated GitHub REST API requests.

    Raises:
        ValueError: If the ``GITHUB_TOKEN`` environment variable is unset or empty.
    """
    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        raise ValueError("GITHUB_TOKEN environment variable is required")

    return {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }


def setup_openai():
    """Configure the OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get('OPENAI_API_KEY')
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    openai.api_key = api_key


def translate_content(content, source_lang='English', target_lang='Chinese'):
    """Translate ``content`` with the OpenAI chat completions API.

    Args:
        content: Text (markdown) to translate.
        source_lang: Name of the source language used in the prompt.
        target_lang: Name of the target language used in the prompt.

    Returns:
        str: The translated text.

    Raises:
        RuntimeError: If the model stopped at the ``max_tokens`` limit, which
            would leave the translation cut off part-way through the document.
        Exception: Any error raised by the OpenAI client is logged and re-raised.
    """
    try:
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "system",
                    "content": f"""You are a professional translator specializing in academic and technical content, particularly economics and programming tutorials.

Translate the following content from {source_lang} to {target_lang}.

IMPORTANT RULES:
1. Preserve ALL markdown formatting exactly (headers, code blocks, math formulas, links, etc.)
2. Do NOT translate:
   - Code snippets and programming code
   - Mathematical formulas and equations
   - Variable names and function names
   - URLs and file paths
   - Technical terms that are commonly used in English in Chinese academic contexts
3. DO translate:
   - Regular text and explanations
   - Comments in code (but preserve the comment syntax)
   - Figure captions and labels
4. Maintain the exact same structure and formatting
5. Use simplified Chinese characters
6. Keep the same line breaks and spacing

The content is from an economics/programming lecture series, so maintain appropriate academic tone."""
                },
                {
                    "role": "user",
                    "content": content
                }
            ],
            temperature=0.3,
            max_tokens=4000
        )

        choice = response.choices[0]
        # finish_reason == 'length' means the output hit max_tokens. Committing
        # that text would replace the lecture with a truncated translation.
        if choice.finish_reason == 'length':
            raise RuntimeError(
                "Translation was truncated at the max_tokens limit; "
                "refusing to use a partial translation"
            )

        return choice.message.content

    except Exception as e:
        print(f"Translation error: {e}")
        raise


def get_file_content(repo, file_path, ref='main'):
    """Read a file from a GitHub repository.

    Args:
        repo: Repository in ``owner/name`` form.
        file_path: Path of the file inside the repository.
        ref: Branch, tag or commit to read from.

    Returns:
        tuple: ``(text, sha)`` - the UTF-8 decoded content and the blob SHA.

    Raises:
        requests.HTTPError: If the API request fails.
    """
    headers = get_github_headers()
    url = f"https://api.github.com/repos/{repo}/contents/{file_path}"

    params = {'ref': ref}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()

    content_data = response.json()
    content = base64.b64decode(content_data['content']).decode('utf-8')

    return content, content_data['sha']


def update_file_content(repo, file_path, content, commit_message, sha, branch='main'):
    """Commit new content for an existing file.

    Args:
        repo: Repository in ``owner/name`` form.
        file_path: Path of the file inside the repository.
        content: New text content of the file.
        commit_message: Commit message.
        sha: Blob SHA of the file being replaced, as found on ``branch``.
        branch: Branch to commit to.

    Returns:
        dict: The API's JSON response.

    Raises:
        requests.HTTPError: If the API request fails.
    """
    headers = get_github_headers()
    url = f"https://api.github.com/repos/{repo}/contents/{file_path}"

    data = {
        'message': commit_message,
        'content': base64.b64encode(content.encode('utf-8')).decode('utf-8'),
        'sha': sha,
        'branch': branch
    }

    response = requests.put(url, headers=headers, json=data)
    response.raise_for_status()

    return response.json()


def create_branch(repo, branch_name, base_branch='main'):
    """Create ``branch_name`` at the tip of ``base_branch``.

    A 422 response is treated as "branch already exists" and is not an error.

    Raises:
        requests.HTTPError: For any other failed API request.
    """
    headers = get_github_headers()

    # Look up the commit the new branch should point at.
    ref_url = f"https://api.github.com/repos/{repo}/git/refs/heads/{base_branch}"
    response = requests.get(ref_url, headers=headers)
    response.raise_for_status()
    base_sha = response.json()['object']['sha']

    create_url = f"https://api.github.com/repos/{repo}/git/refs"
    data = {
        'ref': f'refs/heads/{branch_name}',
        'sha': base_sha
    }

    response = requests.post(create_url, headers=headers, json=data)
    if response.status_code == 422:
        print(f"Branch {branch_name} already exists")
        return

    response.raise_for_status()


def create_pull_request(repo, head_branch, base_branch, title, body):
    """Open a pull request and return the API's JSON response.

    Raises:
        requests.HTTPError: If the API request fails.
    """
    headers = get_github_headers()
    url = f"https://api.github.com/repos/{repo}/pulls"

    data = {
        'title': title,
        'head': head_branch,
        'base': base_branch,
        'body': body
    }

    response = requests.post(url, headers=headers, json=data)
    response.raise_for_status()

    return response.json()


def _find_open_pull_request(repo, head_branch):
    """Return the open pull request whose head is ``head_branch``, or ``None``."""
    headers = get_github_headers()
    url = f"https://api.github.com/repos/{repo}/pulls"

    # The ``head`` filter expects the ``owner:branch`` form.
    owner = repo.split('/')[0]
    params = {'state': 'open', 'head': f'{owner}:{head_branch}'}

    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()

    pulls = response.json()
    return pulls[0] if pulls else None


def apply_diff_translation(original_content, patch, translated_patch):
    """Placeholder for applying a translated diff to existing content.

    Diff parsing is not implemented: ``original_content`` and ``patch`` are
    accepted for interface stability but ignored, and ``translated_patch`` is
    returned unchanged.
    """
    return translated_patch


def get_pr_details(repo, pr_number):
    """Fetch the JSON description of pull request ``pr_number`` in ``repo``.

    Raises:
        requests.HTTPError: If the API request fails.
    """
    headers = get_github_headers()
    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"

    response = requests.get(url, headers=headers)
    response.raise_for_status()

    return response.json()


def main():
    """Translate the lectures changed by a source PR and open a PR in the target repository.

    Exits with status 1 on any error, and also when no file could be updated,
    so that the workflow's fallback job can react to the failure.
    """
    parser = argparse.ArgumentParser(description='Create translation PR for lecture files')
    parser.add_argument('--source-repo', required=True, help='Source repository')
    parser.add_argument('--pr-number', required=True, type=int, help='Source PR number')
    parser.add_argument('--target-repo', required=True, help='Target repository')
    parser.add_argument('--translated-files', required=True, help='JSON string of files to translate')

    args = parser.parse_args()

    try:
        setup_openai()

        translated_files = json.loads(args.translated_files)

        if not translated_files:
            print("No files to process")
            return

        pr_details = get_pr_details(args.source_repo, args.pr_number)

        # The PR is not merged yet, so its changes exist only at the PR head.
        # Reading the default branch would translate the unchanged lecture.
        source_ref = pr_details['head']['sha']

        branch_name = f"translation-update-pr-{args.pr_number}"
        print(f"Creating branch: {branch_name}")
        create_branch(args.target_repo, branch_name)

        updated_files = []
        failed_files = []

        for file_info in translated_files:
            lecture_name = file_info['lecture_name']
            print(f"Processing file: {lecture_name}")

            source_file = file_info['filename']
            target_file = f"lectures/{lecture_name}.md"

            # A deleted lecture has no content to translate.
            if file_info.get('status') == 'removed':
                print(f"Skipping {lecture_name}: file was removed in the source PR")
                failed_files.append(lecture_name)
                continue

            try:
                source_content, _ = get_file_content(args.source_repo, source_file, ref=source_ref)

                # Take the blob SHA from the translation branch. When the
                # branch already exists from an earlier run, the SHA on the
                # default branch no longer matches and the update is rejected.
                _, target_sha = get_file_content(args.target_repo, target_file, ref=branch_name)

                # The whole file is translated, not only the diff.
                print(f"Translating content for {lecture_name}")
                translated_content = translate_content(source_content)

                commit_message = f"Update {lecture_name} translation from PR #{args.pr_number}"

                update_file_content(
                    args.target_repo,
                    target_file,
                    translated_content,
                    commit_message,
                    target_sha,
                    branch_name
                )

                updated_files.append(lecture_name)
                print(f"Updated {target_file}")

            except Exception as e:
                # Best effort per file: record the failure and carry on.
                print(f"Error processing {lecture_name}: {e}", file=sys.stderr)
                failed_files.append(lecture_name)
                continue

        if not updated_files:
            # Exit non-zero so the workflow's failure handling is triggered.
            print("No files were successfully updated", file=sys.stderr)
            sys.exit(1)

        # A re-run for the same source PR reuses the branch and its open PR.
        existing_pr = _find_open_pull_request(args.target_repo, branch_name)
        if existing_pr:
            print(f"Updated existing PR: {existing_pr['html_url']}")
            return

        failed_section = ""
        if failed_files:
            failed_list = "\n".join(f"- {name}" for name in failed_files)
            failed_section = f"""
### Lectures Not Updated
The following lectures could not be translated automatically and need manual attention:

{failed_list}
"""

        pr_title = f"Translation update from {args.source_repo} PR #{args.pr_number}"
        pr_body = f"""## Automated Translation Update

This PR contains translation updates for the following lectures based on changes in [{args.source_repo} PR #{args.pr_number}]({pr_details['html_url']}):

{chr(10).join(f'- {file}' for file in updated_files)}
{failed_section}
### Original PR Details
- **Title**: {pr_details['title']}
- **Author**: @{pr_details['user']['login']}
- **URL**: {pr_details['html_url']}

### Review Instructions
Please review the translated content to ensure:
1. Technical accuracy is maintained
2. Chinese terminology is appropriate
3. Code examples and formulas are preserved correctly
4. Formatting and structure are consistent

@{os.environ.get('TRANSLATION_REVIEWER', 'nisha617')} please review this translation.

---
*This PR was automatically generated by the translation workflow.*"""

        pr = create_pull_request(
            args.target_repo,
            branch_name,
            'main',
            pr_title,
            pr_body
        )

        print(f"Created PR: {pr['html_url']}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
def mock_get_pr_files(source_repo, pr_number):
    """Print and return a fixed list of simulated PR file entries.

    Each entry is a dict with the keys ``filename``, ``lecture_name``,
    ``status`` and ``patch``. The arguments are only echoed in the output.
    """
    print(f"πŸ“₯ Getting modified files from {source_repo} PR #{pr_number}")

    # (lecture name, change status, patch text) of every simulated change
    fixtures = (
        ('linear_algebra', 'modified',
         '@@ -10,7 +10,7 @@ This lecture covers...\n-old content\n+new content'),
        ('new_lecture', 'added',
         '+This is a completely new lecture'),
        ('prob_matrix', 'modified',
         '@@ -50,3 +50,5 @@ Matrix operations...\n+Additional examples'),
    )
    entries = [
        {
            'filename': f'lectures/{lecture}.md',
            'lecture_name': lecture,
            'status': status,
            'patch': patch,
        }
        for lecture, status, patch in fixtures
    ]

    for entry in entries:
        print(f" πŸ“„ {entry['filename']} ({entry['status']})")

    return entries


def mock_get_translated_lectures(target_repo):
    """Print and return a fixed set of lecture names treated as translated."""
    print(f"πŸ“š Getting translated lectures from {target_repo}")

    lectures = set((
        'linear_algebra', 'prob_matrix', 'kalman', 'finite_markov',
        'lqcontrol', 'mccall_model', 'optgrowth',
    ))

    print(f" Found {len(lectures)} translated lectures")
    return lectures


def mock_translation_status_check():
    """Demonstrate step 1: select the changed files that already have a translation.

    Returns the simulated PR file entries whose ``lecture_name`` is in the
    simulated set of translated lectures, in their original order.
    """
    print("πŸ” STEP 1: Checking Translation Status")
    print("=" * 50)

    changed = mock_get_pr_files("QuantEcon/lecture-python.myst", 123)
    already_translated = mock_get_translated_lectures("QuantEcon/lecture-python.zh-cn")

    needs_update = []
    for entry in changed:
        if entry['lecture_name'] not in already_translated:
            print(f" ⏸️ {entry['lecture_name']} not yet translated")
            continue
        needs_update.append(entry)
        print(f" βœ… {entry['lecture_name']} needs translation update")

    print(f"\nπŸ“Š Result: {len(needs_update)} files need translation updates")
    return needs_update


def mock_translation_process(files):
    """Demonstrate step 2: print the translation steps for every entry in *files*."""
    print("\n🌐 STEP 2: Translation Process")
    print("=" * 50)

    for entry in files:
        print(f"πŸ“ Translating {entry['lecture_name']}")
        print(f" πŸ“₯ Source: {entry['filename']}")
        print(" πŸ€– Using OpenAI GPT-4 for translation")
        print(f" πŸ“€ Target: lectures/{entry['lecture_name']}.md")
        print(" βœ… Translation completed")
        print()


def mock_pr_creation(files):
    """Demonstrate step 3: print what the translation PR would contain."""
    print("πŸ”€ STEP 3: Creating Translation PR")
    print("=" * 50)

    branch_name = "translation-update-pr-123"
    print(f"🌿 Creating branch: {branch_name}")

    for entry in files:
        print(f" πŸ“ Updating lectures/{entry['lecture_name']}.md")

    print("\nπŸ“‹ Creating PR with:")
    print(" Title: Translation update from QuantEcon/lecture-python.myst PR #123")
    print(" Reviewer: @nisha617")
    print(f" Files: {', '.join(entry['lecture_name'] for entry in files)}")
    print(" βœ… PR created successfully!")


def mock_fallback_scenario():
    """Demonstrate the alternative step 3: print the fallback issue details."""
    print("\n⚠️ STEP 3 (Alternative): Fallback Issue Creation")
    print("=" * 50)

    for line in (
        "πŸ”„ Translation failed or no translated files found",
        "πŸ“ Creating issue for manual review:",
        " Title: Manual translation review needed for QuantEcon/lecture-python.myst PR #123",
        " Assignee: @nisha617",
        " Labels: translation, manual-review",
        " βœ… Issue created successfully!",
    ):
        print(line)


def main():
    """Run the whole mock demonstration from status check to summary."""
    banner = "=" * 60
    print("πŸš€ Translation Workflow Demonstration")
    print(banner)
    print("This demo shows how the automated translation workflow operates")
    print(banner)

    translated_files = mock_translation_status_check()

    if not translated_files:
        # Nothing to translate: show the fallback path instead.
        mock_fallback_scenario()
    else:
        mock_translation_process(translated_files)
        mock_pr_creation(translated_files)

    for line in (
        "\nπŸŽ‰ Workflow Complete!",
        "\nThis demonstrates the complete automated workflow:",
        "1. βœ… Detect modified lecture files from source PR",
        "2. βœ… Check which files have been translated",
        "3. βœ… Translate content using OpenAI",
        "4. βœ… Create PR with translated changes",
        "5. βœ… Tag reviewer for review",
        "6. βœ… Fallback to issue creation if needed",
        "\nπŸ“š Next steps:",
        "- Configure GITHUB_TOKEN and OPENAI_API_KEY secrets",
        "- Set up repository dispatch trigger in source repo",
        "- Test with a real PR",
    ):
        print(line)


if __name__ == '__main__':
    main()
def trigger_workflow_dispatch(repo, token, inputs, ref='main', timeout=30):
    """Send a ``workflow_dispatch`` request for the translation workflow.

    Args:
        repo: ``owner/name`` of the repository that hosts the workflow.
        token: GitHub token placed in the ``Authorization`` header.
        inputs: Mapping sent as the workflow ``inputs``.
        ref: Git ref the workflow is dispatched on (default ``'main'``).
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        The ``requests`` response object; the status is not checked here.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        'Authorization': f'token {token}',
        'Accept': 'application/vnd.github.v3+json'
    }

    url = f"https://api.github.com/repos/{repo}/actions/workflows/lecture-translation-migration.yml/dispatches"

    payload = {
        'ref': ref,
        'inputs': inputs
    }

    return requests.post(url, headers=headers, json=payload, timeout=timeout)


def main(argv=None):
    """Parse the command line and trigger the workflow once.

    Args:
        argv: Optional list of command-line arguments; ``None`` (the default)
            makes argparse read ``sys.argv`` as before.

    Returns:
        0 when the dispatch was answered with HTTP 204, 1 when no token is
        available, the request fails, or any other status is returned.
    """
    parser = argparse.ArgumentParser(description='Test workflow trigger')
    parser.add_argument('--token', help='GitHub token (or use GITHUB_TOKEN env var)')
    parser.add_argument('--repo', default='QuantEcon/meta', help='Target repository')
    parser.add_argument('--source-repo', default='QuantEcon/lecture-python.myst', help='Source repository')
    parser.add_argument('--pr-number', type=int, default=1, help='PR number to test with')
    parser.add_argument('--target-repo', default='QuantEcon/lecture-python.zh-cn', help='Target repository')

    args = parser.parse_args(argv)

    # An explicit --token wins over the environment variable.
    token = args.token or os.environ.get('GITHUB_TOKEN')
    if not token:
        print("Error: GitHub token required (use --token or GITHUB_TOKEN env var)")
        return 1

    inputs = {
        'source_repo': args.source_repo,
        # argparse validated this as an int; it is sent as a string.
        'pr_number': str(args.pr_number),
        'target_repo': args.target_repo
    }

    print(f"Triggering workflow in {args.repo}")
    print(f"Inputs: {json.dumps(inputs, indent=2)}")

    try:
        response = trigger_workflow_dispatch(args.repo, token, inputs)
    except requests.RequestException as e:
        print(f"Error: {e}")
        return 1

    # Only HTTP 204 is treated as success.
    if response.status_code != 204:
        print(f"❌ Failed to trigger workflow: {response.status_code}")
        print(response.text)
        return 1

    print("βœ… Workflow triggered successfully!")
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main())
def test_file_structure():
    """Check that every file the workflow needs exists.

    Paths are relative to the current working directory.

    Returns:
        True when all files exist, False (after listing them) otherwise.
    """
    required_files = [
        '.github/workflows/lecture-translation-migration.yml',
        'scripts/check_translation_status.py',
        'scripts/create_translation_pr.py',
        'scripts/create_fallback_issue.py',
        'TRANSLATION_WORKFLOW.md'
    ]

    missing_files = [
        file_path for file_path in required_files
        if not Path(file_path).exists()
    ]

    if missing_files:
        print(f"❌ Missing files: {missing_files}")
        return False

    print("βœ… All required files exist")
    return True


def test_script_syntax():
    """Check that the workflow's Python scripts compile.

    Every script is checked even if an earlier one fails, so one run reports
    all problems. A script that cannot be opened counts as a failure.

    Returns:
        True when every script was read and compiled, False otherwise.
    """
    script_files = [
        'scripts/check_translation_status.py',
        'scripts/create_translation_pr.py',
        'scripts/create_fallback_issue.py'
    ]

    all_valid = True
    for script_path in script_files:
        try:
            # Explicit UTF-8: the scripts contain non-ASCII characters and the
            # platform default encoding is not guaranteed to decode them.
            with open(script_path, 'r', encoding='utf-8') as f:
                compile(f.read(), script_path, 'exec')
        except SyntaxError as e:
            print(f"❌ {script_path} has syntax error: {e}")
            all_valid = False
        except OSError as e:
            print(f"❌ {script_path} could not be read: {e}")
            all_valid = False
        else:
            print(f"βœ… {script_path} has valid syntax")

    return all_valid


def test_workflow_syntax():
    """Check that the GitHub workflow file parses as YAML.

    When PyYAML is not installed the check is skipped and counts as passed.

    Returns:
        True when the file parses (or the check is skipped), False when the
        file is invalid YAML or cannot be read.
    """
    # The import has its own try block: an ``except yaml.YAMLError`` clause in
    # the same ``try`` would be evaluated while ``yaml`` is still unbound and
    # raise UnboundLocalError instead of reaching the ImportError handler.
    try:
        import yaml
    except ImportError:
        print("⚠️ PyYAML not available, skipping YAML validation")
        return True

    try:
        with open('.github/workflows/lecture-translation-migration.yml', 'r', encoding='utf-8') as f:
            yaml.safe_load(f)
    except yaml.YAMLError as e:
        print(f"❌ Workflow YAML is invalid: {e}")
        return False
    except OSError as e:
        print(f"❌ Workflow file could not be read: {e}")
        return False

    print("βœ… Workflow YAML is valid")
    return True
def check_file_permissions():
    """Check that the workflow scripts exist and are executable.

    Paths are relative to the current working directory.

    Returns:
        True when every script exists and is executable, False (after listing
        the problems) otherwise.
    """
    script_files = [
        'scripts/check_translation_status.py',
        'scripts/create_translation_pr.py',
        'scripts/create_fallback_issue.py',
        'scripts/test_workflow.py',
        'scripts/demo_workflow.py'
    ]

    issues = []
    for script in script_files:
        path = Path(script)
        if not path.exists():
            issues.append(f"{script} does not exist")
        elif not os.access(path, os.X_OK):
            issues.append(f"{script} is not executable")

    if issues:
        print("❌ File permission issues:")
        for issue in issues:
            print(f" - {issue}")
        print("\nFix with: chmod +x scripts/*.py")
        return False

    print("βœ… All script files have correct permissions")
    return True


def check_python_dependencies():
    """Report which required Python packages cannot be imported.

    Missing packages are only reported, never treated as a failure, because
    the GitHub Actions workflow installs them itself.

    Returns:
        Always True.
    """
    import importlib

    # Install (distribution) name -> importable module name. The two differ
    # for PyYAML and gitpython, so the module name cannot be derived from the
    # install name.
    required_packages = {
        'requests': 'requests',
        'PyYAML': 'yaml',
        'openai': 'openai',
        'gitpython': 'git',
    }

    missing_packages = []
    for package, module_name in required_packages.items():
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing_packages.append(package)

    if missing_packages:
        print("⚠️ Missing Python packages (will be installed in workflow):")
        for package in missing_packages:
            print(f" - {package}")
        print("\nThese will be automatically installed in the GitHub Actions workflow.")
        return True  # Not a failure for our validation

    print("βœ… All required Python packages are available")
    return True


def check_secrets_guidance():
    """Print setup instructions for the required secrets; always returns True."""
    print("πŸ” Required Secrets Setup:")
    print(" 1. GITHUB_TOKEN:")
    print(" - Go to GitHub Settings > Developer settings > Personal access tokens")
    print(" - Create token with 'repo' scope")
    print(" - Add as repository secret")
    print("")
    print(" 2. OPENAI_API_KEY:")
    print(" - Get API key from https://platform.openai.com/api-keys")
    print(" - Add as repository secret")
    print("")
    print(" 3. Repository permissions:")
    print(" - Token needs read access to source repository")
    print(" - Token needs write access to target repository")
    print("")
    return True


def check_workflow_syntax():
    """Validate that the workflow file parses as YAML.

    When PyYAML is not installed the check is skipped and counts as passed,
    in line with check_python_dependencies, which does not treat missing
    packages as a failure.

    Returns:
        True when the file parses (or the check is skipped), False when
        reading or parsing the file fails.
    """
    try:
        import yaml
    except ImportError:
        # A missing parser says nothing about the workflow file itself.
        print("⚠️ PyYAML not available, skipping YAML validation")
        return True

    try:
        workflow_file = '.github/workflows/lecture-translation-migration.yml'
        with open(workflow_file, 'r') as f:
            yaml.safe_load(f)
    except Exception as e:
        print(f"❌ Workflow syntax error: {e}")
        return False

    print("βœ… GitHub Actions workflow syntax is valid")
    return True


def check_integration_setup():
    """Print integration instructions for the source repository; always returns True."""
    print("πŸ”— Integration Setup:")
    print(" 1. In the source repository (lecture-python.myst):")
    print(" - Add the workflow from INTEGRATION_EXAMPLE.md")
    print(" - Configure META_REPO_TOKEN secret")
    print("")
    print(" 2. Alternative: Set up webhook")
    print(" - Repository Settings > Webhooks")
    print(" - Point to GitHub API dispatch endpoint")
    print("")
    return True


def main():
    """Run all validation checks and print a summary.

    A check fails when it returns a falsy value or raises; the remaining
    checks still run.

    Returns:
        0 when every check passed, 1 otherwise (used as the exit status).
    """
    print("πŸ”§ Translation Workflow Validation")
    print("=" * 50)

    checks = [
        ("File Permissions", check_file_permissions),
        ("Python Dependencies", check_python_dependencies),
        ("Workflow Syntax", check_workflow_syntax),
        ("Secrets Guidance", check_secrets_guidance),
        ("Integration Guidance", check_integration_setup)
    ]

    all_passed = True
    for name, check_func in checks:
        print(f"\n{name}:")
        print("-" * len(name))
        try:
            if not check_func():
                all_passed = False
        except Exception as e:
            # Keep going so one broken check does not hide the others.
            print(f"❌ Error in {name}: {e}")
            all_passed = False

    print("\n" + "=" * 50)
    if all_passed:
        print("πŸŽ‰ Validation complete! Workflow is ready for deployment.")
        print("\nπŸ“‹ Next steps:")
        print("1. Configure required secrets in repository settings")
        print("2. Set up integration in source repository")
        print("3. Test with a real PR")
        print("4. Monitor workflow execution in Actions tab")
    else:
        print("❌ Some issues need to be resolved before deployment.")

    return 0 if all_passed else 1


if __name__ == '__main__':
    sys.exit(main())