diff --git a/.github/workflows/transcreation.yml.example b/.github/workflows/transcreation.yml.example new file mode 100644 index 0000000..307b00c --- /dev/null +++ b/.github/workflows/transcreation.yml.example @@ -0,0 +1,347 @@ +name: Transcreation + +# Translates missing/changed keys across all locales and opens a PR. +# Uses the transcreation-exposed Claude Code skill. +# +# Trigger: manual dispatch or schedule (weekly). +# The QC workflow (translation-qc.yml) runs automatically on the resulting PR. + +on: + schedule: + - cron: '0 6 * * 1' # Monday 06:00 UTC + workflow_dispatch: + inputs: + locale: + description: 'Target locale (blank = all missing)' + type: choice + options: + - all + - de + - es + - pt + - fr + default: all + dry_run: + description: 'Dry run - diff only, no PR' + type: boolean + default: false + +concurrency: + group: transcreation + cancel-in-progress: false + +permissions: + contents: write + pull-requests: write + issues: write + +jobs: + diff: + name: Detect missing and stale translations + runs-on: ubuntu-latest + outputs: + has_work: ${{ steps.diff.outputs.has_work }} + summary: ${{ steps.diff.outputs.summary }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Diff locales against English source + id: diff + run: | + python3 << 'PYEOF' + import json, os, subprocess + + locale_filter = os.environ.get("LOCALE_FILTER", "all") + locales = ["de", "es", "pt", "fr"] + if locale_filter != "all": + locales = [locale_filter] + + # Current English source + en = json.load(open("public/locales/en/common.json")) + en_keys = set(en.keys()) + + def get_en_at_last_sync(lang): + """Get English source values at the time the locale was last translated. + + Finds the most recent commit that touched the target locale file, + then reads the English source at that commit. Keys where the English + value changed since that commit have stale translations. 
+ + Limitation: if someone edits the target locale file for reasons + other than a sync (e.g., fixing a typo), this resets the baseline + and may miss EN changes that happened before that edit.""" + target_path = f"public/locales/{lang}/common.json" + en_path = "public/locales/en/common.json" + try: + result = subprocess.run( + ["git", "log", "-1", "--format=%H", "--", target_path], + capture_output=True, text=True, check=True + ) + last_sync_sha = result.stdout.strip() + if not last_sync_sha: + return None + + result = subprocess.run( + ["git", "show", f"{last_sync_sha}:{en_path}"], + capture_output=True, text=True, check=True + ) + return json.loads(result.stdout) + except (subprocess.CalledProcessError, json.JSONDecodeError): + return None + + total_work = 0 + summary_lines = [] + diff_details = {} + + for lang in locales: + path = f"public/locales/{lang}/common.json" + if not os.path.exists(path): + summary_lines.append(f"{lang}: new locale (all {len(en_keys)} keys)") + total_work += len(en_keys) + diff_details[lang] = {"new": list(en_keys), "stale": [], "orphaned": []} + continue + + target = json.load(open(path)) + target_keys = set(target.keys()) + + missing = sorted(en_keys - target_keys) + orphaned = sorted(target_keys - en_keys) + + stale = [] + en_at_sync = get_en_at_last_sync(lang) + if en_at_sync: + for key in sorted(en_keys & target_keys): + old_val = en_at_sync.get(key) + new_val = en.get(key) + if old_val is not None and old_val != new_val: + stale.append(key) + + parts = [] + if missing: + parts.append(f"{len(missing)} missing") + if stale: + parts.append(f"{len(stale)} stale") + if orphaned: + parts.append(f"{len(orphaned)} orphaned") + if not parts: + parts.append("up to date") + + summary_lines.append(f"{lang}: {', '.join(parts)}") + total_work += len(missing) + len(stale) + diff_details[lang] = {"new": missing, "stale": stale, "orphaned": orphaned} + + summary = "; ".join(summary_lines) + has_work = "true" if total_work > 0 else "false" + + 
with open("/tmp/i18n-diff.json", "w") as f: + json.dump(diff_details, f, indent=2) + + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write(f"has_work={has_work}\n") + f.write(f"summary={summary}\n") + + print(f"has_work={has_work}") + print(f"summary={summary}") + for lang, d in diff_details.items(): + if d["new"]: + print(f" {lang} new: {d['new'][:5]}{'...' if len(d['new']) > 5 else ''}") + if d["stale"]: + print(f" {lang} stale: {d['stale'][:5]}{'...' if len(d['stale']) > 5 else ''}") + if d["orphaned"]: + print(f" {lang} orphaned: {d['orphaned'][:5]}{'...' if len(d['orphaned']) > 5 else ''}") + PYEOF + env: + LOCALE_FILTER: ${{ inputs.locale || 'all' }} + + - name: Upload diff details + if: steps.diff.outputs.has_work == 'true' + uses: actions/upload-artifact@v4 + with: + name: i18n-diff + path: /tmp/i18n-diff.json + retention-days: 1 + + translate: + name: Translate missing keys + runs-on: ubuntu-latest + needs: diff + # On schedule trigger, inputs is empty so dry_run is null (always runs). + # Dry run is only available via manual workflow_dispatch. 
+ if: needs.diff.outputs.has_work == 'true' && inputs.dry_run != true + steps: + - uses: actions/checkout@v4 + + - name: Download diff details + uses: actions/download-artifact@v4 + with: + name: i18n-diff + path: /tmp/ + + - name: Set up Claude Code skills symlink + run: mkdir -p .claude && ln -sf ../tools/skills .claude/skills + + - name: Build translation prompt + id: prompt + run: | + python3 << 'PYEOF' + import json, os + + diff = json.load(open("/tmp/i18n-diff.json")) + en = json.load(open("public/locales/en/common.json")) + + sections = [] + for lang, d in diff.items(): + if not d["new"] and not d["stale"]: + continue + + parts = [] + + if d["new"]: + keys_with_values = {k: en[k] for k in d["new"]} + parts.append(f"NEW KEYS to translate ({len(d['new'])}):\n{json.dumps(keys_with_values, indent=2, ensure_ascii=False)}") + + if d["stale"]: + keys_with_values = {k: en[k] for k in d["stale"]} + parts.append(f"STALE KEYS to re-translate (English source changed) ({len(d['stale'])}):\n{json.dumps(keys_with_values, indent=2, ensure_ascii=False)}") + + if d["orphaned"]: + parts.append(f"ORPHANED KEYS (flag only, do not delete): {d['orphaned']}") + + sections.append(f"### Locale: {lang}\nTarget file: public/locales/{lang}/common.json\n\n" + "\n\n".join(parts)) + + prompt = "Use the transcreation-exposed skill.\n\n" + prompt += "The diff has already been computed. Translate ONLY the keys listed below - do not re-translate unchanged keys.\n\n" + prompt += "For NEW keys: add translations to the target locale file.\n" + prompt += "For STALE keys: replace the existing translation with a fresh one (the English source text changed).\n" + prompt += "For ORPHANED keys: do not delete them. 
Note them in your output.\n\n" + prompt += "Read each target locale file first to maintain consistency with existing translations.\n" + prompt += "Write updated JSON files back to their locale paths.\n" + prompt += "Follow all skill rules: no we/us/our, no em dashes, preserve {{placeholders}}, match brevity.\n\n" + prompt += "\n\n".join(sections) + + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + # Use multiline output for the prompt + f.write("prompt<> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Create PR + if: steps.changes.outputs.changed == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DIFF_SUMMARY: ${{ needs.diff.outputs.summary }} + run: | + BRANCH="i18n/sync-translations-$(date +%Y%m%d-%H%M)" + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git checkout -b "$BRANCH" + git add public/locales/ + + git commit -m "$(cat < /tmp/pr-body.md << 'BODY_EOF' + ## Summary + + Automated translation sync using the transcreation-exposed Claude Code skill. + + BODY_EOF + + printf '**Diff:** %s\n\n' "$DIFF_SUMMARY" >> /tmp/pr-body.md + + cat >> /tmp/pr-body.md << 'BODY_EOF' + ## What was done + + - Diffed public/locales/en/common.json against all target locales + - Translated new keys (added to EN since last sync) + - Re-translated stale keys (EN source text changed since last sync) + - Unchanged translations were not modified + - Orphaned keys (removed from EN) were left in place for manual review + + ## Review checklist + + The translation-qc workflow runs on this PR and posts a scored review. 
+ + - [ ] QC workflow passed (check PR comments) + - [ ] Spot-check 5 random strings per locale + - [ ] No we/us/our in any translation + - [ ] No em dashes in any translation + - [ ] All placeholders preserved + - [ ] Orphaned keys reviewed (remove or remap as needed) + BODY_EOF + + gh pr create \ + --title "i18n: sync translations" \ + --body-file /tmp/pr-body.md \ + --label "i18n" diff --git a/.github/workflows/translation-qc.yml.example b/.github/workflows/translation-qc.yml.example new file mode 100644 index 0000000..88e4b82 --- /dev/null +++ b/.github/workflows/translation-qc.yml.example @@ -0,0 +1,227 @@ +name: Translation QC + +# Runs quality review on translation PRs and posts a scored report as a comment. +# Uses the qc-review Claude Code skill. +# +# Trigger: PRs that touch locale files, or manual dispatch on any PR. + +on: + pull_request: + paths: + - 'public/locales/*/common.json' + workflow_dispatch: + inputs: + pr_number: + description: 'PR number to review' + type: number + required: true + +concurrency: + group: translation-qc-${{ github.event.pull_request.number || inputs.pr_number }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: write + issues: write + +jobs: + detect: + name: Detect changed locales + runs-on: ubuntu-latest + outputs: + locales: ${{ steps.detect.outputs.locales }} + has_changes: ${{ steps.detect.outputs.has_changes }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Determine PR ref + id: pr + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "ref=${{ inputs.pr_number }}" >> "$GITHUB_OUTPUT" + else + echo "ref=${{ github.event.pull_request.number }}" >> "$GITHUB_OUTPUT" + fi + + - name: Detect changed locale files + id: detect + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + FILES=$(gh pr diff "${{ steps.pr.outputs.ref }}" --name-only | grep '^public/locales/.*/common.json$' || true) + + if [ -z "$FILES" ]; then + echo 
"has_changes=false" >> "$GITHUB_OUTPUT" + echo "locales=[]" >> "$GITHUB_OUTPUT" + echo "No locale files changed in this PR" + exit 0 + fi + + LOCALES=$(echo "$FILES" | sed 's|public/locales/\(.*\)/common.json|\1|' | sort -u | grep -v '^en$' || true) + + if [ -z "$LOCALES" ]; then + echo "has_changes=false" >> "$GITHUB_OUTPUT" + echo "locales=[]" >> "$GITHUB_OUTPUT" + echo "Only English source changed, no translations to review" + exit 0 + fi + + JSON=$(echo "$LOCALES" | jq -R . | jq -s -c .) + echo "locales=$JSON" >> "$GITHUB_OUTPUT" + echo "has_changes=true" >> "$GITHUB_OUTPUT" + echo "Changed locales: $JSON" + + review: + name: QC review (${{ matrix.locale }}) + runs-on: ubuntu-latest + needs: detect + if: needs.detect.outputs.has_changes == 'true' + strategy: + matrix: + locale: ${{ fromJson(needs.detect.outputs.locales) }} + fail-fast: false + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha || github.sha }} + fetch-depth: 0 + + - name: Extract changed keys for this locale + id: scope + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PR_NUM="${{ github.event.pull_request.number || inputs.pr_number }}" + LOCALE="${{ matrix.locale }}" + TARGET="public/locales/${LOCALE}/common.json" + + CHANGED_KEYS=$(gh pr diff "$PR_NUM" -- "$TARGET" \ + | grep '^+' | grep -v '^+++' \ + | sed -n 's/^+[[:space:]]*"\([^"]*\)".*/\1/p' \ + | sort -u) + + KEY_COUNT=$(echo "$CHANGED_KEYS" | grep -c . || true) + + if [ "$KEY_COUNT" -gt 200 ]; then + echo "scope=full" >> "$GITHUB_OUTPUT" + echo "scope_note=This is a large change ($KEY_COUNT keys). Review the full file." 
>> "$GITHUB_OUTPUT" + echo "Large change ($KEY_COUNT keys) - full file review" + elif [ "$KEY_COUNT" -gt 0 ]; then + KEYS_CSV=$(echo "$CHANGED_KEYS" | tr '\n' ', ' | sed 's/,$//') + echo "scope=scoped" >> "$GITHUB_OUTPUT" + echo "scope_note=Review ONLY these changed keys (not the full file): ${KEYS_CSV}" >> "$GITHUB_OUTPUT" + echo "Scoped review: $KEY_COUNT changed keys" + else + echo "scope=none" >> "$GITHUB_OUTPUT" + echo "No translatable key changes detected" + fi + + - name: Set up Claude Code skills symlink + if: steps.scope.outputs.scope != 'none' + run: mkdir -p .claude && ln -sf ../tools/skills .claude/skills + + - name: Run QC review + if: steps.scope.outputs.scope != 'none' + uses: anthropics/claude-code-action@v1 + timeout-minutes: 20 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + prompt: | + Use the qc-review skill. Review the ${{ matrix.locale }} translation at public/locales/${{ matrix.locale }}/common.json against the English source at public/locales/en/common.json. Language pair: EN -> ${{ matrix.locale }}. Content type: UI strings. + + ${{ steps.scope.outputs.scope_note }} + + Produce the full scored report in the exact format specified by the skill (Part 5). Focus on: accuracy, fluency, terminology consistency, voice (no we/us/our, no em dashes), and completeness (all placeholders preserved). + + Write the report to /tmp/qc-report-${{ matrix.locale }}.md. Output only the report, no other commentary. 
+ claude_args: | + --allowedTools "Read,Glob,Grep,Write" + + - name: Upload review report + if: steps.scope.outputs.scope != 'none' + uses: actions/upload-artifact@v4 + with: + name: qc-report-${{ matrix.locale }} + path: /tmp/qc-report-${{ matrix.locale }}.md + retention-days: 30 + + comment: + name: Post QC results + runs-on: ubuntu-latest + needs: [detect, review] + if: always() && needs.detect.outputs.has_changes == 'true' && needs.review.result != 'skipped' + steps: + - name: Download all review reports + uses: actions/download-artifact@v4 + with: + pattern: qc-report-* + path: /tmp/reports/ + merge-multiple: false + continue-on-error: true + + - name: Check for reports + id: check + run: | + if find /tmp/reports -name '*.md' 2>/dev/null | grep -q .; then + echo "has_reports=true" >> "$GITHUB_OUTPUT" + else + echo "has_reports=false" >> "$GITHUB_OUTPUT" + echo "No QC reports to post" + fi + + - name: Determine PR number + if: steps.check.outputs.has_reports == 'true' + id: pr + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "number=${{ inputs.pr_number }}" >> "$GITHUB_OUTPUT" + else + echo "number=${{ github.event.pull_request.number }}" >> "$GITHUB_OUTPUT" + fi + + - name: Assemble and post comment + if: steps.check.outputs.has_reports == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PR_NUMBER="${{ steps.pr.outputs.number }}" + + { + echo "## Translation QC Report" + echo "" + echo "Automated quality review using the \`qc-review\` Claude Code skill." 
+ echo "" + + for report in /tmp/reports/qc-report-*/qc-report-*.md; do + if [ -f "$report" ]; then + LOCALE=$(basename "$report" .md | sed 's/qc-report-//') + echo "---" + echo "" + echo "### Locale: \`$LOCALE\`" + echo "" + cat "$report" + echo "" + fi + done + + echo "---" + echo "" + echo "*Generated with [Claude Code](https://claude.ai/code) using \`qc-review\` skill.*" + } > /tmp/combined-report.md + + EXISTING=$(gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \ + --jq '.[] | select(.body | startswith("## Translation QC Report")) | .id' \ + | head -1) + + if [ -n "$EXISTING" ]; then + gh api "repos/${{ github.repository }}/issues/comments/${EXISTING}" \ + -X PATCH \ + -F body=@/tmp/combined-report.md + echo "Updated existing QC comment" + else + gh pr comment "$PR_NUMBER" --body-file /tmp/combined-report.md + echo "Posted new QC comment" + fi diff --git a/tools/skills/README.md b/tools/skills/README.md new file mode 100644 index 0000000..9fba917 --- /dev/null +++ b/tools/skills/README.md @@ -0,0 +1,98 @@ +# Transcreation Skills + +Three Claude Code skills for translating and reviewing am-i.exposed content. These skills automate the translation workflow - from initial transcreation through quality control - while enforcing the project's voice rules and Bitcoin privacy terminology. + +## Setup + +Skills live in `tools/skills/` (tracked in git) and are symlinked into `.claude/skills/` (gitignored) so Claude Code can discover them. + +**First-time setup** (run once after cloning): + +```sh +mkdir -p .claude && ln -sf ../tools/skills .claude/skills +``` + +After this, Claude Code will auto-discover all three skills. + +## Skills + +### `transcreation-exposed` + +Transcreation for all am-i.exposed content - UI strings, finding descriptions, glossary definitions, FAQ, guide, and about page. Handles all supported language pairs (EN, DE, ES, PT, FR). 
+ +Contains: + +- am-i.exposed voice definition (inline) - no "we/us/our", no em dashes +- 5-step translation process (classify, understand, extract intent, write, stop-slop) +- UI string rules for common.json translations (i18next double-brace syntax) +- Bitcoin/privacy terminology (do-not-translate list + per-language notes) +- Subagent strategy for large translation jobs +- Delivery checklists + +### `qc-review` + +Quality control and review framework for translated content. + +Contains: + +- 5-dimension scoring system (accuracy, fluency, terminology, voice, completeness) +- Issue classification (4 severity levels, 5 categories) +- 6-step review process +- Content-type checklists (UI strings, glossary/FAQ, about/marketing) +- Standardized report format +- Subagent strategy for large reviews + +### `stop-slop` + +AI writing pattern removal. Mandatory final pass for all translations. Catches filler phrases, passive voice, parallel structures, hedges, and other AI tells. + +## How They Work Together + +```text +stop-slop ++-- used by transcreation-exposed (final step of every translation) ++-- used by qc-review (voice pass detects AI patterns) + +transcreation-exposed ++-- voice definition embedded inline ++-- references public/locales/en/common.json as source of truth ++-- referenced by qc-review when reviewing translations + +qc-review ++-- loads transcreation-exposed for voice and terminology reference +``` + +## Typical Workflows + +### Translate UI strings + +1. Invoke `transcreation-exposed` (it will pull in `stop-slop`) +2. Provide the source strings or point to `public/locales/en/common.json` +3. Specify target locale (e.g., de, es, pt, fr) +4. The skill guides the full process: classify, understand, translate, stop-slop + +### Translate long-form content + +Same as above. For content over 5,000 words or with 5+ distinct sections, the skill spawns subagents automatically. + +### Review existing translations + +1. 
Invoke `qc-review` (it will pull in `transcreation-exposed` and `stop-slop`) +2. Provide source and target files +3. Specify the language pair +4. The skill runs a 3-pass review and produces a scored report + +### Large-scale audit + +For the full common.json (~2061 keys), `qc-review` chunks by namespace prefix and spawns subagents. Each subagent reviews its namespace and returns a scored report. The orchestrator merges results and runs cross-namespace consistency checks. + +## Key References + +- **Source strings:** `public/locales/en/common.json` (2061 keys) +- **Existing translations:** `public/locales/{lang}/common.json` (de, es, pt, fr) +- **Locale config:** `src/lib/i18n/config.ts` +- **Placeholder syntax:** i18next double-brace: `{{variable}}` + +## Current Locale Status + +Run the transcreation workflow's diff job to get up-to-date key counts per locale. Source of truth: `public/locales/en/common.json`. diff --git a/tools/skills/qc-review/SKILL.md b/tools/skills/qc-review/SKILL.md new file mode 100644 index 0000000..a417252 --- /dev/null +++ b/tools/skills/qc-review/SKILL.md @@ -0,0 +1,325 @@ +--- +name: qc-review +description: "Quality control and review skill for translated content. Use when reviewing, auditing, or scoring translations for am-i.exposed. Evaluates accuracy, fluency, terminology, voice, and completeness. Produces structured review reports. Trigger on: review translation, QC, quality check, audit translation, proofread, translation review." +--- + +# Translation Quality Review + +Systematic QC framework for translated content. + +**Project:** am-i.exposed - client-side Bitcoin privacy analysis tool. Transcreation skill: `transcreation-exposed`. Source i18n files: `public/locales/`. No terminology reference files - use the do-not-translate list and per-language notes in the transcreation skill (Part 4). + +Uses the `stop-slop` skill for AI pattern removal. + +--- + +## Part 1: Review Dimensions & Scoring + +Five dimensions. Each scored 1-10. 
No rounding, no averaging across dimensions. + +| Dimension | Question | 10 looks like | 1 looks like | +|---|---|---|---| +| Accuracy | Same meaning as source? | Every semantic unit preserved. No additions, no omissions. | Meaning distorted or key information missing. | +| Fluency | Reads naturally in target language? | Native speaker for this audience wrote it. | Machine-translation artifacts. Unnatural phrasing. | +| Terminology | Domain terms correct and consistent? | Every Bitcoin/privacy term uses the established standard. Consistent throughout. | Terms wrong or inconsistent across the file. | +| Voice | Matches the project's voice? | Indistinguishable from content the project would publish. No "we/us/our". | Wrong register, tone, or uses first-person plural. | +| Completeness | Everything translated? | No untranslated strings, no missing segments, all placeholders intact. | Significant gaps, untranslated segments, broken placeholders. | + +**Accuracy** - compare source and target segment by segment. Look for semantic shifts, false friends, additions (content in target not in source), and omissions (content in source not in target). + +**Fluency** - read the target text without looking at the source. If a sentence makes you pause, it fails. Common failures: calques (source-language syntax leaking into target), over-literal word order, unnatural collocations. + +**Terminology** - check every Bitcoin/privacy term against the do-not-translate list and per-language terminology notes in the transcreation skill (Part 4). Verify consistency: same English term = same target term everywhere in the file. + +**Voice** - does the translation sound like the project? am-i.exposed is direct, technical, privacy-focused. **Critical:** no first-person plural ("we", "us", "our") allowed anywhere. No em dashes. Common failure: AI-generated translations flatten voice into generic corporate neutral or introduce first-person plural. + +**Completeness** - structural integrity. 
Every source key has a target. Every `{{variable}}` placeholder survives (double-brace i18next syntax). Every HTML tag is preserved. Common failure: placeholders silently deleted during translation. + +### Scoring Bands + +| Range | Verdict | Action | +|---|---|---| +| 45-50 | Ship-ready | No changes needed. Minor style preferences don't count against it. | +| 38-44 | Minor issues | Fix flagged items and ship. No structural problems. | +| 30-37 | Significant issues | Revise flagged areas and re-review. Patterns suggest systemic problems. | +| Below 30 | Reject | Retranslate. Fundamental accuracy, fluency, or terminology problems. | + +The total is a guide, not a gate. A single CRIT issue at 42/50 still blocks shipping. + +--- + +## Part 2: Issue Classification + +Two systems work together: severity and category. Every issue gets both: e.g., "CRIT/TERM" or "MIN/FLUENCY". + +### Severity Levels + +| Severity | Label | Definition | Action | Example | +|---|---|---|---|---| +| Critical | CRIT | Meaning changed, Bitcoin term wrong, placeholder broken, content missing, first-person plural used | Must fix before shipping | "UTXO" translated; `{{count}}` deleted; "We analyze your..." 
| +| Major | MAJ | Noticeable quality problem affecting user experience | Should fix before shipping | Wrong register; sentence doesn't parse; key term inconsistent | +| Minor | MIN | Correct but improvable; non-preferred synonym | Fix if convenient; acceptable to ship | Slightly awkward phrasing; word order preference; verbose | +| Style | STY | Voice or style preference, debatable | Note for translator's awareness | Could be shorter; rhythm slightly off | + +### Issue Categories + +**ACCURACY** +- Mistranslation - target says something different from source +- Addition - content in target not present in source +- Omission - content in source missing from target +- False friend - word that looks similar across languages but means something different +- Semantic shift - subtle meaning drift that changes the implication + +**FLUENCY** +- Unnatural phrasing - grammatically correct but no native speaker would write it that way +- Grammar error - subject-verb agreement, case, tense +- Punctuation error - misplaced commas, wrong quotation style for locale. **Exception:** do not flag technically-correct-but-unnecessary punctuation in short UI strings when the string reads naturally without it. +- Spelling error - typos, wrong diacriticals +- Calque - structure borrowed from source language + +**TERMINOLOGY** +- Wrong term - incorrect translation of a Bitcoin/privacy term +- Inconsistent term - same source term translated differently in different locations +- Untranslated term - left in English when it should be translated +- Over-translated term - translated into target when it should stay in English (Bitcoin terms: UTXO, CoinJoin, PayJoin, Taproot, etc.) 
+ +**VOICE** +- Wrong register - too formal, too informal, too academic, too casual +- First-person plural - any use of "we", "us", "our" or target-language equivalents +- Em dash used - any em dash in any form +- AI slop pattern - flag the specific pattern from `stop-slop` (name it) +- Prohibited phrase - crypto-bro language, surveillance FUD, privacy buzzwords +- Generic flattening - distinctive source voice reduced to corporate neutral + +**COMPLETENESS** +- Missing translation - empty target for a source key +- Broken placeholder - `{{variable}}` altered, deleted, or malformed +- Broken HTML tag - unclosed tag, deleted tag, reordered nesting +- Untranslated segment - source-language text remaining in target file +- Encoding error - mojibake, wrong character set + +--- + +## Part 3: Review Process + +Six steps. Follow in order. + +**Single agent vs. subagents:** For small files (under ~100 keys or 5,000 words), one agent runs all six steps sequentially. For the full `common.json` (~2061 keys), the orchestrator handles Steps 1-2 and the file-reading strategy, then each subagent runs Steps 3-5 on its namespace chunk. The orchestrator runs Step 6 on the aggregated results. + +### Step 1: Identify the project and load context + +Load the transcreation skill (`transcreation-exposed`) for voice definition and terminology. Load `stop-slop`. Read `public/locales/en/common.json` as the source of truth. 
+ +### Step 2: Classify the content type + +Content type determines which dimensions carry the most weight: + +| Content type | Primary dimensions | Secondary dimensions | +|---|---|---| +| UI strings | Completeness, Terminology | Fluency (brevity > elegance) | +| Finding descriptions | Accuracy, Terminology | Fluency | +| Glossary definitions | Accuracy, Voice | Fluency | +| FAQ / Guide | Accuracy, Completeness | Voice, Fluency | +| About / Marketing | Voice, Fluency | Accuracy of intent > literal accuracy | + +### Reading very long source files + +For large JSON files like `common.json` (~2061 keys): + +1. **Read the file in sections** - use offset/limit reads by namespace prefix. Build a namespace inventory with key counts. +2. **Compare source and target file structure** - identify which namespaces are fully translated, partially translated, or untranslated. +3. **Distribute chunks** - each subagent receives its namespace slice from both source and target files. +4. **Orchestrator assembles** - after subagents return, the orchestrator runs cross-chunk checks. + +### Step 3: First pass - structural checks + +Mechanical verification before reading for quality: + +- [ ] All source keys have corresponding translations (no empty values) +- [ ] All `{{variable}}` placeholders preserved exactly (double-brace i18next syntax, character-for-character) +- [ ] All HTML tags preserved and properly nested +- [ ] No source-language text remaining in target (except Bitcoin terms that stay English) +- [ ] Character encoding correct (no mojibake, no broken diacriticals) +- [ ] No em dashes in any string +- [ ] No first-person plural in any string + +If structural checks fail, log the issues immediately: CRIT for every check above except em dashes, which are logged as MAJ/VOICE (see Step 5). + +### Step 4: Second pass - accuracy and terminology + +Read source and target in parallel, segment by segment: + +- Does each segment convey the same meaning? +- Are Bitcoin/privacy terms correctly handled? 
(kept in English where required, translated consistently where appropriate) +- Numbers, scores, and formatting preserved? +- Any additions? (Content in target not in source.) +- Any omissions? (Content in source not in target.) + +For UI strings: check that source and target have the same number of sentences. + +### Step 5: Third pass - fluency and voice + +Read the target text ALONE. Do not look at the source. This is the "native reader" pass. + +- Does it read naturally? Would a native speaker write it this way? +- Is the register consistent throughout? +- Does it match the project voice? am-i.exposed: direct, technical, privacy-focused. +- **Critical voice check:** Any "we/us/our" or target-language equivalents? Flag as CRIT/VOICE. +- **Em dash check:** Any em dashes in any form? Flag as MAJ/VOICE. +- For multi-sentence strings, run a stop-slop check. Skip for short UI strings under ~10 words. +- Button labels imperative? Error messages specific? + +### Common false positives - do not flag these + +- **Target-language capitalization that differs from English.** Spanish, Portuguese, and French do not title-case headings. German capitalizes nouns but not adjectives/verbs. Sentence case is correct in these languages. +- **Missing articles in form labels.** Terse field labels and column headers omit articles in all target languages. +- **Bitcoin terms kept in English.** UTXO, CoinJoin, PayJoin, Taproot, etc. staying in English is correct, not a completeness issue. +- **Loan words.** "Heuristik" (DE), "cluster" (ES/PT) - established loan words in the target language are correct. + +### Step 6: Score and report + +Assign dimension scores based on findings. Compile the issue table. Write summary and recommendation. Use the report format from Part 5. + +Scoring guidelines: +- Start at 10 for each dimension. Deduct based on issue count and severity. +- One CRIT issue in a dimension: that dimension cannot score above 5. 
+- Three or more MAJ issues in a dimension: cap at 6. +- MIN and STY issues reduce scores by 0.5 each, roughly. + +--- + +## Part 4: Content-Type Checklists + +### Checklist A: UI Strings (common.json translations) + +- [ ] All keys have translations (no empty values, no English left unless intentional) +- [ ] Placeholders (`{{variable}}`) preserved exactly (double-brace syntax) +- [ ] HTML tags preserved and properly closed +- [ ] Button labels are imperative verbs +- [ ] Error messages are specific +- [ ] No first-person plural anywhere +- [ ] No em dashes anywhere +- [ ] Consistent terminology within each namespace +- [ ] Consistent terminology across related namespaces +- [ ] No UI string exceeds ~150% of source length +- [ ] Same English term maps to the same target term everywhere (cross-namespace check) +- [ ] Bitcoin/privacy terms from do-not-translate list kept in English + +### Checklist B: Long-Form / Glossary / FAQ + +- [ ] Central argument preserved +- [ ] Rhetorical devices reproduced, not flattened +- [ ] Section structure preserved +- [ ] No hedges or softeners added that aren't in the source +- [ ] No AI slop patterns in multi-sentence strings +- [ ] No first-person plural +- [ ] No em dashes +- [ ] Links and references still valid +- [ ] Technical descriptions accurate +- [ ] Bitcoin amounts and scores preserved exactly + +### Checklist C: About / Marketing Copy + +- [ ] Headlines transcreated (not literally translated) +- [ ] CTAs action-oriented and natural in target language +- [ ] No first-person plural (this is the most common violation in marketing) +- [ ] No em dashes +- [ ] No privacy FUD introduced ("Big Brother", "they're watching") +- [ ] No crypto-bro language +- [ ] Voice matches the project personality +- [ ] No AI-generated filler ("in today's world", "it's worth noting") + +--- + +## Part 5: Report Format + +Every review produces this structure. No exceptions. 
+ +````markdown +## Translation Quality Review + +**Project:** am-i.exposed +**Content type:** [UI strings / Glossary / FAQ / About] +**Language pair:** [EN -> DE / EN -> ES / EN -> PT / EN -> FR] +**File(s) reviewed:** [path or description] +**Date:** [YYYY-MM-DD] + +### Scores + +| Dimension | Score | Notes | +|---|---|---| +| Accuracy | X/10 | [One sentence] | +| Fluency | X/10 | [One sentence] | +| Terminology | X/10 | [One sentence] | +| Voice | X/10 | [One sentence] | +| Completeness | X/10 | [One sentence] | +| **Total** | **XX/50** | **[Ship-ready / Minor issues / Significant issues / Reject]** | + +### Issues + +| # | Severity | Category | Location | Source | Current | Suggested fix | Notes | +|---|---|---|---|---|---|---|---| +| 1 | CRIT | VOICE | about.built_p1 | "...is built and maintained by..." | "Wir haben..." | "...wird entwickelt von..." | First-person plural | +| 2 | MAJ | TERM | finding.H3_desc | "Round amount" | "Runder Betrag" / "Rundbetrag" | Pick one, use everywhere | Inconsistent term | + +### Patterns + +[If 3+ issues share a root cause, name the pattern here. List affected locations.] + +### Summary + +[2-3 sentences. Overall assessment.] + +### Recommendation + +**[Ship / Fix and ship / Revise and re-review / Retranslate]** +[One sentence explaining the recommendation.] +```` + +--- + +## Part 6: Subagent Strategy for Large Reviews + +### Model recommendations + +| Role | Model | Why | +|---|---|---| +| Orchestrator | Opus | Cross-chunk consistency, pattern identification, final scoring | +| QC subagents | Sonnet | Structured comparison against checklists and terminology | + +### When to use subagents + +- UI string files over ~100 keys (the full `common.json` at ~2061 keys always requires subagents) +- Multiple documents submitted for review at once +- Full-locale translation audits + +Do not use subagents for single documents under 5,000 words or string files under ~100 keys. 
+ +### How to chunk + +- **UI strings:** chunk by namespace prefix (`finding.*`, `remediation.*`, `glossary.*`, etc.). Each namespace or namespace group = one subagent. + +### What each subagent receives + +1. This QC skill (`qc-review`) +2. The transcreation skill's voice definition and terminology (Parts 1, 3, 4 from `transcreation-exposed`) +3. The `stop-slop` checklist +4. Their chunk of source + target text - **embedded directly in the subagent prompt as text** +5. A brief: project name, language pair, content type + +**Critical: no filesystem handoff.** Embed source and target key-value pairs directly in the subagent's prompt text. + +### What each subagent returns + +Each subagent returns a scored report for its chunk as **inline text in the final message**, using the exact format from Part 5. No file writes, no filesystem paths. + +### Orchestrator responsibilities + +After collecting all subagent reports, the orchestrator: + +1. **Aggregates dimension scores** - weighted by chunk size. +2. **Merges issue tables** - all issues into one consolidated table. Renumber sequentially. +3. **Runs cross-chunk consistency check** - verify that the same English term maps to the same target term across ALL namespaces. Flag divergences. +4. **Runs cross-chunk voice check** - verify no first-person plural anywhere. Verify no em dashes anywhere. +5. **Identifies cross-chunk patterns** - AI slop in some chunks but not others points to mixed human/machine translation. +6. **Produces one consolidated report** - single report, single score, single recommendation. diff --git a/tools/skills/stop-slop/SKILL.md b/tools/skills/stop-slop/SKILL.md new file mode 100644 index 0000000..39dbc09 --- /dev/null +++ b/tools/skills/stop-slop/SKILL.md @@ -0,0 +1,292 @@ +--- +name: stop-slop +description: Remove AI writing patterns from prose. Use when drafting, editing, or reviewing text to eliminate predictable AI tells. 
+--- + +# Stop Slop + +Eliminate predictable AI writing patterns from prose. + +## Core Rules + +1. **Cut filler phrases.** Remove throat-clearing openers, emphasis crutches, and all adverbs. +2. **Break formulaic structures.** Avoid binary contrasts, negative listings, dramatic fragmentation, rhetorical setups, false agency. +3. **Use active voice.** Every sentence needs a human subject doing something. No inanimate objects performing human actions ("the complaint becomes a fix"). **Exception for am-i.exposed:** passive voice is acceptable when it avoids first-person plural ("we/us/our"). "Your data is never stored" is better than "We never store your data." Active voice remains the default; passive is the escape hatch for the no-first-person rule. +4. **Be specific.** No vague declaratives ("The reasons are structural"). Name the specific thing. No lazy extremes ("every," "always," "never") doing vague work. +5. **Put the reader in the room.** No narrator-from-a-distance voice. "You" beats "People." Specifics beat abstractions. +6. **Vary rhythm.** Mix sentence lengths. Two items beat three. End paragraphs differently. No em dashes. +7. **Trust readers.** State facts directly. Skip softening, justification, hand-holding. +8. **Cut quotables.** If it sounds like a pull-quote, rewrite it. + +## Quick Checks + +Before delivering prose: + +- Any adverbs? Kill them. +- Any passive voice? Find the actor, make them the subject. +- Inanimate thing doing a human verb ("the decision emerges")? Name the person. +- Sentence starts with a Wh- word? Restructure it. +- Any "here's what/this/that" throat-clearing? Cut to the point. +- Any "not X, it's Y" contrasts? State Y directly. +- Three consecutive sentences match length? Break one. +- Paragraph ends with punchy one-liner? Vary it. +- Em-dash anywhere? Remove it. +- Vague declarative ("The implications are significant")? Name the specific implication. +- Narrator-from-a-distance ("Nobody designed this")? 
Put the reader in the scene. +- Meta-joiners ("The rest of this essay...")? Delete. Let the essay move. + +## Scoring + +Rate 1-10 on each dimension: + +| Dimension | Question | +|-----------|----------| +| Directness | Statements or announcements? | +| Rhythm | Varied or metronomic? | +| Trust | Respects reader intelligence? | +| Authenticity | Sounds human? | +| Density | Anything cuttable? | + +Below 35/50: revise. + +--- + +## Phrases to Remove + +### Throat-Clearing Openers + +Remove these announcement phrases. State the content directly. + +- "Here's the thing:" +- "Here's what [X]" +- "Here's this [X]" +- "Here's that [X]" +- "Here's why [X]" +- "The uncomfortable truth is" +- "It turns out" +- "The real [X] is" +- "Let me be clear" +- "The truth is," +- "I'll say it again:" +- "I'm going to be honest" +- "Can we talk about" +- "Here's what I find interesting" +- "Here's the problem though" + +Any "here's what/this/that" construction is throat-clearing before the point. Cut it and state the point. + +### Emphasis Crutches + +These add no meaning. Delete them. + +- "Full stop." / "Period." +- "Let that sink in." +- "This matters because" +- "Make no mistake" +- "Here's why that matters" + +### Business Jargon + +Replace with plain language. + +| Avoid | Use instead | +|-------|-------------| +| Navigate (challenges) | Handle, address | +| Unpack (analysis) | Explain, examine | +| Lean into | Accept, embrace | +| Landscape (context) | Situation, field | +| Game-changer | Significant, important | +| Double down | Commit, increase | +| Deep dive | Analysis, examination | +| Take a step back | Reconsider | +| Moving forward | Next, from now | +| Circle back | Return to, revisit | +| On the same page | Aligned, agreed | + +### Adverbs + +Kill all adverbs. No -ly words. No softeners, no intensifiers, no hedges. 
+ +Specific offenders: really, just, literally, genuinely, honestly, simply, actually, deeply, truly, fundamentally, inherently, inevitably, interestingly, importantly, crucially + +Also cut these filler phrases: + +- "At its core" +- "In today's [X]" +- "It's worth noting" +- "At the end of the day" +- "When it comes to" +- "In a world where" +- "The reality is" + +### Meta-Commentary + +Remove self-referential asides. The essay should move, not announce its own structure. + +- "Hint:" +- "Plot twist:" / "Spoiler:" +- "You already know this, but" +- "But that's another post" +- "X is a feature, not a bug" +- "Dressed up as" +- "The rest of this essay explains..." +- "Let me walk you through..." +- "In this section, we'll..." +- "As we'll see..." +- "I want to explore..." + +### Performative Emphasis + +- "creeps in" +- "I promise" +- "They exist, I promise" + +### Telling Instead of Showing + +- "This is genuinely hard" +- "This is what leadership actually looks like" +- "This is what X actually looks like" +- "actually matters" + +### Vague Declaratives + +Kill sentences that announce importance without naming the specific thing. + +- "The reasons are structural" +- "The implications are significant" +- "This is the deepest problem" +- "The stakes are high" +- "The consequences are real" + +--- + +## Structures to Avoid + +### Binary Contrasts + +These create false drama. State the point directly. + +- "Not because X. Because Y." / "Not because X, but because Y." +- "[X] isn't the problem. [Y] is." +- "The answer isn't X. It's Y." +- "It feels like X. It's actually Y." +- "The question isn't X. It's Y." +- "Not X. But Y." / "not X, it's Y" / "isn't X, it's Y" +- "stops being X and starts being Y" +- "doesn't mean X, but actually Y" +- "is about X but not Y" +- "not just X but also Y" + +**Instead:** State Y directly. Drop the negation entirely. + +### Negative Listing + +Listing what something is *not* before revealing what it *is*. + +- "Not a X... Not a Y... 
A Z." +- "It wasn't X. It wasn't Y. It was Z." + +**Instead:** State Z. The reader doesn't need the runway. + +### Dramatic Fragmentation + +Sentence fragments for emphasis read as manufactured profundity. + +- "[Noun]. That's it. That's the [thing]." +- "X. And Y. And Z." +- "This unlocks something. [Word]." + +**Instead:** Complete sentences. Trust content over presentation. + +### Rhetorical Setups + +These announce insight rather than deliver it. + +- "What if [reframe]?" +- "Here's what I mean:" +- "Think about it:" +- "And that's okay." + +**Instead:** Make the point. Let readers draw conclusions. + +### False Agency + +Giving inanimate things human verbs. Name the human actor instead. + +- "a complaint becomes a fix" -> "The team fixed it" +- "a bet lives or dies" -> "Someone kills the project or ships it" +- "the decision emerges" -> "Someone decides" +- "the culture shifts" -> "People change behavior" +- "the conversation moves toward" -> "Someone steers" +- "the data tells us" -> "Someone reads it and draws a conclusion" +- "the market rewards" -> "Buyers pay for things" + +### Narrator-from-a-Distance + +- "Nobody designed this." -> Put the reader in the room +- "This happens because..." -> Direct explanation +- "This is why..." -> Direct explanation +- "People tend to..." -> Use "you" + +### Passive Voice + +Find the actor. Put them at the front of the sentence. + +- "X was created" -> Name who created it +- "It is believed that" -> Name who believes it +- "Mistakes were made" -> Name who made them +- "The decision was reached" -> Name who decided + +### Sentence Starters to Avoid + +- Sentences starting with What, When, Where, Which, Who, Why, How -> Restructure. Lead with the subject or the verb. 
+- Paragraphs starting with "So" -> Start with content +- Sentences starting with "Look," -> Remove + +### Rhythm Patterns + +- Three-item lists -> Use two items or one +- Questions answered immediately -> Let questions breathe or cut them +- Every paragraph ends punchily -> Vary endings +- Em-dashes -> Remove. Use commas or periods. +- Staccato fragmentation -> Don't stack short punchy sentences +- "Not always. Not perfectly." -> Hedging disguised as reassurance + +### Word Patterns + +- Lazy extremes (every, always, never, everyone, everybody, nobody) -> Use specifics instead of sweeping claims +- All adverbs (-ly words, "really," "just," "literally," "genuinely," "honestly," "simply," "actually") -> Empty emphasis, remove them + +--- + +## Before/After Examples + +### Throat-Clearing + Binary Contrast + +**Before:** "Here's the thing: building products is hard. Not because the technology is complex. Because people are complex. Let that sink in." + +**After:** "Building products is hard. Technology is manageable. People aren't." + +### Filler + Unnecessary Reassurance + +**Before:** "It turns out that most teams struggle with alignment. The uncomfortable truth is that nobody wants to admit they're confused. And that's okay." + +**After:** "Teams struggle with alignment. Nobody admits confusion." + +### Business Jargon Stack + +**Before:** "In today's fast-paced landscape, we need to lean into discomfort and navigate uncertainty with clarity. This matters because your competition isn't waiting." + +**After:** "Move faster. Your competition is." + +### Dramatic Fragmentation + +**Before:** "Speed. Quality. Cost. You can only pick two. That's it. That's the tradeoff." + +**After:** "Speed, quality, cost: pick two." + +### Rhetorical Setup + +**Before:** "What if I told you that the best teams don't optimize for productivity? Here's what I mean: they optimize for learning. Think about it." + +**After:** "The best teams optimize for learning, not productivity." 
diff --git a/tools/skills/transcreation-exposed/SKILL.md b/tools/skills/transcreation-exposed/SKILL.md new file mode 100644 index 0000000..f769c7d --- /dev/null +++ b/tools/skills/transcreation-exposed/SKILL.md @@ -0,0 +1,388 @@ +--- +name: transcreation-exposed +description: "Transcreation skill for am-i.exposed content. Use when translating content for am-i.exposed between any supported language pair. Produces translations that preserve meaning, intent, and rhetorical effect - not word-for-word equivalents. Applies the am-i.exposed voice in the target language. Trigger on: translate, transcreate, ins Deutsche, into English, auf Englisch, auf Deutsch, i18n, localize, sync translations." +--- + +# am-i.exposed Transcreation + +Translate am-i.exposed content with domain precision. This skill handles UI strings from `public/locales/en/common.json`, FAQ text, glossary definitions, marketing copy, and technical privacy documentation. Every translation must be terminologically exact, tonally correct, and free of slop. + +--- + +## Part 1: am-i.exposed Voice + +am-i.exposed is a client-side Bitcoin privacy analysis tool. The voice reflects that. + +**Register:** Technical privacy tool. Direct over diplomatic. Precise over warm. Educational over alarming. + +**Audience assumption:** Users understand Bitcoin basics. They know what a transaction, address, and UTXO are. They may not know chain analysis techniques - that is what the tool teaches. Do not over-explain Bitcoin concepts in UI strings. Glossary entries and guide pages can provide more context. + +**Critical voice rules (from CLAUDE.md):** +- **Never use "we", "us", or "our"** in UI copy, metadata, FAQ answers, or any user-facing text. This tool is not a person, company, or group. +- Use **passive voice** or refer to the tool by name ("am-i.exposed"). +- Data is never "transmitted to us" - say "transmitted to anyone" or specify the actual recipient (e.g., "mempool.space for blockchain data"). 
+- **No em dashes** in any form. Use a regular hyphen with spaces instead: ` - `. + +**Tone gradient:** + +| Content type | Register | +|---|---| +| UI strings (buttons, labels, scores) | Neutral, functional. Zero decoration. | +| Finding descriptions | Technical, specific. Name the heuristic and what it reveals. | +| Remediation advice | Direct and actionable. Tell the user what to do. | +| Glossary definitions | Educational, concise. Explain once, with precision. | +| FAQ answers | Conversational but factual. Respect the reader's intelligence. | +| About / marketing | More expressive, but never breathless. Privacy over excitement. | + +**Active voice.** Direct instructions. Short sentences for UI. The tool reports findings; the user takes action. + +**Prohibited patterns:** +- First person plural: "We analyze...", "Our tool...", "We don't store..." +- Crypto-bro language: "WAGMI", "HODL", "to the moon", "LFG" +- Surveillance FUD: "Big Brother is watching", "They're tracking everything" +- Vague privacy buzzwords: "seamlessly private", "bulletproof anonymity", "military-grade encryption" +- False urgency: "Act now!", "Your privacy is at risk!" +- Exclamation marks in UI strings. Period. +- Em dashes in any form. + +**What the voice sounds like:** A competent privacy analysis tool that respects the user's intelligence. It tells you what chain analysis can infer, what the privacy implications are, and what you can do about it. It does not lecture, alarm, or sell. + +--- + +## Part 2: Translation Process + +Five steps. Every translation goes through all five. No shortcuts. 
+ +### Step 0: Classify content type + +Before writing a single word, classify the source: + +- **(a) UI strings** from `common.json` - keys like `finding.H1_title`, `common.scan`, `results.grade` +- **(b) Finding descriptions** - heuristic explanations, analyst verdicts, severity descriptions +- **(c) Glossary definitions** - `glossary.def_*` keys, educational privacy/Bitcoin content +- **(d) Guide / FAQ content** - `guide.*`, `faq.*`, `methodology.*` keys +- **(e) About / marketing** - `about.*` keys, landing page content, feature descriptions + +Classification determines which rules dominate. UI strings prioritize brevity and consistency. Glossary entries prioritize clarity. About/marketing prioritizes natural flow. + +### Step 0b: Determine translation scope + +Before translating, determine whether this is a **full translation** (new locale, empty target file) or a **partial update** (target file exists with some translations). For partial updates, diff the source against the target to classify every key: + +**Key states:** + +| State | How to detect | Action | +|---|---|---| +| **New** | Key exists in English but not in target | Translate. Primary task for partial updates. | +| **Changed** | Key exists in both, but English value differs from what was originally translated | Re-translate. The source text changed. | +| **Unchanged** | Key exists in both, English value matches what was translated | Keep the existing translation. Do not re-translate. | +| **Orphaned** | Key exists in target but not in English | Flag for removal. Report to user - the key may have been renamed. | + +**How to diff for UI strings (common.json):** + +1. Read `public/locales/en/common.json` - this is the canonical key list. +2. Read the target file (e.g., `public/locales/de/common.json`). +3. Build three lists: keys in English but not target (new), keys in target but not English (orphaned), and keys in both (existing). +4. 
For the "existing" set: compare the current English value against the English value at the commit that last touched the target file (for example, `git log -1 --format=%H -- public/locales/de/common.json`, then `git show <sha>:public/locales/en/common.json`). A key whose English value differs between the two is changed. If git history is unavailable or the result is unclear, keep the existing translation and flag it for review rather than re-translating blindly. + +**Subagent implications:** When spawning subagents for a partial update, each subagent's prompt should clearly state which keys to translate (new), which to re-translate (changed), and which existing translations to use as consistency context (unchanged). Do not send unchanged keys as part of the translation task - include them as read-only reference. + +### Step 1: Understand before writing + +**For UI strings (small batch, under ~100 keys):** Read all source keys and their existing target translations before writing anything. For larger batches, this step happens per-chunk inside each subagent - see Part 6. + +**For UI strings:** Read the dot-namespaced key. `finding.H1_title` tells you this is a finding title for heuristic H1. `remediation.H1_action` is a remediation action step. That context determines register, length, and grammar. + +**For long-form content:** Read the entire source. Identify the argument structure, the rhetorical devices, the intended emotional arc. Mark section boundaries and key claims. + +### Step 2: Extract intent + +**For UI strings:** State in one phrase what the user learns or does. "Address reuse detected" = the tool found the same address used in multiple transactions. "Use coin control" = the tool recommends the user select specific UTXOs. + +**For long-form content:** State the core argument in one sentence. Identify what the reader should think, feel, or do after reading. Note any rhetorical devices worth preserving. + +### Step 3: Write in target language + +**For UI strings:** Match the source's brevity and register. If the English is three words, aim for three words. If the target language needs five, use five, but never pad. Translate the intent, not the syntax. 
+ +**For long-form content:** Write as if composing from scratch in the target language. A native reader should not detect translation. Preserve argument structure and rhetorical devices, but rebuild sentences to sound natural. + +### Step 4: Stop-slop pass + +**When to apply:** Long-form content (glossary definitions, FAQ answers, about page, guide sections) and multi-sentence UI strings. Skip for short UI strings - labels, buttons, headings, scores, and any string under ~10 words. + +**How to invoke:** The orchestrator and single-agent translations should invoke the `stop-slop` skill via the Skill tool for the final assembled output. Subagents apply the checklist manually (they cannot invoke skills). + +Check for: +- Connective tissue added during translation ("in this regard", "it should be noted") +- Parallel structures that crept in from the source language +- Passive voice where the source used active (note: passive is appropriate for am-i.exposed when avoiding first-person) +- Hedges and softeners absent from the source ("perhaps", "might", "it seems") +- Filler adverbs ("basically", "essentially", the target-language equivalents) +- Any word in the translation that has no corresponding word or intent in the source +- First-person plural ("we", "us", "our") - must never appear + +--- + +## Part 3: UI String Rules + +These rules apply to all translations of `public/locales/*/common.json` keys. + +**How to read common.json:** Each entry is a key-value pair in flat JSON. Example: +```json +"finding.H1_title": "Address Reuse Detected" +``` +- **The key** (left side: `finding.H1_title`) is the dot-namespaced label. It tells you what namespace the string belongs to and what kind of element it is. Never translate the key. Read it for context. +- **The value** (right side: `"Address Reuse Detected"`) is the English source text. This is what you translate. + +**1. 
Consult the source of truth.** Before translating any UI term, read `public/locales/en/common.json` for the established English wording. The value is the canonical text. + +**2. Check existing translations.** Before writing, read the target locale file (e.g., `public/locales/de/common.json`). If the term has been translated before, match it. Consistency across the app is non-negotiable. + +**3. Brevity is mandatory.** UI labels occupy fixed space. If the target language runs longer than the source, abbreviate. Never add words absent from the source. + +**4. Preserve `{{variable}}` placeholders exactly.** Never translate text inside double braces. `{{count}}` stays `{{count}}`. `{{address}}` stays `{{address}}`. Wrong: `{{Anzahl}}`. Note: this project uses i18next double-brace syntax `{{var}}`, not single-brace `{var}`. + +**5. Preserve HTML tags exactly.** `<strong>`, `<em>`, `<br/>`, `<1>`, `<0>` stay as-is. Translate only the text content between tags. Never reorder tags. + +**6. One English term = one target term, everywhere.** "Address reuse" is always the same translation within a locale. Never use different terms for the same concept in different screens. Refer to the terminology list in Part 4. + +**7. Use dot-namespaced keys for context.** `finding.H1_title` is a finding title. `remediation.H1_action` is an action step. `glossary.def_address_reuse` is a glossary definition. The key hierarchy tells you the UI element type, which determines grammar and register. + +**8. Button labels use imperative verbs.** "Scan" = "Scannen" (DE), not "Scan-Vorgang". "Export" = "Exportieren", not "Der Export". + +**9. Error messages: specific, not generic.** If the source says "Invalid address for this network", translate the specific error. + +**10. Length ceiling: ~150% of source.** If the English string is 20 characters, the target should not exceed 30. German compounds may push this. Use judgment, but flag anything that doubles the source length. + +**11. Don't add pedantic punctuation.** UI strings prioritize natural flow over grammar-book correctness. + +**12. Capitalization follows target-language rules.** English title-cases headings; most other languages do not. Spanish and Portuguese capitalize only the first word. German capitalizes nouns but not adjectives/verbs. French capitalizes only the first word. + +**13. No first-person plural.** If the English source accidentally uses "we" or "our", fix it in translation. Use passive voice or refer to "am-i.exposed" by name. 
+ +--- + +## Part 4: Domain Terminology + +### Bitcoin Privacy Terms - Do Not Translate + +These terms stay in English in all target languages: + +> Bitcoin, Satoshi/Sats, Lightning, UTXO, CoinJoin, PayJoin, Stonewall, Whirlpool, WabiSabi, JoinMarket, PayNym, BIP47, Taproot, SegWit, Multisig, P2P, Mainnet, Testnet, Signet, xpub, PSBT, Tor, mempool, hash, on-chain, off-chain, Open Source, HD Wallet, Seed Phrase + +### Terms That Must Be Translated Consistently + +| English | Context | Notes | +|---|---|---| +| Address reuse | Privacy heuristic | Each language has one term, used everywhere | +| Change output / change detection | Transaction analysis | "Change" as in transaction change, not "modify" | +| Peel chain | Chain analysis pattern | Some languages keep "peel chain" as loan term | +| Dust attack | Privacy attack | Translate conceptually | +| Taint / taint analysis | Fund tracing | Translate conceptually | +| Privacy score | 0-100 rating | Translate consistently | +| Finding | Analysis result | A specific privacy observation | +| Remediation | Fix/action | The suggested action to improve privacy | +| Heuristic | Analysis technique | May keep as loan word in some languages | +| Entity | Known service/organization | Exchange, mixer, etc. 
| +| Cluster | Group of linked addresses | Translate consistently | +| Round amount | Transaction pattern | Translate conceptually | +| Script type | Address format | Technical, may keep English | + +### Per-Language Notes + +**German (de):** +- Sie-form for formal address unless existing file uses du (check the file) +- Bitcoin compound words: "Adresswiederverwendung" (address reuse), "Wechselgeld-Erkennung" (change detection) +- Keep technical loan words where established: "Heuristik", "Cluster" +- "Transaktion" not "Überweisung" (that implies bank transfer) + +**Spanish (es):** +- Latin American Spanish (the existing file uses this convention) +- "Reutilización de direcciones" for address reuse +- Keep loan words where natural: "cluster", "heurística" + +**Portuguese (pt):** +- Brazilian Portuguese conventions (existing file follows this) +- "Reutilização de endereços" for address reuse +- Similar loan word patterns to Spanish + +**French (fr):** +- Formal vous register +- "Réutilisation d'adresses" for address reuse +- French typography: non-breaking space before `:`, `?`, `!`, `;` + +--- + +## Part 5: Long-Form Content + +For glossary definitions, FAQ answers, guide sections, and about page copy, apply the full transcreation process (Steps 0-4). Do not line-translate. + +### Privacy Tool Voice in Long-Form + +am-i.exposed long-form content communicates three things: +1. **Education.** Explain what chain analysis can infer and how. +2. **Actionability.** Tell the user what to do about it. +3. **Empowerment.** Privacy is achievable. Specific techniques work. + +No FUD. No paranoia. No "they're watching everything." State what the tool detects. Let the reader evaluate. + +### Rhetorical Devices Worth Preserving + +| Device | Example | Handling | +|---|---|---| +| Specific heuristic reference | "H3: Round Amount Detection" | Keep the heuristic code. Translate the name. 
| +| Privacy score context | "A score below 40 indicates significant privacy leaks" | Keep the number. Adapt formatting to locale. | +| Tool reference | "am-i.exposed detects this pattern" | Keep tool name. Never translate "am-i.exposed". | +| Concrete example | "If you send exactly 0.1 BTC..." | Translate the framing. Keep the Bitcoin amount. | + +--- + +## Part 6: Subagent Strategy + +For large translation jobs, split work across subagents. Each subagent operates independently but follows the same rules. + +### Model recommendations + +| Role | Model | Why | +|---|---|---| +| Orchestrator | Opus | Judgment calls, cross-chunk consistency, assembly | +| UI string subagents | Sonnet | Formulaic strings, well-constrained by terminology | +| Long-form subagents | Opus | Creative language work, voice preservation | + +### When to use subagents + +**UI strings - full translation:** Always use subagents for the full `common.json` (~2061 keys). A single agent can handle one namespace of up to ~500 keys. + +**UI strings - partial update:** Run the Step 0b diff first. If the total number of new + changed keys is under ~100, a single agent is fine. If the delta exceeds ~100 keys or spans 3+ namespaces, use subagents. + +**Ad-hoc batches** under ~100 keys: a single agent is fine regardless of scope. + +### Reading very long source files + +Before chunking, the orchestrator must scan the full source file and the full target file. For large JSON files like `common.json`: + +1. **Read the file in sections** - use offset/limit reads by namespace prefix. +2. **Build a namespace inventory** - list every top-level namespace prefix and its key count. Current distribution (2061 keys total): `finding.*` (484), `remediation.*` (148), `pathways.*` (117), `methodology.*` (108), `setup.*` (103), `glossary.*` (74), `settings.*` (66), `wallet.*` (66), `about.*` (58), `walletGuide.*` (55), `common.*` (49), `page.*` (49), `viz.*` (49), `primaryRec.*` (48), plus ~30 smaller namespaces. 
Always re-count from the actual file. +3. **Scan existing target translations** - read the target locale file. Run the Step 0b diff. +4. **Write the consistency brief** - using the inventory and a sample of keys from each namespace. + +### UI Strings (common.json) + +Chunk by key namespace prefix. Suggested groupings: + +- `finding.*` (484) - privacy findings, heuristic results, severity descriptions +- `remediation.*` + `primaryRec.*` (196) - remediation actions and primary recommendations +- `pathways.*` + `methodology.*` (225) - analysis pathways and methodology +- `glossary.*` (74) - glossary definitions (dedicated subagent - educational content) +- `setup.*` + `settings.*` + `wallet.*` + `walletGuide.*` (290) - setup, settings, wallet features +- `about.*` + `welcome.*` + `faq.*` + `guide.*` (142) - about, welcome, FAQ, guide +- Everything else (~650) - common UI, viz, scores, errors, etc. + +**Warning: `wallet.*` and `walletGuide.*` are distinct namespaces.** Do not treat them as one namespace: a prefix match on `wallet` also matches `walletGuide`. They can share a subagent, as in the grouping above, but assign each key by its exact first dot-segment. + +### What Every Subagent Receives + +1. This skill's rules (embed the relevant Parts - typically Parts 1-4 and the delivery checklist) +2. The stop-slop checklist from Step 4 (subagents apply it manually) +3. The consistency brief (see template below) +4. Their assigned chunk - source keys and any existing target translations, **embedded directly in the subagent prompt as text** + +**Critical: no filesystem handoff.** Subagents cannot access `/tmp/` or other temporary directories. Embed the source keys and any existing target translations directly in the subagent's prompt text. + +### Subagent Output Contract + +Every subagent must return its translation as **inline JSON in the final message text**. No file writes. No filesystem paths. The orchestrator parses the returned text directly. + +Format: a JSON object where keys are the common.json keys and values are the translated strings. 
+ +```json +{ + "finding.H1_title": "Adresswiederverwendung erkannt", + "finding.H1_desc": "Diese Adresse wurde in mehreren Transaktionen verwendet" +} +``` + +The orchestrator should strip code fences defensively when parsing. + +### Consistency Brief Template + +Write this before spawning any subagent. Every subagent gets the same brief. + +``` +## Consistency Brief +**Direction:** [EN->DE / EN->ES / EN->PT / EN->FR] +**Target locale:** [de / es / pt / fr] +**Content type:** [UI strings / Glossary / Guide / About] +**Audience:** [Bitcoin users checking their transaction privacy] +**Terminology decisions:** [List any ambiguous terms and the chosen translation] + - "finding" -> [chosen term] + - "remediation" -> [chosen term] + - "heuristic" -> [chosen term] + - "change output" -> [chosen term] + - "peel chain" -> [chosen term] +**Voice note:** No first-person plural. Passive voice or "am-i.exposed" by name. +**Do not:** + - Do not translate Bitcoin/privacy terms from the do-not-translate list + - Do not add explanatory text absent from source + - Do not use "we", "us", "our", or their target-language equivalents + - Do not use em dashes +``` + +### Error Recovery + +- **Malformed JSON:** Strip code fences and retry parse. One retry. If it fails again, the orchestrator translates that chunk directly. +- **Missing keys:** Diff the returned keys against the assigned chunk. Re-prompt with just the missing keys. +- **Terminology inconsistency across chunks:** The orchestrator fixes these during recombination. +- **Truncated output:** Re-prompt: "Your output was truncated. Continue from the last complete key-value pair." + +### Recombination + +After all subagents return: + +1. Assemble chunks in source order +2. Review joins between chunks - no register shifts +3. Cross-chunk terminology check: same English term must map to same target term everywhere +4. Final stop-slop pass on long-form content and multi-sentence strings - invoke the `stop-slop` skill via the Skill tool +5. 
Spot-check five random strings against the consistency brief +6. Verify no first-person plural ("we/us/our" or their target-language equivalents) in any string + +--- + +## Part 7: Delivery Checklist + +Run every item. No exceptions. + +### Single-agent translations + +- [ ] Read the full source before writing anything +- [ ] Can state the intent of each string/section in one sentence +- [ ] Reproduced rhetorical devices, not just words +- [ ] Rhythm is natural in the target language +- [ ] Translation matches am-i.exposed's direct, technical, privacy-focused voice +- [ ] Stop-slop pass complete on multi-sentence strings and long-form content +- [ ] No first-person plural ("we", "us", "our", or their target-language equivalents) anywhere +- [ ] No em dashes anywhere + +### Additional for UI strings + +- [ ] Consulted `public/locales/en/common.json` for established English wording +- [ ] Checked existing translations in target locale file for consistency +- [ ] All `{{variable}}` placeholders preserved exactly as-is (double-brace i18next syntax) +- [ ] All HTML tags preserved exactly +- [ ] Bitcoin/privacy terms kept in English per do-not-translate list +- [ ] No UI string introduces content (explanations, qualifiers) absent from the English source +- [ ] No string exceeds ~150% of source character length +- [ ] Button labels use imperative verbs in target language +- [ ] Dot-namespaced key context was used to determine register and grammar + +### Additional for subagent translations + +- [ ] Consistency brief written and reviewed before spawning subagents +- [ ] Every subagent received the brief, this skill's rules, and the stop-slop checklist +- [ ] Each subagent applied stop-slop to multi-sentence strings in its chunk +- [ ] Joins between chunks reviewed +- [ ] Final stop-slop pass across long-form content and multi-sentence strings +- [ ] Terminology consistent across all chunks +- [ ] Five random strings spot-checked against consistency brief +- [ ] No first-person plural in any chunk