Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ dist/
.env
.vscode/

# Vite build artifacts (generated by `npm run build`)
offscreen/ocr-worker.bundle.js
vendor/tesseract/

32 changes: 0 additions & 32 deletions MV3_COMPLIANCE.md

This file was deleted.

29 changes: 26 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@ AI chat interfaces make it too easy to paste sensitive data. This extension inte
- ✅ Intercepts ChatGPT, Claude, Gemini
- ✅ PII detection via Precheck API
- ✅ Auto-redaction
- ✅ Image attachment scanning (OCR-based, client-side)
- ✅ Dashboard logging (optional)
- 🚧 Chrome Web Store submission pending

## Install (Dev Mode)

1. Clone this repo
2. Chrome → `chrome://extensions/` → Enable Developer Mode
3. Load unpacked → select this folder
4. Done
2. `npm install && npm run build`
3. Chrome → `chrome://extensions/` → Enable Developer Mode
4. Load unpacked → select this folder
5. Done

**Note**: Local mode works without an API key if you run Precheck locally. Console mode requires a free API key from governsai.com.

Expand All @@ -45,6 +47,8 @@ Click the extension icon → Settings:
MV3 compliance notes are now covered in this README (the standalone `MV3_COMPLIANCE.md` checklist has been removed).

## How it works

### Text messages
```
User types message → Extension intercepts → Precheck API scans → Policy applied → Action taken
↓ ↓ ↓ ↓ ↓
Expand All @@ -57,6 +61,25 @@ User types message → Extension intercepts → Precheck API scans → Policy ap
2. **Analyze**: Precheck API scans for sensitive information (PII) and applies your organization's policies
3. **Protect**: Based on policy, the message is either allowed, has PII redacted, or is blocked entirely

### Image attachments (ChatGPT)

Images are scanned before they're sent using a fully client-side OCR pipeline — no pixel data leaves your browser.

```
User attaches image → Extension extracts image at Send time → OCR (Tesseract.js, offscreen doc)
↓ ↓
ChatGPT file Text extracted from image
attachment ↓
Precheck API scans text
Allow / Block (no redact)
```

- **OCR runs entirely in the browser** via a hidden [MV3 Offscreen Document](https://developer.chrome.com/docs/extensions/reference/api/offscreen) — Tesseract.js never sends image data anywhere
- Images with no detectable text pass through without being blocked
- Images larger than 2 MB are skipped (OCR not attempted)
- Because pixel-level redaction is not feasible, the only actions for images are **Allow** or **Block** — if PII is found, the user is asked to remove the attachment

## License

MIT
Expand Down
158 changes: 154 additions & 4 deletions background/service-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,39 @@ console.log('[GovernsAI] Background service worker initialized');
// Routes messages from content scripts to their async handlers.
// Returning true from the listener keeps the sendResponse channel open
// until the async handler settles (required by chrome.runtime messaging).
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  console.log('[GovernsAI] Received message:', request.type);

  switch (request.type) {
    case 'INTERCEPT_MESSAGE':
      handleInterceptedMessage(request, sender)
        .then(sendResponse)
        .catch((error) => {
          console.error('[GovernsAI] Error handling message:', error);
          sendResponse(buildFailClosedDecision(request?.message, FAIL_CLOSED_REASON, error.message));
        });
      return true; // async response pending

    case 'INTERCEPT_IMAGE':
      handleInterceptedImage(request, sender)
        .then(sendResponse)
        .catch((error) => {
          console.error('[GovernsAI] Error handling image:', error);
          // Fail-closed: if image processing errors we block rather than leak.
          sendResponse(buildFailClosedDecision('', FAIL_CLOSED_REASON, error.message));
        });
      return true; // async response pending

    case 'GET_STATUS':
      getExtensionStatus()
        .then(sendResponse)
        .catch((error) => {
          console.error('[GovernsAI] Error getting status:', error);
          sendResponse({ enabled: false, error: error.message });
        });
      return true; // async response pending

    default:
      // Unknown message type: fall through with no response (same as original).
      return undefined;
  }
});
Expand Down Expand Up @@ -118,6 +130,144 @@ async function handleInterceptedMessage(request, sender) {
}
}

/**
 * Handles an image attachment intercepted by a content script.
 * Runs OCR on each image via the offscreen document, feeds the extracted
 * text into the existing Precheck pipeline, and returns ALLOW or BLOCK.
 * Images cannot be redacted (unlike text), so REDACT is mapped to BLOCK.
 *
 * Fail-closed: any failure in the OCR or Precheck stages yields a BLOCK
 * decision rather than letting a possibly-sensitive image through.
 *
 * @param {object} request - { platform, message, images: string[], url, timestamp }
 * @param {object} sender - chrome.runtime message sender (unused; kept for handler-signature parity)
 * @returns {Promise<object>} Decision object with action ALLOW or BLOCK
 */
async function handleInterceptedImage(request, sender) {
  const { platform, images = [], url, timestamp } = request;

  console.log(`[GovernsAI] Processing image attachment from ${platform}: ${images.length} image(s)`);

  try {
    const settings = await getSettings();

    if (!settings.enabled) {
      console.log('[GovernsAI] Extension disabled, allowing image');
      return { action: 'ALLOW' };
    }

    if (settings.enabledPlatforms && !settings.enabledPlatforms.includes(platform)) {
      console.log(`[GovernsAI] Platform ${platform} not monitored, allowing image`);
      return { action: 'ALLOW' };
    }

    // Fast path: nothing attached. Skip the OCR pipeline entirely so we
    // don't create the offscreen document (and spin up Tesseract) for no work.
    if (images.length === 0) {
      return { action: 'ALLOW', reason: 'No images attached' };
    }

    // Run OCR on all attached images using the offscreen document.
    let ocrResult;
    try {
      await ensureOffscreenDocument();
      ocrResult = await runOCR(images);
    } catch (err) {
      console.error('[GovernsAI] OCR pipeline failed:', err);
      return buildFailClosedDecision('', FAIL_CLOSED_REASON, err.message);
    }

    const extractedText = (ocrResult.texts || []).join('\n').trim();
    console.log('[GovernsAI] OCR extracted text (first 100 chars):', extractedText.slice(0, 100));

    // No text found in images — nothing to scan.
    if (!extractedText) {
      console.log('[GovernsAI] No text found in images, allowing send');
      return { action: 'ALLOW', reason: 'No text detected in attached images' };
    }

    // Re-use the existing Precheck/policy pipeline with the extracted text.
    let precheckResult;
    try {
      precheckResult = await scanForPII(extractedText, settings);
    } catch (err) {
      console.error('[GovernsAI] Precheck API error (image OCR text):', err);
      return buildFailClosedDecision('', FAIL_CLOSED_REASON, err.message);
    }

    if (precheckResult?.fallback) {
      console.warn('[GovernsAI] Precheck fallback detected for image text; applying fail-closed policy');
      return buildFailClosedDecision('', FAIL_CLOSED_REASON, 'Fallback PII detection is disabled in fail-closed mode');
    }

    // For images we only ALLOW or BLOCK — redaction of pixel data is out of scope.
    // applyApiDecision takes precedence; evaluatePolicy is the local fallback.
    let decision = applyApiDecision(precheckResult, extractedText, settings);
    if (!decision) {
      decision = evaluatePolicy(precheckResult, settings);
    }

    const imageDecision = {
      // REDACT cannot be honored for pixel data, so it hardens into BLOCK.
      action: decision.action === 'REDACT' ? 'BLOCK' : decision.action,
      reason: decision.action === 'REDACT'
        ? `Image contains sensitive information: ${decision.reason}`
        : decision.reason,
      entities: decision.entities || [],
      isImageBlock: true,
    };

    console.log(`[GovernsAI] Image decision: ${imageDecision.action}`);

    // Record the outcome (length/entities of the OCR text, never the pixels).
    await logInteraction({
      platform,
      url,
      timestamp,
      messageLength: extractedText.length,
      hasPII: precheckResult.hasPII,
      entities: precheckResult.entities,
      action: imageDecision.action,
      settings,
    });

    return imageDecision;

  } catch (error) {
    console.error('[GovernsAI] Unexpected error during image processing:', error);
    return buildFailClosedDecision('', FAIL_CLOSED_REASON, error.message);
  }
}

// Tracks an in-flight createDocument() call so concurrent callers share it.
// Without this, two callers can both observe hasDocument() === false and both
// call createDocument(); MV3 allows only one offscreen document, so the
// second call throws.
let offscreenCreationPromise = null;

/**
 * Ensures a single offscreen document exists for OCR processing.
 * MV3 allows at most one offscreen document per extension at a time.
 * Safe to call concurrently: simultaneous callers await the same creation.
 */
async function ensureOffscreenDocument() {
  // chrome.offscreen.hasDocument() returns true if any offscreen doc is open.
  const existing = await chrome.offscreen.hasDocument();
  if (existing) return;

  if (!offscreenCreationPromise) {
    offscreenCreationPromise = (async () => {
      await chrome.offscreen.createDocument({
        url: 'offscreen/ocr-worker.html',
        reasons: [chrome.offscreen.Reason.BLOBS],
        justification: 'Run Tesseract.js OCR on image attachments to detect PII before sending',
      });
      console.log('[GovernsAI] Offscreen OCR document created');
    })().finally(() => {
      // Clear so the document can be recreated later if Chrome tears it down.
      offscreenCreationPromise = null;
    });
  }

  await offscreenCreationPromise;
}

/**
 * Sends images to the offscreen document for OCR and returns extracted text.
 *
 * @param {string[]} images - Array of base64 data URLs
 * @returns {Promise<{ texts: string[], confidence: number[] }>}
 */
function runOCR(images) {
  return new Promise((resolve, reject) => {
    const handleReply = (reply) => {
      // Delivery failures (e.g. no receiver) surface via chrome.runtime.lastError.
      if (chrome.runtime.lastError) {
        reject(new Error(chrome.runtime.lastError.message));
        return;
      }
      // The offscreen worker reports OCR failures via an `error` field.
      if (reply?.error) {
        reject(new Error(reply.error));
        return;
      }
      // A missing reply degrades to an empty OCR result.
      resolve(reply || { texts: [], confidence: [] });
    };

    chrome.runtime.sendMessage(
      { type: 'PROCESS_IMAGE', target: 'ocr-worker', images },
      handleReply
    );
  });
}

function buildFailClosedDecision(originalMessage, reason, errorMessage = '') {
return {
action: 'BLOCK',
Expand Down
Loading