Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions modules/dr-anticorruption/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Runtime image for the DR anti-corruption API (a uvicorn-served ASGI app).
FROM python:3.11-slim

# PYTHONUNBUFFERED: flush stdout/stderr immediately so application logs show
#   up in `docker logs` without buffering delays.
# PYTHONPATH: same value docker-compose.yml sets for the `api` service, so the
#   image resolves imports identically when it is run without compose.
ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app/src

WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the module (source, config) into /app.
COPY . .

# Port the uvicorn server below listens on.
EXPOSE 8000

# Exec form so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000"]
75 changes: 72 additions & 3 deletions modules/dr-anticorruption/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,73 @@
# DR Anti-Corruption
Owner: Brian Collado
Goal: /ingest, /risk, /graph, /brief with official DR sources.
# DR Anti-Corruption Module (Dominican Republic)

This module provides an end-to-end pipeline for detecting corruption risks in public procurement in the Dominican Republic. It ingests official sources, connects entities, calculates risk scores, and generates evidence-backed policy briefs.

## 🚀 Overview
- **Lead:** Brian Collado
- **Scope:** DR procurement data -> Entity/Contract linking -> Risk indicators -> Evidence-linked briefs.
- **Status:** MVP Implementation.

## 📥 Inputs & 📤 Outputs

### Inputs
- **Procurement Data:** Official JSON/Excel files from DGCP (Dirección General de Contrataciones Públicas).
- **Target Entities:** RPE (Registro de Proveedores del Estado) or company names for tailored risk analysis.
- **Config Parameters:** Custom thresholds for activation spikes, bidder concentration, and flags for politically exposed persons (PEPs).

### Outputs
- **Risk Score:** Numerical score (0-100) indicating corruption risk.
- **Risk Level:** Categorical risk (LOW, MEDIUM, HIGH, CRITICAL).
- **Evidence Graph:** Relationship mapping between companies, owners, and contracts.
- **Policy Brief:** Contextual summary of findings with citations to specific contracts or legal violations.

## 🛠 How to Run

### Docker (Recommended)
The module is designed to run in a containerized environment.

```bash
docker-compose up -d
```
Access the API at `http://localhost:8000`.

### Local Development
1. Install dependencies:
```bash
pip install -r requirements.txt
```
2. Run ingestion pipeline:
```bash
python src/cli.py ingest --target all
```
3. Start the API:
```bash
uvicorn src.api.main:app --host 0.0.0.0 --port 8000
```

## 🌐 API Interface (MVP)

| Endpoint | Method | Description |
| :--- | :--- | :--- |
| `/ingest` | `POST` | Triggers background ingestion of official DR sources. |
| `/risk` | `POST` | Returns a detailed risk report for a specific entity. |
| `/graph` | `GET` | Returns JSON representation of the entity's network. |
| `/brief` | `POST` | Generates a 1-page PDF/Text brief with evidence. |

### Example Scenario
**Goal:** Analyze a suspicious supplier involved in multiple "emergency" contracts.
1. **Request:** `POST /risk` with `{"entity_id": "12345"}`.
2. **Response:**
```json
{
"risk_score": 85.5,
"level": "CRITICAL",
"factors": ["Concentration of emergency contracts", "Shared ownership with public official"],
"evidence": ["Contract #992-2023", "Public Gazette Ref ID: X"]
}
```

## 📂 Project Structure
- `src/api/`: FastAPI implementation.
- `src/core/`: Risk engine, forensic analyzers, and brief generator.
- `src/data/`: Data management and persistence (SQLite/Postgres).
- `config/`: Configuration for thresholds and API keys.
189 changes: 189 additions & 0 deletions modules/dr-anticorruption/config/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# Directory used for data files (relative path; the base it is resolved
# against is not shown in this file).
data:
dir: data

# Log level and log file name.
logging:
level: INFO
file: ingestion.log

# Settings for the DGCP open-data API client.
# NOTE(review): rate_limit_delay and the timeout_* values are presumably
# expressed in seconds — confirm against the client code.
dgcp:
base_url: https://datosabiertos.dgcp.gob.do/api-dgcp/v1
rate_limit_delay: 0.2
max_retries: 5
timeout_connect: 10
timeout_read: 30

# Risk scoring settings.
# critical/high/medium are presumably the minimum scores at which each risk
# level applies — confirm against the risk engine.
# NOTE(review): new_company_years is a hard-coded list of year strings; it
# presumably has to be edited by hand as years pass — consider deriving it
# from the current date instead.
risk:
thresholds:
critical: 75
high: 50
medium: 25
new_company_years: ['2024', '2025', '2026']
hub_density_high: 20
hub_density_medium: 5

# Thresholds for the forensic analyzers; their exact meaning is defined by
# the analyzer code, which is not visible here.
forensics:
versatility_threshold: 2
activation_concentration: 0.6
activation_min_contracts: 10
activation_30d_max: 10

# Keyword lists used for classification and news screening.
# The industry entries are lowercase Spanish terms written without accents;
# several are word stems (e.g. construc, ingenier, asfalt), so matching is
# presumably substring-based on normalized text — verify against the matcher.
# NOTE(review): if matching is substring-based, very short entries such as
# pan, ups, agua, hilo or SIN can over-match (e.g. "pan" occurs inside
# "compania") — confirm and consider word-boundary matching for those.
keywords:
# Terms grouped by industry category.
industries:
medical:
- salud
- hospital
- medico
- farmacia
- medicamento
- laboratorio
- reactivo
- clinico
- quirurgico
- insumo
- oxigeno
- suero
- pastillas
- jarabe
- sonda
- jeringa
- hilo
- cateter
- guante
- mascarilla
- sutura
- antibiotico
- analgesico
construction:
- obra
- construc
- ingenier
- remodelacion
- mantenimiento fisico
- reparacion
- pintura
- cemento
- varilla
- ferreteria
- asfalt
- acero
- edific
- tuberia
- electrico
- plomeria
food:
- aliment
- comida
- desayuno
- almuerzo
- catering
- buffet
- cocina
- bebida
- picadera
- refrigerio
- pan
- agua
- jugo
- cafe
- botella
it:
- computadora
- laptop
- toner
- papel
- oficina
- software
- licencia
- informatica
- tecnologia
- impresora
- cartucho
- redes
- internet
- disco duro
- ups
- monitor
- cable
transport:
- transporte
- vehiculo
- combustible
- taller
- repuesto
- goma
- bateria
- neumatico
- chofer
- camion
- guagua
- motor
- mecanic
- aceite
- lubricante
- freno
cleaning:
- limpieza
- aseo
- fumigacion
- conserje
- desechos
- basura
- jardineria
- desinfeccion
- lavado
- detergente
- escoba
- suape
event:
- evento
- publicidad
- impresion
- montaje
- sonido
- banner
- rotulo
- regalo
- bono
# Corruption-related terms, presumably searched for in news coverage.
risk_news:
- corrupcion
- soborno
- fraude
- peculado
- lavado de activos
- estafa
- investigacion
- imputado
- acusado
- irregularidad
- sobrevaluacion
- vinculado
- testaferro
- clonacion
# Names of investigative journalists/outlets (note: mixed case, unlike the
# lists above — confirm the matcher is case-insensitive).
investigative:
- Nuria
- Alicia Ortega
- Acento
- SIN
# Names associated with whistleblower sources.
whistleblowers:
- somos pueblo
- tolentino
- cavada
- espresate

# Bind address and port for the HTTP API.
api:
host: 0.0.0.0
port: 8000

# News search settings: region code, language tag, and how many years back
# to look.
news:
region: DO
language: es-419
lookback_years: 5

# Object storage connection.
# NOTE(review): these are the stock MinIO default credentials committed in
# plain text — acceptable for local development only; load real values from
# the environment or a secret store.
# NOTE(review): "localhost" will not reach the MinIO container from inside
# the compose `api` container (the compose service is named `minio`) —
# confirm how this endpoint is overridden when running under docker-compose.
minio:
endpoint: http://localhost:9000
access_key: minioadmin
secret_key: minioadmin
bucket: s3-raw

# Postgres connection string.
# NOTE(review): same concerns as above — default credentials in plain text,
# and "localhost" versus the compose service name `postgres`.
postgres:
dsn: postgresql://postgres:postgres@localhost:5432/datalake
58 changes: 58 additions & 0 deletions modules/dr-anticorruption/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Local development stack: the API plus the Redis, Postgres and MinIO
# services it depends on. Credentials below are development defaults only.
version: '3.8'

services:
  api:
    build: .
    ports:
      - "8000:8000"
    volumes:
      # Bind-mount the source tree so `uvicorn --reload` picks up edits.
      - .:/app
    environment:
      - PYTHONPATH=/app/src
    command: uvicorn src.api.main:app --host 0.0.0.0 --port 8000 --reload
    # Long-form depends_on: wait for the healthchecks defined below instead
    # of only waiting for the containers to be created. Redis defines no
    # healthcheck, so it is only required to have started.
    depends_on:
      redis:
        condition: service_started
      postgres:
        condition: service_healthy
      minio:
        condition: service_healthy

  redis:
    image: redis:alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data

  postgres:
    image: postgres:15
    environment:
      POSTGRES_DB: datalake
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: [ "CMD-SHELL", "pg_isready -U postgres -d datalake" ]
      interval: 10s
      timeout: 5s
      retries: 5

  minio:
    image: quay.io/minio/minio:latest
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin
    ports:
      - "9000:9000"   # S3 API
      - "9001:9001"   # web console
    volumes:
      - minio_data:/data
    healthcheck:
      # Current MinIO images no longer ship curl; `mc` is bundled and
      # `mc ready local` probes the server running in this container.
      test: [ "CMD", "mc", "ready", "local" ]
      interval: 30s
      timeout: 20s
      retries: 3

volumes:
  redis_data:
  postgres_data:
  minio_data:
51 changes: 51 additions & 0 deletions modules/dr-anticorruption/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Packaging and tooling configuration for the dr-anticorruption module.
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "dr-anticorruption"
version = "0.2.0"
description = "Anti-corruption analysis platform for DR procurement data"
readme = "README.md"
requires-python = ">=3.11"
# NOTE(review): the Docker image installs from requirements.txt rather than
# from this list — confirm the two are kept in sync.
dependencies = [
    "fastapi==0.115.0",
    "uvicorn[standard]==0.32.0",
]

[project.optional-dependencies]
dev = [
    "pytest==8.3.3",
    "pytest-cov==5.0.0",
    "black==24.8.0",
    "isort==5.13.2",
    "mypy==1.11.2",
    # types-requests releases are date-stamped (X.Y.Z.YYYYMMDD); the previous
    # pin "2.32.0.202409.1" does not correspond to a published release.
    "types-requests==2.32.0.20240914",
]

[tool.black]
line-length = 88
target-version = ['py311']

[tool.isort]
profile = "black"
line_length = 88

[tool.mypy]
python_version = "3.11"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true

[tool.pytest.ini_options]
# Coverage is collected for the src tree and reported both as HTML and as a
# terminal summary listing missing lines.
addopts = "-ra --strict-markers --strict-config --cov=src --cov-report=html --cov-report=term-missing"
testpaths = ["tests"]
markers = [
    "slow: Marks tests as slow",
    "integration: Integration tests"
]

# NOTE(review): with where = ["src"], the installed top-level packages are the
# directories inside src/ (e.g. api, core), whereas the server is launched as
# "src.api.main:app" — confirm which import root is intended.
[tool.setuptools.packages.find]
where = ["src"]
namespaces = false
Loading