Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion api/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from fastapi import FastAPI
from api.routes import templates, forms
from api.routes import transcribe

# Single FastAPI application instance for the service.
app = FastAPI()

# Mount each route module exactly once. Including the same router twice
# registers every one of its routes a second time, so the forms router must
# appear only once here.
app.include_router(templates.router)
app.include_router(forms.router)
app.include_router(transcribe.router)
53 changes: 53 additions & 0 deletions api/routes/transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from pathlib import Path

from fastapi import APIRouter, File, HTTPException, UploadFile

from api.schemas.transcribe import TranscribeResponse
from src.transcriber import SUPPORTED_FORMATS, Transcriber

router = APIRouter(prefix="/transcribe", tags=["transcription"])

# Shared Transcriber for the whole module. It stays None until a request first
# needs it, so the Whisper model is lazy-loaded rather than loaded at import.
_transcriber: Transcriber | None = None


def _get_transcriber() -> Transcriber:
    """Return the module-wide Transcriber, constructing it on first use."""
    global _transcriber
    if _transcriber is not None:
        return _transcriber
    _transcriber = Transcriber()
    return _transcriber


@router.post("", response_model=TranscribeResponse)
async def transcribe_audio(file: UploadFile = File(...)):
    """
    Upload an audio file and receive a plain-text transcription.

    - Accepted formats: WAV, MP3, M4A, MP4, OGG, FLAC
    - All transcription runs locally via Whisper — no data leaves the machine.
    - Model size is configured via the `WHISPER_MODEL` environment variable
      (default: `base`). Valid values: `tiny`, `base`, `small`, `medium`, `large`.
    """
    # The filename of an upload is optional. Without this guard, Path(None)
    # raises TypeError (an opaque 500) and the response model, which requires
    # a str filename, could not be built either.
    filename = file.filename
    if not filename:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file has no filename; cannot determine the audio format.",
        )

    # The format is decided purely from the (case-insensitive) file extension.
    suffix = Path(filename).suffix.lower()
    if suffix not in SUPPORTED_FORMATS:
        raise HTTPException(
            status_code=415,
            detail=(
                f"Unsupported audio format {suffix!r}. "
                f"Accepted: {sorted(SUPPORTED_FORMATS)}"
            ),
        )

    contents = await file.read()
    if not contents:
        # Reject zero-byte uploads up front instead of handing them to the model.
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    transcriber = _get_transcriber()

    # NOTE(review): transcribe_bytes is called synchronously inside this
    # coroutine, so the event loop is presumably blocked for the duration of
    # the transcription. Offloading to a worker thread would need the
    # transcriber to be safe for concurrent use — confirm before changing.
    try:
        text = transcriber.transcribe_bytes(contents, suffix=suffix)
    except Exception as exc:
        # Boundary handler: surface any transcription failure as an HTTP 500
        # while keeping the original exception chained for server-side logs.
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return TranscribeResponse(
        text=text,
        model_used=transcriber.model_size,
        audio_filename=filename,
    )
7 changes: 7 additions & 0 deletions api/schemas/transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pydantic import BaseModel


class TranscribeResponse(BaseModel):
    """Response body returned by the audio transcription endpoint."""

    # Plain-text transcription of the uploaded audio.
    text: str
    # Model size reported by the transcriber that produced the text.
    model_used: str
    # Filename of the uploaded audio file, as supplied with the upload.
    audio_filename: str
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ sqlmodel
pytest
httpx
numpy<2
ollama
ollama
pyyaml
openai-whisper
python-multipart
110 changes: 87 additions & 23 deletions src/file_manipulator.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,111 @@
import logging
import os

from commonforms import prepare_form

from src.filler import Filler
from src.llm import LLM
from commonforms import prepare_form
from src.template_mapper import TemplateMapper

logger = logging.getLogger(__name__)


class FileManipulator:
def __init__(self):
self.filler = Filler()
self.llm = LLM()

def create_template(self, pdf_path: str):
def create_template(self, pdf_path: str) -> str:
"""
By using commonforms, we create an editable .pdf template and we store it.
Prepare a fillable PDF template using commonforms and return its path.
"""
template_path = pdf_path[:-4] + "_template.pdf"
prepare_form(pdf_path, template_path)
return template_path

def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
def fill_form(
self,
user_input: str,
fields: dict,
pdf_form_path: str,
yaml_path: str | None = None,
) -> str:
"""
It receives the raw data, runs the PDF filling logic,
and returns the path to the newly created file.
Extract data from user_input and fill pdf_form_path.

When yaml_path is provided and the file exists, the new pipeline is used:
LLM → IncidentReport → TemplateMapper → Filler (named fields)

When yaml_path is absent, falls back to the legacy pipeline:
LLM → raw dict → Filler (positional fields)

Returns the path to the filled PDF.
"""
print("[1] Received request from frontend.")
print(f"[2] PDF template path: {pdf_form_path}")
logger.info("Received fill request. PDF: %s YAML: %s", pdf_form_path, yaml_path)

if not os.path.exists(pdf_form_path):
print(f"Error: PDF template not found at {pdf_form_path}")
return None # Or raise an exception
raise FileNotFoundError(f"PDF template not found at {pdf_form_path}")

self.llm._transcript_text = user_input

if yaml_path and os.path.exists(yaml_path):
return self._fill_with_mapper(yaml_path)

logger.warning(
"No YAML template provided or found at %r — using legacy positional mapping.",
yaml_path,
)
return self._fill_legacy(fields, pdf_form_path)

# -------------------------------------------------------------------------
# New pipeline: LLM → IncidentReport → TemplateMapper → Filler
# -------------------------------------------------------------------------

def _fill_with_mapper(self, yaml_path: str) -> str:
mapper = TemplateMapper(yaml_path)

self.llm.main_loop()
report = self.llm.get_report()

if report and report.requires_review:
logger.warning(
"Extraction incomplete — the following fields require manual review: %s",
report.requires_review,
)

field_values = mapper.resolve(report)
return self.filler.fill_form(pdf_form=mapper.pdf_path, field_values=field_values)

# -------------------------------------------------------------------------
# Legacy pipeline: kept for backward compatibility until all templates have
# YAML mappings (Phase 2, Week 5).
# -------------------------------------------------------------------------

def _fill_legacy(self, fields: dict, pdf_form_path: str) -> str:
self.llm._target_fields = fields
self.llm.main_loop()
data = self.llm.get_data()

print("[3] Starting extraction and PDF filling process...")
try:
self.llm._target_fields = fields
self.llm._transcript_text = user_input
output_name = self.filler.fill_form(pdf_form=pdf_form_path, llm=self.llm)
# Build a positional {field_name: value} dict from the PDF's own field names
# and the extracted values in visual order — brittle, but preserved until
# YAML templates cover all forms.
from pdfrw import PdfReader

print("\n----------------------------------")
print("✅ Process Complete.")
print(f"Output saved to: {output_name}")
pdf = PdfReader(pdf_form_path)
pdf_fields = []
for page in pdf.pages:
if page.Annots:
sorted_annots = sorted(
page.Annots, key=lambda a: (-float(a.Rect[1]), float(a.Rect[0]))
)
for annot in sorted_annots:
if annot.Subtype == "/Widget" and annot.T:
pdf_fields.append(self.filler._field_name(annot.T))

return output_name
values = [v for v in data.values() if v is not None]
field_values = {
pdf_fields[i]: str(values[i])
for i in range(min(len(pdf_fields), len(values)))
}

except Exception as e:
print(f"An error occurred during PDF generation: {e}")
# Re-raise the exception so the frontend can handle it
raise e
return self.filler.fill_form(pdf_form=pdf_form_path, field_values=field_values)
86 changes: 54 additions & 32 deletions src/filler.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,35 @@
from pdfrw import PdfReader, PdfWriter
from src.llm import LLM
from __future__ import annotations

import logging
from datetime import datetime
from typing import Any

from pdfrw import PdfReader, PdfWriter

logger = logging.getLogger(__name__)


class Filler:
def __init__(self):
pass
"""
Fills a PDF form using a named field mapping produced by TemplateMapper.

Replaces the old positional approach (answers_list[i]) with an explicit
{pdf_field_name: value} dict so every value lands in the correct field
regardless of visual order or page layout.
"""

def fill_form(self, pdf_form: str, llm: LLM):
def fill_form(self, pdf_form: str, field_values: dict[str, Any]) -> str:
"""
Fill a PDF form with values from user_input using LLM.
Fields are filled in the visual order (top-to-bottom, left-to-right).
Write field_values into the PDF at pdf_form and save to a timestamped path.

Parameters
----------
pdf_form: Path to the fillable PDF template.
field_values: {pdf_field_name: value} produced by TemplateMapper.resolve().

Returns
-------
Path to the newly written filled PDF.
"""
output_pdf = (
pdf_form[:-4]
Expand All @@ -19,34 +38,37 @@ def fill_form(self, pdf_form: str, llm: LLM):
+ "_filled.pdf"
)

# Generate dictionary of answers from your original function
t2j = llm.main_loop()
textbox_answers = t2j.get_data() # This is a dictionary

answers_list = list(textbox_answers.values())

# Read PDF
pdf = PdfReader(pdf_form)
filled_count = 0

# Loop through pages
for page in pdf.pages:
if page.Annots:
sorted_annots = sorted(
page.Annots, key=lambda a: (-float(a.Rect[1]), float(a.Rect[0]))
)

i = 0
for annot in sorted_annots:
if annot.Subtype == "/Widget" and annot.T:
if i < len(answers_list):
annot.V = f"{answers_list[i]}"
annot.AP = None
i += 1
else:
# Stop if we run out of answers
break
if not page.Annots:
continue
for annot in page.Annots:
if annot.Subtype != "/Widget" or not annot.T:
continue

PdfWriter().write(output_pdf, pdf)
field_name = self._field_name(annot.T)
if field_name in field_values:
annot.V = str(field_values[field_name])
annot.AP = None
filled_count += 1
else:
logger.debug("PDF field %r has no mapped value — left blank", field_name)

# Your main.py expects this function to return the path
logger.info("Filled %d / %d mapped fields in %s", filled_count, len(field_values), pdf_form)
PdfWriter().write(output_pdf, pdf)
return output_pdf

# -------------------------------------------------------------------------
# Helpers
# -------------------------------------------------------------------------

@staticmethod
def _field_name(annot_t) -> str:
"""
Extract the plain field name string from a pdfrw PdfString.
pdfrw wraps PDF literal strings in parentheses, e.g. '(FieldName)'.
"""
raw = str(annot_t)
return raw[1:-1] if raw.startswith("(") and raw.endswith(")") else raw
Loading