diff --git a/README.md b/README.md index 501aa1a..2f96d9e 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,99 @@ -python3 -m venv .venv -source .venv/bin/activate +# Proyecto TSP -pip install -r requirements.txt +## Requisitos previos ---- -Construir y levantar los contenedores +Asegúrate de tener instalados los siguientes componentes: +1. PostgreSQL y postgresql-contrib +2. Docker +3. Docker Compose + +## Instalación + +### 1. PostgreSQL + +Para instalar PostgreSQL y postgresql-contrib, ejecuta: + +```bash +sudo apt-get update +sudo apt-get install postgresql postgresql-contrib -y +``` + +### 2. Docker y Docker Compose + +Sigue las [instrucciones oficiales de Docker](https://docs.docker.com/engine/install/ubuntu/) para instalar Docker en Ubuntu. + +Para instalar Docker Compose, sigue las [instrucciones oficiales de Docker Compose](https://docs.docker.com/compose/install/). + +## Configuración + +Asegúrate de que los siguientes archivos tengan los permisos correctos (755): + +```bash +chmod 755 db-init/init-user-db.sh +chmod 755 scripts/change_user_password.sh +chmod 755 scripts/wait-for-postgres.sh +``` + +Después de asignar los permisos, puedes cambiar la contraseña del usuario de PostgreSQL ejecutando el script `change_user_password.sh`. Por ejemplo: + +```bash +bash scripts/change_user_password.sh postgres nueva_contraseña +``` + +Asegúrate de reemplazar `nueva_contraseña` con la contraseña que deseas establecer. + +## Ejecución del proyecto + +### Iniciar los contenedores + +Puedes usar cualquiera de los siguientes comandos para iniciar los contenedores: + +Para construir las imágenes (usando caché si está disponible) e iniciar los contenedores en un solo comando: +```bash docker compose up --build -docker compose up --build --no-cache +``` + +Para construir las imágenes sin usar caché y luego iniciar los contenedores: + +```bash +docker compose build --no-cache +docker compose up +``` + +o para ejecutar en modo detached: + +```bash +docker compose up -d +``` + +### Detener los contenedores + +Si algo sale mal y necesitas detener los contenedores, puedes usar: + +```bash +docker compose down -v +``` + +Este comando detendrá y eliminará los contenedores y volúmenes. + +Si solo quieres detener los contenedores sin eliminar los volúmenes: + +```bash +docker compose down +``` + +## Pruebas + +Para reconstruir la imagen de prueba y ejecutar las pruebas: +```bash +docker compose build test +docker compose run --rm test +``` -Verificar el estado -docker compose logs -f -Acceso a los servicios: +## Acceso a los servicios: Puedes acceder al servicio de traducción en http://localhost:5001. El servicio de almacenamiento en http://localhost:5002. diff --git a/docker-compose.yml b/docker-compose.yml index 8d8d187..9203a55 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,15 +1,13 @@ -version: '3.8' services: # Servicio de almacenamiento storage: build: ./storage container_name: storage + ports: + - "5002:5000" + env_file: .env environment: - - STORAGE_PATH=${STORAGE_PATH} - - DATABASE_URL=postgresql://${DB_USER}:${DB_PASSWORD}@db:5432/${DB_NAME} - - REDIS_URL=redis://redis:6379/${REDIS_DB} - - LOG_LEVEL=${LOG_LEVEL} - - LOG_FILE=${LOG_FILE_STORAGE} + - FLASK_ENV=${FLASK_ENV} volumes: - ./data:${STORAGE_PATH} - ./logs:/app/logs @@ -28,11 +26,11 @@ services: timeout: 5s retries: 3 start_period: 10s - command: ["/scripts/wait-for-postgres.sh", "-h", "db", "-U", "${DB_USER}", "-P", "${DB_PASSWORD}", "-d", "${DB_NAME}", "-t", "60", "-i", "5", "--", "python", "main.py"] + command: ["/scripts/wait-for-postgres.sh", "-h", "db", "-U", "${DB_USER}", "-P", "${DB_PASSWORD}", "-d", "${DB_NAME}", "-t", "60", "-i", "5", "--", "python", "src/main.py"] # Servicio para pruebas test: - build: + build: context: ./storage dockerfile: Dockerfile container_name: storage_test @@ -43,25 +41,17 @@ services: condition: service_healthy environment: - TESTING=True - # Incluye estas variables solo si tus pruebas las necesitan - # - DATABASE_URL=postgresql://${DB_USER}:${DB_PASSWORD}@db:5432/${DB_NAME} - # - REDIS_URL=redis://redis:6379/${REDIS_DB} + - FLASK_ENV=testing + env_file: .env networks: - app_network - #command: ["pytest", "-v", "/app/tests", "--import-mode=importlib"] command: ["python", "-m", "pytest", "-v", "/app/tests"] # Base de datos PostgreSQL db: image: postgres:13 container_name: postgres_db - environment: - POSTGRES_USER: ${POSTGRES_USER} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_DB: ${POSTGRES_DB} - DB_USER: ${DB_USER} - DB_PASSWORD: ${DB_PASSWORD} - DB_NAME: ${DB_NAME} + env_file: .env volumes: - postgres_data:/var/lib/postgresql/data - ./db-init:/docker-entrypoint-initdb.d @@ -98,7 +88,7 @@ services: image: adminer container_name: adminer ports: - - "9090:8080" # Puerto para acceder a Adminer + - "9090:8080" depends_on: - db networks: diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index e183c85..0000000 --- a/requirements.txt +++ /dev/null @@ -1,22 +0,0 @@ -annotated-types==0.7.0 -anyio==4.4.0 -certifi==2024.8.30 -distro==1.9.0 -exceptiongroup==1.2.2 -h11==0.14.0 -httpcore==1.0.5 -httpx==0.27.2 -idna==3.10 -jiter==0.5.0 -linecache2==1.0.0 -openai==1.46.0 -pydantic==2.9.2 -pydantic-core==2.23.4 -pypdf2==3.0.1 -python-dotenv==1.0.1 -six==1.16.0 -sniffio==1.3.1 -tqdm==4.66.5 -traceback2==1.4.0 -typing-extensions==4.12.2 -unittest2==1.1.0 diff --git a/scripts/change_user_password.sh b/scripts/change_user_password.sh index 1f9a1f1..c46f69f 100755 --- a/scripts/change_user_password.sh +++ b/scripts/change_user_password.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -e : ' Este script se utiliza para cambiar la contraseña de un usuario en PostgreSQL. Haz el script ejecutable: diff --git a/src/extract/interface.py b/src/extract/interface.py deleted file mode 100644 index c43b885..0000000 --- a/src/extract/interface.py +++ /dev/null @@ -1,18 +0,0 @@ -from abc import ABC, abstractmethod - -class ITextExtractor(ABC): - """Interfaz para un extractor de texto.""" - - @abstractmethod - def extract_text(self, pdf_path: str, start_page: int = 0, end_page: int = None) -> list: - """Extrae texto de un archivo PDF. - - Args: - pdf_path (str): Ruta del archivo PDF. - start_page (int): Página de inicio. - end_page (int): Página final. - - Returns: - list: Lista de textos extraídos de las páginas especificadas. - """ - pass diff --git a/src/extract/pdf_extractor.py b/src/extract/pdf_extractor.py deleted file mode 100644 index f92fe7d..0000000 --- a/src/extract/pdf_extractor.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import PyPDF2 -from abc import ABC, abstractmethod -from src.utils.logger import LoggerManager -from src.utils.decorators import handle_error -from src.utils.config import FILES_DIR -from .interface import ITextExtractor - - -class PDFTextExtractor(ITextExtractor): - @handle_error - def extract_text(self, pdf_filename: str, start_page: int = 0, end_page: int = None) -> list: - """ - Extrae texto de un archivo PDF ubicado en la carpeta 'files'. - - Parámetros: - - pdf_filename: Nombre del archivo PDF del que se extraerá el texto. - - start_page: Página inicial para extraer (cero-indexada). - - end_page: Página final para extraer (cero-indexada), o None para la última página. - - Retorna: - - list: Lista con el texto extraído de las páginas especificadas. - """ - pdf_path = os.path.join(FILES_DIR, pdf_filename) - - with open(pdf_path, 'rb') as file: - reader = PyPDF2.PdfReader(file) - end_page = end_page or len(reader.pages) - 1 - extracted_texts = [] - - for i in range(start_page, end_page + 1): - page_text = reader.pages[i].extract_text() or "" - extracted_texts.append(page_text) - LoggerManager.log_message(f"Texto extraído de la página {i + 1} del archivo: {pdf_filename}", level='info') - - LoggerManager.log_message(f"Texto extraído correctamente del archivo: {pdf_filename}", level='info') - return extracted_texts # Devolver una lista de textos por página \ No newline at end of file diff --git a/src/main.py b/src/main.py deleted file mode 100644 index 79231cf..0000000 --- a/src/main.py +++ /dev/null @@ -1,40 +0,0 @@ -from src.extract.pdf_extractor import extract_text_from_pdf -from src.translators.gpt_translator import GPTTranslator -from src.utils.file_utils import save_translation_to_file -from src.utils.config import LOG_FILE, LOGS_DIR, FILES_DIR -from src.utils.logger import LoggerManager -from src.utils.config import Settings - - -def main(): - # Cargar configuraciones - settings = Settings() - - # Configurar el logger - logger = LoggerManager(settings.app_name, settings.log_file) - - # Clave API para el traductor - api_key = settings.api_key - translator = GPTTranslator(api_key) - - # Crear una instancia del extractor - pdf_extractor = PDFTextExtractor() - - # Definir el nombre del archivo PDF y las páginas a extraer - pdf_filename = 'example.pdf' - start_page = 0 - end_page = None # Extraer hasta la última página - - # Extraer texto del PDF - extracted_texts = pdf_extractor.extract_text(pdf_filename, start_page, end_page) - - # Traducir cada texto extraído - for page_number, text in enumerate(extracted_texts, start=start_page): - if text.strip(): # Verificar que el texto no esté vacío - translated_text = translator.translate(text) - LoggerManager.log_message(f"Texto traducido de la página {page_number + 1}: {translated_text}", level='info') - - LoggerManager.log_message("Proceso de extracción y traducción completado.", level='info') - -if __name__ == '__main__': - main() diff --git a/src/translators/gpt_translator.py b/src/translators/gpt_translator.py deleted file mode 100644 index e85f8c0..0000000 --- a/src/translators/gpt_translator.py +++ /dev/null @@ -1,74 +0,0 @@ -import requests -import json -import openai -from src.utils.logger import LoggerManager -from src.utils.error_handling import handle_error -from .interface import ITranslator - -class GPTTranslator(ITranslator): - """Implementación del traductor utilizando la API de GPT.""" - - def __init__(self, api_key: str): - self.api_key = api_key - openai.api_key = self.api_key # Configura la clave API de OpenAI - - @handle_error - def translate(self, text: str) -> str: - """Traduce el texto usando la API de GPT. - - Args: - text (str): El texto que se va a traducir. - - Returns: - str: El texto traducido al español latino. - """ - LoggerManager.log_message("Iniciando la traducción con GPT.") - - try: - # Aquí llamamos a la función de traducción - translated_text = self._translate_text(text) - LoggerManager.log_message(f"Traducción completada exitosamente con GPT.", level='info') - return translated_text - - except Exception as e: - LoggerManager.log_message(f"Error durante la traducción: {e}", level='error') - return "Error en la traducción" # O maneja el error como prefieras - - def _translate_text(self, text: str) -> str: - """Realiza la traducción utilizando la API de OpenAI. - - Args: - text (str): El texto que se va a traducir. - - Returns: - str: El texto traducido. - """ - url = "https://api.openai.com/v1/chat/completions" - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {self.api_key}" - } - - data = { - "model": "gpt-4", - "messages": [ - { - "role": "system", - "content": "Eres un traductor profesional." - }, - { - "role": "user", - "content": f"Traduce el siguiente texto al español latino:\n\n{text}" - } - ], - "max_tokens": 2000 - } - - response = requests.post(url, headers=headers, json=data) - response_data = response.json() - - if response.status_code == 200: - return response_data["choices"][0]["message"]["content"].strip() - else: - LoggerManager.log_message(f"Error en la API de OpenAI: {response_data.get('error', 'Error desconocido')}", level='error') - raise Exception("Error en la API de traducción") diff --git a/src/translators/interface.py b/src/translators/interface.py deleted file mode 100644 index c90de57..0000000 --- a/src/translators/interface.py +++ /dev/null @@ -1,16 +0,0 @@ -from abc import ABC, abstractmethod - -class ITranslator(ABC): - """Interfaz para un traductor.""" - - @abstractmethod - def translate(self, text: str) -> str: - """Método abstracto que todas las implementaciones deben sobrescribir. - - Args: - text (str): El texto que se va a traducir. - - Returns: - str: El texto traducido. - """ - pass diff --git a/src/utils/decorators.py b/src/utils/decorators.py deleted file mode 100644 index 0d2d08a..0000000 --- a/src/utils/decorators.py +++ /dev/null @@ -1,13 +0,0 @@ -from functools import wraps - -def handle_error(func): - """Decorador para manejar errores de manera uniforme""" - - @wraps(func) # Usamos @wraps para mantener el nombre, docstring y metadata de la función decorada - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - log_message(f"Error en la función {func.__name__}: {str(e)}", level='error') - raise e - return wrapper diff --git a/src/utils/file_utils.py b/src/utils/file_utils.py deleted file mode 100644 index 6f11e1a..0000000 --- a/src/utils/file_utils.py +++ /dev/null @@ -1,11 +0,0 @@ -from .logger import log_message - -def save_translation_to_file(text, output_path): - """Guarda el texto traducido en un archivo.""" - try: - with open(output_path, 'w') as file: - file.write(text) - log_message(f"Texto traducido guardado en {output_path}", level='info') - except Exception as e: - log_message(f"Error al guardar el archivo: {str(e)}", level='error') - raise RuntimeError(f"Error al guardar el archivo: {str(e)}") diff --git a/src/utils/logger.py b/src/utils/logger.py deleted file mode 100644 index 602ffe2..0000000 --- a/src/utils/logger.py +++ /dev/null @@ -1,34 +0,0 @@ -import logging -import os - -class LoggerManager: - _logger = None - - @classmethod - def setup_logger(cls, name=None, log_file=None, level=logging.INFO): - """Configura el logger con un formato estándar.""" - if cls._logger is None: # Evitar configurar el logger más de una vez - if name is None: - name = 'app_logger' - if log_file is None: - log_file = 'logs/app.log' # Puede ser configurado dinámicamente - - os.makedirs(os.path.dirname(log_file), exist_ok=True) - cls._logger = logging.getLogger(name) - cls._logger.setLevel(level) - handler = logging.FileHandler(log_file) - formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') - handler.setFormatter(formatter) - cls._logger.addHandler(handler) - - @classmethod - def log_message(cls, message, level='info'): - cls.setup_logger() # Se puede configurar aquí si se desea un logger predeterminado - if level == 'info': - cls._logger.info(message) - elif level == 'warning': - cls._logger.warning(message) - elif level == 'error': - cls._logger.error(message) - else: - cls._logger.debug(message) diff --git a/storage/Dockerfile b/storage/Dockerfile index 0b2f38f..ebb6126 100644 --- a/storage/Dockerfile +++ b/storage/Dockerfile @@ -15,12 +15,13 @@ WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +# Copiar todo el contenido del directorio actual COPY . . -# Define environment variable +# Set environment variables ENV NAME StorageService -# Dockerfile para las pruebas incluya el directorio del proyecto en PYTHONPATH ENV PYTHONPATH=/app:$PYTHONPATH +ENV PYTHONUNBUFFERED 1 -# Comando para iniciar el servicio -CMD ["python", "main.py"] +# Cambiar al directorio padre de 'src' y ejecutar como módulo +CMD ["python", "-m", "src.main"] \ No newline at end of file diff --git a/storage/config.py b/storage/config.py new file mode 100644 index 0000000..f50f2e8 --- /dev/null +++ b/storage/config.py @@ -0,0 +1,33 @@ +import os +from typing import List + +class Config: + DEBUG: bool = os.getenv('DEBUG', 'False').lower() == 'true' + TESTING: bool = os.getenv('TESTING', 'False').lower() == 'true' + SECRET_KEY: str = os.getenv('SECRET_KEY', 'default-secret-key') + + # Database configuration + DB_HOST: str = os.getenv('DB_HOST', 'db') + DB_NAME: str = os.getenv('DB_NAME', 'pdf_translation_db') + DB_USER: str = os.getenv('DB_USER', 'pdf_translator') + DB_PASSWORD: str = os.getenv('DB_PASSWORD', 'secure_password') + + +# Configuraciones específicas para diferentes ambientes +class DevelopmentConfig(Config): + DEBUG = True + +class ProductionConfig(Config): + DEBUG = False + +# Elegir configuración basada en el ambiente +def get_config(): + env = os.getenv('FLASK_ENV', 'development') + if env == 'production': + return ProductionConfig() + return DevelopmentConfig() + + +class TestingConfig(Config): + TESTING = True + DB_NAME = 'test_pdf_translation_db' \ No newline at end of file diff --git a/storage/db.py b/storage/db.py deleted file mode 100644 index 6bbe847..0000000 --- a/storage/db.py +++ /dev/null @@ -1,90 +0,0 @@ -from sqlalchemy import create_engine, Column, Integer, String, DateTime, Enum -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker -from datetime import datetime -import os -from utils.decorators import handle_error -from utils.logger import LoggerManager - -Base = declarative_base() - -class Document(Base): - __tablename__ = 'documents' - - id = Column(Integer, primary_key=True) - filename = Column(String, nullable=False) - file_path = Column(String, nullable=False) - translated_path = Column(String) - file_size = Column(Integer, nullable=False) - upload_date = Column(DateTime, default=datetime.utcnow) - status = Column(Enum('uploaded', 'translating', 'translated', name='document_status'), default='uploaded') - source_language = Column(String, nullable=False) - target_language = Column(String, nullable=False) - -class DatabaseManager: - def __init__(self): - db_url = os.getenv('DATABASE_URL', 'postgresql://user:password@localhost/mydatabase') - self.engine = create_engine(db_url) - self.SessionLocal = sessionmaker(bind=self.engine) - - @handle_error - def create_tables(self): - Base.metadata.create_all(self.engine) - LoggerManager.log_message("Database tables created", level='info') - - @handle_error - def add_document(self, filename, file_path, file_size, source_language, target_language): - with self.SessionLocal() as session: - new_doc = Document( - filename=filename, - file_path=file_path, - file_size=file_size, - source_language=source_language, - target_language=target_language - ) - session.add(new_doc) - session.commit() - LoggerManager.log_message(f"Document added to database: {filename}", level='info') - return new_doc.id - - @handle_error - def get_document(self, doc_id): - with self.SessionLocal() as session: - doc = session.query(Document).filter(Document.id == doc_id).first() - if doc: - LoggerManager.log_message(f"Retrieved document: {doc_id}", level='info') - else: - LoggerManager.log_message(f"Document not found: {doc_id}", level='warning') - return doc - - @handle_error - def update_document_status(self, doc_id, new_status): - with self.SessionLocal() as session: - doc = session.query(Document).filter(Document.id == doc_id).first() - if doc: - doc.status = new_status - session.commit() - LoggerManager.log_message(f"Updated document status: {doc_id} to {new_status}", level='info') - return True - LoggerManager.log_message(f"Failed to update document status: {doc_id}", level='warning') - return False - - @handle_error - def update_document_translated_path(self, doc_id, translated_path): - with self.SessionLocal() as session: - doc = session.query(Document).filter(Document.id == doc_id).first() - if doc: - doc.translated_path = translated_path - doc.status = 'translated' - session.commit() - LoggerManager.log_message(f"Updated translated path for document: {doc_id}", level='info') - return True - LoggerManager.log_message(f"Failed to update translated path for document: {doc_id}", level='warning') - return False - - @handle_error - def get_documents_for_translation(self): - with self.SessionLocal() as session: - docs = session.query(Document).filter(Document.status == 'uploaded').all() - LoggerManager.log_message(f"Retrieved {len(docs)} documents for translation", level='info') - return docs \ No newline at end of file diff --git a/storage/main.py b/storage/main.py deleted file mode 100644 index 3d9569f..0000000 --- a/storage/main.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -from dotenv import load_dotenv -from flask import Flask, jsonify -from utils.logger import LoggerManager -from storage import StorageService -from db import DatabaseManager -from file_system import FileSystemManager -from redis_client import RedisClient -from utils.config import REDIS_CHANNEL_NEW_DOCUMENT, REDIS_CHANNEL_TRANSLATION_COMPLETE - -# Cargar variables de entorno -load_dotenv() - -app = Flask(__name__) - -# Variables globales para los servicios -db_manager = None -file_manager = None -redis_client = None -storage_service = None - - -def initialize_services(): - """Inicializa y retorna todos los servicios necesarios.""" - global db_manager, file_manager, redis_client, storage_service - db_manager = DatabaseManager() - file_manager = FileSystemManager(os.getenv('STORAGE_PATH')) - redis_client = RedisClient() - storage_service = StorageService() - return db_manager, file_manager, redis_client, storage_service - - -@app.route('/health') -def health_check(): - # Verifica que todos los servicios estén operativos - if db_manager and file_manager and redis_client and storage_service: - # Puedes añadir más verificaciones aquí si es necesario - return jsonify({"status": "healthy"}), 200 - else: - return jsonify({"status": "unhealthy"}), 500 - -def setup_redis_listeners(redis_client, storage_service): - """Configura los listeners de Redis para manejar eventos.""" - def handle_new_document(message): - doc_id = message['data'] - LoggerManager.log_message(f"Nuevo documento recibido para traducción: {doc_id}") - # Aquí podrías iniciar el proceso de traducción o cualquier otra lógica necesaria - - def handle_translation_complete(message): - doc_id = message['data'] - LoggerManager.log_message(f"Traducción completada para el documento: {doc_id}") - # Aquí podrías actualizar el estado del documento o iniciar el proceso de notificación - - pubsub = redis_client.client.pubsub() - pubsub.subscribe(**{ - REDIS_CHANNEL_NEW_DOCUMENT: handle_new_document, - REDIS_CHANNEL_TRANSLATION_COMPLETE: handle_translation_complete - }) - return pubsub - -def main(): - # Configurar el logger - log_file = os.getenv('LOG_FILE') - log_level = os.getenv('LOG_LEVEL') - LoggerManager.setup_logger(name='storage_service', log_file=log_file, level=log_level) - - # Inicializar servicios - initialize_services() - - # Crear tablas de la base de datos si no existen - db_manager.create_tables() - - # Configurar listeners de Redis - pubsub = setup_redis_listeners(redis_client, storage_service) - - LoggerManager.log_message("Servicio de Almacenamiento iniciado", level='info') - - # Iniciar el servidor Flask en un hilo separado - from threading import Thread - Thread(target=lambda: app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)).start() - - # Mantener el servicio en ejecución - try: - for message in pubsub.listen(): - if message['type'] == 'message': - # El manejo de mensajes se realiza en las funciones de callback - pass - except KeyboardInterrupt: - LoggerManager.log_message("Servicio de Almacenamiento detenido", level='info') - finally: - pubsub.close() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/storage/pytest.ini b/storage/pytest.ini deleted file mode 100644 index decb2e8..0000000 --- a/storage/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -pythonpath = . -testpaths = tests \ No newline at end of file diff --git a/storage/requirements.txt b/storage/requirements.txt index 0926947..2c9fca5 100644 Binary files a/storage/requirements.txt and b/storage/requirements.txt differ diff --git a/src/__init__.py b/storage/src/__init__.py similarity index 100% rename from src/__init__.py rename to storage/src/__init__.py diff --git a/src/extract/__init__.py b/storage/src/db/__init__.py similarity index 100% rename from src/extract/__init__.py rename to storage/src/db/__init__.py diff --git a/storage/src/db/base.py b/storage/src/db/base.py new file mode 100644 index 0000000..e6d5c98 --- /dev/null +++ b/storage/src/db/base.py @@ -0,0 +1,40 @@ +from sqlalchemy import create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from contextlib import contextmanager +from config import get_config +from sqlalchemy.pool import QueuePool + +Base = declarative_base() + +class BaseManager: + def __init__(self): + self.config = get_config() + self.engine = create_engine( + self.config.DATABASE_URL, + poolclass=QueuePool, + pool_size=5, + max_overflow=10 + ) + self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine) + + @contextmanager + def get_session(self): + session = self.SessionLocal() + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + def create_tables(self): + Base.metadata.create_all(self.engine) + +# Función de utilidad para obtener una sesión de base de datos +def get_db(): + manager = BaseManager() + with manager.get_session() as session: + yield session \ No newline at end of file diff --git a/src/translators/__init__.py b/storage/src/db/managers/__init__.py similarity index 100% rename from src/translators/__init__.py rename to storage/src/db/managers/__init__.py diff --git a/storage/src/db/managers/document_manager.py b/storage/src/db/managers/document_manager.py new file mode 100644 index 0000000..889480d --- /dev/null +++ b/storage/src/db/managers/document_manager.py @@ -0,0 +1,90 @@ +from ..base import BaseManager +from ..models.document import Document +from sqlalchemy.exc import SQLAlchemyError +from ...utils.logger import LoggerManager + +class DocumentManager(BaseManager): + def add_document(self, filename, file_path, file_size, source_language, target_language): + try: + with self.get_session() as session: + new_doc = Document( + filename=filename, + file_path=file_path, + file_size=file_size, + source_language=source_language, + target_language=target_language + ) + session.add(new_doc) + session.flush() + LoggerManager.log_message(f"Document added to database: {filename}", level='info') + return new_doc.id + except SQLAlchemyError as e: + LoggerManager.log_message(f"Error adding document to database: {str(e)}", level='error') + raise + + def get_document(self, doc_id): + try: + with self.get_session() as session: + doc = session.query(Document).filter(Document.id == doc_id).first() + if doc: + LoggerManager.log_message(f"Retrieved document: {doc_id}", level='info') + return doc.to_dict() + else: + LoggerManager.log_message(f"Document not found: {doc_id}", level='warning') + return None + except SQLAlchemyError as e: + LoggerManager.log_message(f"Error retrieving document: {str(e)}", level='error') + raise + + def update_document_status(self, doc_id, new_status): + try: + with self.get_session() as session: + doc = session.query(Document).filter(Document.id == doc_id).first() + if doc: + doc.status = new_status + LoggerManager.log_message(f"Updated document status: {doc_id} to {new_status}", level='info') + return True + LoggerManager.log_message(f"Failed to update document status: {doc_id}", level='warning') + return False + except SQLAlchemyError as e: + LoggerManager.log_message(f"Error updating document status: {str(e)}", level='error') + raise + + def update_document_translated_path(self, doc_id, translated_path): + try: + with self.get_session() as session: + doc = session.query(Document).filter(Document.id == doc_id).first() + if doc: + doc.translated_path = translated_path + doc.status = 'translated' + LoggerManager.log_message(f"Updated translated path for document: {doc_id}", level='info') + return True + LoggerManager.log_message(f"Failed to update translated path for document: {doc_id}", level='warning') + return False + except SQLAlchemyError as e: + LoggerManager.log_message(f"Error updating translated path: {str(e)}", level='error') + raise + + def get_documents_for_translation(self, limit=100, offset=0): + try: + with self.get_session() as session: + docs = session.query(Document).filter(Document.status == 'uploaded').limit(limit).offset(offset).all() + LoggerManager.log_message(f"Retrieved {len(docs)} documents for translation", level='info') + return [doc.to_dict() for doc in docs] + except SQLAlchemyError as e: + LoggerManager.log_message(f"Error retrieving documents for translation: {str(e)}", level='error') + raise + + def delete_document(self, doc_id): + try: + with self.get_session() as session: + doc = session.query(Document).filter(Document.id == doc_id).first() + if doc: + session.delete(doc) + LoggerManager.log_message(f"Deleted document: {doc_id}", level='info') + return True + LoggerManager.log_message(f"Failed to delete document: {doc_id}", level='warning') + return False + except SQLAlchemyError as e: + LoggerManager.log_message(f"Error deleting document: {str(e)}", level='error') + raise \ No newline at end of file diff --git a/src/utils/__init__.py b/storage/src/db/models/__init__.py similarity index 100% rename from src/utils/__init__.py rename to storage/src/db/models/__init__.py diff --git a/storage/src/db/models/document.py b/storage/src/db/models/document.py new file mode 100644 index 0000000..a6dbf7d --- /dev/null +++ b/storage/src/db/models/document.py @@ -0,0 +1,43 @@ +from sqlalchemy import Column, Integer, String, DateTime, Enum +from sqlalchemy.sql import func +from src.db.base import Base + + +class Document(Base): + __tablename__ = 'documents' + + id = Column(Integer, primary_key=True) + filename = Column(String, nullable=False) + file_path = Column(String, nullable=False) + translated_path = Column(String) + file_size = Column(Integer, nullable=False) + upload_date = Column(DateTime, default=func.now()) + status = Column(Enum('uploaded', 'translating', 'translated', name='document_status'), default='uploaded') + source_language = Column(String, nullable=False) + target_language = Column(String, nullable=False) + + def __repr__(self): + return f"" + + @classmethod + def from_dict(cls, data): + return cls( + filename=data['filename'], + file_path=data['file_path'], + file_size=data['file_size'], + source_language=data['source_language'], + target_language=data['target_language'] + ) + + def to_dict(self): + return { + 'id': self.id, + 'filename': self.filename, + 'file_path': self.file_path, + 'translated_path': self.translated_path, + 'file_size': self.file_size, + 'upload_date': self.upload_date.isoformat() if self.upload_date else None, + 'status': self.status, + 'source_language': self.source_language, + 'target_language': self.target_language + } \ No newline at end of file diff --git a/storage/file_system.py b/storage/src/file_system.py similarity index 96% rename from storage/file_system.py rename to storage/src/file_system.py index 47474e8..b0aba65 100644 --- a/storage/file_system.py +++ b/storage/src/file_system.py @@ -3,7 +3,7 @@ from datetime import datetime from utils.decorators import handle_error from utils.logger import LoggerManager -from io import BytesIO + class FileSystemManager: def __init__(self, base_path): @@ -15,7 +15,7 @@ def __init__(self, base_path): def _get_directory_path(self, date=None): """ Genera la ruta del directorio basada en la fecha. - + :param date: Fecha para la cual generar la ruta (default: fecha actual) :return: Ruta del directorio """ @@ -27,7 +27,7 @@ def _get_directory_path(self, date=None): def save_file(self, file_or_content, filename): """ Guarda un archivo o contenido en el sistema de archivos. - + :param file_or_content: Objeto de archivo, bytes, o string a guardar :param filename: Nombre del archivo :return: (path del archivo guardado, tamaño del archivo) @@ -35,10 +35,10 @@ def save_file(self, file_or_content, filename): timestamp = datetime.now() dir_path = self._get_directory_path(timestamp) os.makedirs(dir_path, exist_ok=True) - + safe_filename = f"{timestamp.strftime('%H%M%S')}_{filename}" file_path = os.path.join(dir_path, safe_filename) - + if isinstance(file_or_content, (str, bytes)): mode = 'w' if isinstance(file_or_content, str) else 'wb' with open(file_path, mode) as f: @@ -48,7 +48,7 @@ def save_file(self, file_or_content, filename): shutil.copyfileobj(file_or_content, buffer) else: raise ValueError("Unsupported file or content type") - + file_size = os.path.getsize(file_path) LoggerManager.log_message(f"File saved: {file_path}, Size: {file_size} bytes", level='info') return file_path, file_size @@ -57,7 +57,7 @@ def save_file(self, file_or_content, filename): def get_file(self, file_path): """ Recupera un archivo del sistema de archivos. - + :param file_path: Ruta completa al archivo :return: Contenido del archivo o None si no existe """ @@ -74,7 +74,7 @@ def get_file(self, file_path): def delete_file(self, file_path): """ Elimina un archivo del sistema de archivos. - + :param file_path: Ruta completa al archivo a eliminar :return: True si se eliminó correctamente, False si no existía """ @@ -92,13 +92,14 @@ def delete_file(self, file_path): def list_files(self, date=None): """ Lista todos los archivos en el directorio especificado por la fecha. - + :param date: Fecha para la cual listar los archivos (default: fecha actual) :return: Lista de rutas completas de archivos en el directorio """ dir_path = self._get_directory_path(date) if os.path.exists(dir_path): - files = [os.path.join(dir_path, f) for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f))] + files = [os.path.join(dir_path, f) for f in os.listdir(dir_path) if + os.path.isfile(os.path.join(dir_path, f))] LoggerManager.log_message(f"Listed {len(files)} files in {dir_path}", level='info') return files else: @@ -109,7 +110,7 @@ def list_files(self, date=None): def get_file_size(self, file_path): """ Obtiene el tamaño de un archivo. - + :param file_path: Ruta completa al archivo :return: Tamaño del archivo en bytes, o None si el archivo no existe """ @@ -124,7 +125,7 @@ def get_file_size(self, file_path): def _cleanup_empty_dirs(self, path): """ Elimina directorios vacíos recursivamente. - + :param path: Ruta del directorio a verificar """ if not os.path.isdir(path): diff --git a/storage/src/main.py b/storage/src/main.py new file mode 100644 index 0000000..96cdaa1 --- /dev/null +++ b/storage/src/main.py @@ -0,0 +1,68 @@ +from flask import Flask, request, jsonify +from src.storage_service import StorageService +from config import Config +from src.utils.logger import LoggerManager + +app = Flask(__name__) +config = Config() +logger = LoggerManager(__name__) +storage_service = StorageService() + +@app.before_first_request +def initialize_database(): + logger.log_message("Initializing database...", level='info') + storage_service.initialize_database() + logger.log_message("Database initialized successfully", level='info') + +@app.route('/documents/', methods=['POST']) +def create_document(): + try: + data = request.json + doc_id = storage_service.add_new_document( + data['filename'], + data['file_path'], + data['file_size'], + data['source_language'], + data['target_language'] + ) + return jsonify({"id": doc_id}), 201 + except KeyError as e: + logger.log_message(f"Missing required field: {str(e)}", level='error') + return jsonify({"error": "Missing required field"}), 400 + except Exception as e: + logger.log_message(f"Error creating document: {str(e)}", level='error') + return jsonify({"error": "Internal server error"}), 500 + +@app.route('/documents/', methods=['GET']) +def get_document(doc_id): + document = storage_service.get_document(doc_id) + if document is None: + return jsonify({"error": "Document not found"}), 404 + return jsonify(document) + +@app.route('/documents//status', methods=['PUT']) +def update_document_status(doc_id): + new_status = request.json.get('status') + if not new_status: + return jsonify({"error": "Status is required"}), 400 + success = storage_service.update_document_status(doc_id, new_status) + if not success: + return jsonify({"error": "Document not found"}), 404 + return jsonify({"message": "Status updated successfully"}) + +@app.route('/documents/for_translation', methods=['GET']) +def get_documents_for_translation(): + limit = request.args.get('limit', default=10, type=int) + offset = request.args.get('offset', default=0, type=int) + documents = storage_service.get_documents_for_translation(limit, offset) + return jsonify(documents) + +@app.errorhandler(Exception) +def handle_exception(e): + logger.log_message(f"Unhandled exception: {str(e)}", level='error') + return jsonify({"error": "Internal server error"}), 500 + +if __name__ == "__main__": + logger.log_message("Starting storage service...", level='info') + app.run(host="0.0.0.0", port=8000, debug=config.DEBUG) + logger.log_message("Storage service stopped", level='info') \ No newline at end of file diff --git a/storage/redis_client.py b/storage/src/redis_client.py similarity index 98% rename from storage/redis_client.py rename to storage/src/redis_client.py index dc7000a..c15beb2 100644 --- a/storage/redis_client.py +++ b/storage/src/redis_client.py @@ -3,6 +3,7 @@ from utils.decorators import handle_error from utils.logger import LoggerManager + class RedisClient: def __init__(self): redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') @@ -13,7 +14,7 @@ def __init__(self): def publish(self, channel, message): """ Publica un mensaje en un canal específico. - + :param channel: El canal en el que publicar :param message: El mensaje a publicar :return: El número de clientes que recibieron el mensaje @@ -26,7 +27,7 @@ def publish(self, channel, message): def subscribe(self, channel): """ Suscribe al cliente a un canal específico. - + :param channel: El canal al que suscribirse :return: Un objeto PubSub para escuchar mensajes """ @@ -39,7 +40,7 @@ def subscribe(self, channel): def get(self, key): """ Obtiene el valor asociado a una clave. - + :param key: La clave a buscar :return: El valor asociado a la clave, o None si no existe """ @@ -54,7 +55,7 @@ def get(self, key): def set(self, key, value, expiration=None): """ Establece un valor para una clave, con una expiración opcional. - + :param key: La clave a establecer :param value: El valor a asociar con la clave :param expiration: Tiempo de expiración en segundos (opcional) @@ -71,7 +72,7 @@ def set(self, key, value, expiration=None): def delete(self, key): """ Elimina una clave y su valor asociado. - + :param key: La clave a eliminar :return: El número de claves eliminadas (0 o 1) """ diff --git a/storage/src/storage_service.py b/storage/src/storage_service.py new file mode 100644 index 0000000..4dae8d7 --- /dev/null +++ b/storage/src/storage_service.py @@ -0,0 +1,76 @@ +from db.managers.document_manager import DocumentManager +from utils.logger import LoggerManager +from ..config import Config + +class StorageService: + def __init__(self): + self.config = Config() + self.doc_manager = DocumentManager() + self.logger = LoggerManager(__name__) + + def initialize_database(self): + try: + self.doc_manager.create_tables() + self.logger.log_message("Database tables created successfully", level='info') + except Exception as e: + self.logger.log_message(f"Error creating database tables: {str(e)}", level='error') + raise + + def add_new_document(self, filename, file_path, file_size, source_language, target_language): + try: + doc_id = self.doc_manager.add_document(filename, file_path, file_size, source_language, target_language) + self.logger.log_message(f"New document added with ID: {doc_id}", level='info') + return doc_id + except Exception as e: + self.logger.log_message(f"Error adding new document: {str(e)}", level='error') + raise + + def get_document(self, doc_id): + try: + document = self.doc_manager.get_document(doc_id) + if document: + self.logger.log_message(f"Retrieved document with ID: {doc_id}", level='info') + else: + self.logger.log_message(f"Document with ID {doc_id} not found", level='warning') + return document + except Exception as e: + self.logger.log_message(f"Error retrieving document: {str(e)}", level='error') + raise + + def update_document_status(self, doc_id, new_status): + try: + success = self.doc_manager.update_document_status(doc_id, new_status) + if success: + self.logger.log_message(f"Updated status of document {doc_id} to {new_status}", level='info') + else: + self.logger.log_message(f"Failed to update status of document {doc_id}", level='warning') + return success + except Exception as e: + self.logger.log_message(f"Error updating document status: {str(e)}", level='error') + raise + + def get_documents_for_translation(self, limit=10, offset=0): + try: + docs = self.doc_manager.get_documents_for_translation(limit, offset) + self.logger.log_message(f"Retrieved {len(docs)} documents for translation", level='info') + return docs + except Exception as e: + self.logger.log_message(f"Error retrieving documents for translation: {str(e)}", level='error') + raise + +# Example usage +if __name__ == "__main__": + storage_service = StorageService() + storage_service.initialize_database() + + # Add a new document + doc_id = storage_service.add_new_document("example.txt", "/path/to/file", 1024, "en", "es") + + # Retrieve the document + document = storage_service.get_document(doc_id) + + # Update document status + storage_service.update_document_status(doc_id, "translating") + + # Get documents for translation + docs_to_translate = storage_service.get_documents_for_translation(limit=10, offset=0) \ No newline at end of file diff --git a/storage/utils/__init__.py b/storage/src/utils/__init__.py similarity index 100% rename from storage/utils/__init__.py rename to storage/src/utils/__init__.py diff --git a/storage/utils/decorators.py b/storage/src/utils/decorators.py similarity index 100% rename from storage/utils/decorators.py rename to storage/src/utils/decorators.py diff --git a/storage/utils/config.py b/storage/src/utils/helpers.py similarity index 100% rename from storage/utils/config.py rename to storage/src/utils/helpers.py diff --git a/storage/src/utils/logger.py b/storage/src/utils/logger.py new file mode 100644 index 0000000..c9acd71 --- /dev/null +++ b/storage/src/utils/logger.py @@ -0,0 +1 @@ +import logging import os class LoggerManager: _logger = None @classmethod def setup_logger(cls, name=None, log_file=None, level=logging.INFO): """Configura el logger con un formato estándar.""" if cls._logger is None: # Evitar configurar el logger más de una vez if name is None: name = 'app_logger' if log_file is None: log_file = 'logs/app.log' # Puede ser configurado dinámicamente os.makedirs(os.path.dirname(log_file), exist_ok=True) cls._logger = logging.getLogger(name) cls._logger.setLevel(level) handler = logging.FileHandler(log_file) formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) cls._logger.addHandler(handler) @classmethod def log_message(cls, message, level='info'): cls.setup_logger() # Se puede configurar aquí si se desea un logger predeterminado if level == 'info': cls._logger.info(message) elif level == 'warning': cls._logger.warning(message) elif level == 'error': cls._logger.error(message) else: cls._logger.debug(message) \ No newline at end of file diff --git a/storage/storage.py b/storage/storage.py deleted file mode 100644 index 82c2bfd..0000000 --- a/storage/storage.py +++ /dev/null @@ -1,97 +0,0 @@ -import os -from db import DatabaseManager -from file_system import FileSystemManager -from redis_client import RedisClient -from utils.decorators import handle_error -from utils.logger import LoggerManager -from utils.config import REDIS_CHANNEL_NEW_DOCUMENT, REDIS_CHANNEL_TRANSLATION_COMPLETE - -class StorageService: - def __init__(self): - self.db_manager = DatabaseManager() - self.file_manager = FileSystemManager(os.getenv('STORAGE_PATH', '/data')) - self.redis_client = RedisClient() - - @handle_error - def upload_pdf(self, file, filename, source_language, target_language): - """ - Paso 1: Subida de PDF - """ - LoggerManager.log_message(f"Uploading PDF: {filename}") - file_path, file_size = self.file_manager.save_file(file, filename) - doc_id = self.db_manager.add_document(filename, file_path, file_size, source_language, target_language) - - # Notificar al Servicio de Traducción - self.redis_client.publish(REDIS_CHANNEL_NEW_DOCUMENT, str(doc_id)) - - LoggerManager.log_message(f"PDF uploaded successfully. ID: {doc_id}") - return doc_id - - @handle_error - def store_translated_text(self, doc_id, translated_text): - """ - Paso 3: Almacenamiento del Documento Traducido - """ - LoggerManager.log_message(f"Storing translated text for document: {doc_id}") - - # Obtener el documento original - original_doc = self.db_manager.get_document(doc_id) - if not original_doc: - raise ValueError(f"Document with ID {doc_id} not found") - - # Guardar el texto traducido - translated_filename = f"translated_{os.path.basename(original_doc.file_path)}" - translated_path, _ = self.file_manager.save_file(translated_text, translated_filename) - - # Actualizar metadatos en PostgreSQL - self.db_manager.update_document_translated_path(doc_id, translated_path) - - # Notificar al Servicio de Notificación - self.redis_client.publish(REDIS_CHANNEL_TRANSLATION_COMPLETE, str(doc_id)) - - LoggerManager.log_message(f"Translated text stored for document: {doc_id}") - - @handle_error - def get_document(self, doc_id): - """ - Obtener información del documento - """ - LoggerManager.log_message(f"Retrieving document: {doc_id}") - doc = self.db_manager.get_document(doc_id) - if doc: - return { - 'id': doc.id, - 'filename': doc.filename, - 'file_path': doc.file_path, - 'translated_path': doc.translated_path, - 'status': doc.status, - 'source_language': doc.source_language, - 'target_language': doc.target_language - } - LoggerManager.log_message(f"Document not found: {doc_id}", level='warning') - return None - - @handle_error - def get_document_content(self, doc_id, translated=False): - """ - Obtener el contenido del documento (original o traducido) - """ - doc = self.db_manager.get_document(doc_id) - if not doc: - LoggerManager.log_message(f"Document not found: {doc_id}", level='warning') - return None - - if translated and not doc.translated_path: - LoggerManager.log_message(f"Translated version not available for document: {doc_id}", level='warning') - return None - - file_path = doc.translated_path if translated else doc.file_path - return self.file_manager.get_file(file_path) - - @handle_error - def get_documents_for_translation(self): - """ - Obtener documentos pendientes de traducción - """ - LoggerManager.log_message("Retrieving documents for translation") - return self.db_manager.get_documents_for_translation() diff --git a/storage/tests/conftest.py b/storage/tests/conftest.py index 3c71bde..6d11531 100644 --- a/storage/tests/conftest.py +++ b/storage/tests/conftest.py @@ -1,3 +1,33 @@ -import sys -import os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) \ No newline at end of file +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from src.db.base import BaseManager, Base +from src.db.managers.document_manager import DocumentManager +from config import TestingConfig + + +@pytest.fixture(scope="session") +def app_config(): + return TestingConfig() + +@pytest.fixture(scope="function") +def base_manager(app_config): + manager = BaseManager() + manager.config = app_config + manager.create_tables() + yield manager + Base.metadata.drop_all(manager.engine) + +@pytest.fixture(scope="function") +def db_session(base_manager): + with base_manager.get_session() as session: + yield session + +@pytest.fixture(scope="function") +def doc_manager(db_engine, app_config): + manager = DocumentManager() + manager.engine = db_engine + manager.SessionLocal = sessionmaker(bind=db_engine) + Base.metadata.create_all(db_engine) + yield manager + Base.metadata.drop_all(db_engine) \ No newline at end of file diff --git a/storage/tests/test_document_manager.py b/storage/tests/test_document_manager.py new file mode 100644 index 0000000..c9027a2 --- /dev/null +++ b/storage/tests/test_document_manager.py @@ -0,0 +1,86 @@ +import pytest +from src.db.managers.document_manager import DocumentManager +from sqlalchemy.exc import SQLAlchemyError +from unittest.mock import patch, MagicMock + +@pytest.fixture +def doc_manager(base_manager): + return DocumentManager() + +class TestDocumentManager: + + def test_add_document(self, doc_manager): + doc_id = doc_manager.add_document("test.txt", "/path/to/test.txt", 1024, "en", "es") + assert doc_id is not None + doc = doc_manager.get_document(doc_id) + assert doc['filename'] == "test.txt" + assert doc['status'] == "uploaded" + + def test_get_document(self, doc_manager): + doc_id = doc_manager.add_document("test.txt", "/path/to/test.txt", 1024, "en", "es") + doc = doc_manager.get_document(doc_id) + assert doc is not None + assert doc['filename'] == "test.txt" + + def test_get_nonexistent_document(self, doc_manager): + doc = doc_manager.get_document(999) + assert doc is None + + def test_update_document_status(self, doc_manager): + doc_id = doc_manager.add_document("test.txt", "/path/to/test.txt", 1024, "en", "es") + success = doc_manager.update_document_status(doc_id, "translating") + assert success + doc = doc_manager.get_document(doc_id) + assert doc['status'] == "translating" + + def test_update_nonexistent_document_status(self, doc_manager): + success = doc_manager.update_document_status(999, "translating") + assert not success + + def test_update_document_translated_path(self, doc_manager): + doc_id = doc_manager.add_document("test.txt", "/path/to/test.txt", 1024, "en", "es") + success = doc_manager.update_document_translated_path(doc_id, "/path/to/translated.txt") + assert success + doc = doc_manager.get_document(doc_id) + assert doc['translated_path'] == "/path/to/translated.txt" + assert doc['status'] == "translated" + + def test_get_documents_for_translation(self, doc_manager): + doc_manager.add_document("test1.txt", "/path/to/test1.txt", 1024, "en", "es") + doc_manager.add_document("test2.txt", "/path/to/test2.txt", 2048, "en", "fr") + docs = doc_manager.get_documents_for_translation() + assert len(docs) == 2 + assert all(doc['status'] == "uploaded" for doc in docs) + + def test_get_documents_for_translation_with_pagination(self, doc_manager): + for i in range(5): + doc_manager.add_document(f"test{i}.txt", f"/path/to/test{i}.txt", 1024, "en", "es") + docs = doc_manager.get_documents_for_translation(limit=2, offset=1) + assert len(docs) == 2 + assert docs[0]['filename'] == "test1.txt" + assert docs[1]['filename'] == "test2.txt" + + def test_delete_document(self, doc_manager): + doc_id = doc_manager.add_document("test.txt", "/path/to/test.txt", 1024, "en", "es") + success = doc_manager.delete_document(doc_id) + assert success + doc = doc_manager.get_document(doc_id) + assert doc is None + + def test_delete_nonexistent_document(self, doc_manager): + success = doc_manager.delete_document(999) + assert not success + + @patch('db.managers.document_manager.LoggerManager') + def test_add_document_logs_error(self, mock_logger, doc_manager): + with patch.object(doc_manager, 'get_session', side_effect=SQLAlchemyError("Test error")): + with pytest.raises(SQLAlchemyError): + doc_manager.add_document("test.txt", "/path/to/test.txt", 1024, "en", "es") + mock_logger.log_message.assert_called_with("Error adding document to database: Test error", level='error') + + @patch('db.managers.document_manager.LoggerManager') + def test_get_document_logs_error(self, mock_logger, doc_manager): + with patch.object(doc_manager, 'get_session', side_effect=SQLAlchemyError("Test error")): + with pytest.raises(SQLAlchemyError): + doc_manager.get_document(1) + mock_logger.log_message.assert_called_with("Error retrieving document: Test error", level='error') \ No newline at end of file diff --git a/storage/tests/test_integration.py b/storage/tests/test_integration.py new file mode 100644 index 0000000..d4a0c33 --- /dev/null +++ b/storage/tests/test_integration.py @@ -0,0 +1,22 @@ +class TestIntegration: + def test_full_document_workflow(self, doc_manager, app_config): + # Add a document + doc_id = doc_manager.add_document("integration_test.txt", f"{app_config.STORAGE_PATH}/integration_test.txt", + 1024, "en", "es") + + # Verify it was added correctly + doc = doc_manager.get_document(doc_id) + assert doc.filename == "integration_test.txt" + assert doc.status == "uploaded" + + # Update the status + success = doc_manager.update_document_status(doc_id, "translating") + assert success + + # Verify the status was updated + doc = doc_manager.get_document(doc_id) + assert doc.status == "translating" + + # Verify it's not in the list of documents to translate + docs_to_translate = doc_manager.get_documents_for_translation() + assert doc_id not in [d.id for d in docs_to_translate] \ No newline at end of file diff --git a/storage/tests/test_storage_service.py b/storage/tests/test_storage_service.py deleted file mode 100644 index 9226e8b..0000000 --- a/storage/tests/test_storage_service.py +++ /dev/null @@ -1,42 +0,0 @@ -import pytest -from storage import StorageService -from db import DatabaseManager -from file_system import FileSystemManager -from redis_client import RedisClient - -@pytest.fixture -def storage_service(): - # Configurar los mocks o instancias de prueba para las dependencias - db_manager = DatabaseManager() - file_manager = FileSystemManager("/tmp/test_storage") - redis_client = RedisClient() - - # Crear una instancia de StorageService con las dependencias - return StorageService(db_manager, file_manager, redis_client) - -def test_storage_service_initialization(storage_service): - assert storage_service is not None - assert isinstance(storage_service, StorageService) - -def test_store_document(storage_service): - doc_id = "test_doc_1" - content = "This is a test document" - result = storage_service.store_document(doc_id, content) - assert result is True - - # Verificar que el documento se puede recuperar - retrieved_content = storage_service.get_document(doc_id) - assert retrieved_content == content - -def test_delete_document(storage_service): - doc_id = "test_doc_2" - content = "This is another test document" - storage_service.store_document(doc_id, content) - - # Verificar que el documento se puede eliminar - result = storage_service.delete_document(doc_id) - assert result is True - - # Verificar que el documento ya no se puede recuperar - with pytest.raises(Exception): # Ajusta esto al tipo de excepción que lanzas cuando un documento no se encuentra - storage_service.get_document(doc_id) \ No newline at end of file diff --git a/storage/utils/logger.py b/storage/utils/logger.py deleted file mode 100644 index b14a629..0000000 --- a/storage/utils/logger.py +++ /dev/null @@ -1,33 +0,0 @@ -import logging -import os - -class LoggerManager: - _logger = None - - @classmethod - def setup_logger(cls, name=None, log_file=None, level=logging.INFO): - """Configura el logger con un formato estándar.""" - if cls._logger is None: # Evitar configurar el logger más de una vez - if name is None: - name = 'app_logger' - if log_file is None: - log_file = 'logs/app.log' # Puede ser configurado dinámicamente - os.makedirs(os.path.dirname(log_file), exist_ok=True) - cls._logger = logging.getLogger(name) - cls._logger.setLevel(level) - handler = logging.FileHandler(log_file) - formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') - handler.setFormatter(formatter) - cls._logger.addHandler(handler) - - @classmethod - def log_message(cls, message, level='info'): - cls.setup_logger() # Se puede configurar aquí si se desea un logger predeterminado - if level == 'info': - cls._logger.info(message) - elif level == 'warning': - cls._logger.warning(message) - elif level == 'error': - cls._logger.error(message) - else: - cls._logger.debug(message) \ No newline at end of file