diff --git a/.devcontainer/api/Dockerfile b/.devcontainer/api/Dockerfile new file mode 100644 index 0000000..edfee93 --- /dev/null +++ b/.devcontainer/api/Dockerfile @@ -0,0 +1,62 @@ +# Use Python base image for development - using Debian Bookworm (stable) +FROM mcr.microsoft.com/devcontainers/python:3.10-bookworm + +# Set timezone +ENV TZ="Europe/Helsinki" +ENV PYTHONUNBUFFERED=1 + +# Install system dependencies including GDAL for geopandas +RUN apt-get update && apt-get install -y \ + postgresql-client \ + curl \ + wget \ + git \ + gdal-bin \ + libgdal-dev \ + libgeos-dev \ + libproj-dev \ + libspatialindex-dev \ + build-essential \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* + +# Install Azure Functions Core Tools (only on AMD64, skip on ARM64/Apple Silicon). The signing key is downloaded to a file with curl -f instead of being piped into gpg, so a failed download aborts the build. +RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then \ + curl -fsSL https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc && \ + gpg --dearmor < /tmp/microsoft.asc > /etc/apt/trusted.gpg.d/microsoft.gpg && rm -f /tmp/microsoft.asc && \ + sh -c 'echo "deb [arch=amd64] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/dotnetdev.list' && \ + apt-get update && \ + apt-get install -y azure-functions-core-tools-4 && \ + apt-get clean -y && \ + rm -rf /var/lib/apt/lists/*; \ + else \ + echo "Azure Functions Core Tools not available for $ARCH architecture.
Skipping installation."; \ + fi + +# Set working directory +WORKDIR /workspace + +# Install Python dependencies (pip runs with --no-cache-dir below so its download cache is not stored in the image layer) +COPY python/requirements.txt /tmp/requirements.txt + +# Set GDAL environment variables for building Python packages +ENV GDAL_CONFIG=/usr/bin/gdal-config +ENV CPLUS_INCLUDE_PATH=/usr/include/gdal +ENV C_INCLUDE_PATH=/usr/include/gdal + +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir debugpy pytest pytest-asyncio httpx \ + && pip install --no-cache-dir -r /tmp/requirements.txt + +# Create .vscode-server directories with correct permissions +RUN mkdir -p /home/vscode/.vscode-server/bin \ + && mkdir -p /home/vscode/.vscode-server/data/Machine \ + && mkdir -p /home/vscode/.vscode-server/extensions \ + && mkdir -p /home/vscode/.vscode-server-insiders/bin \ + && mkdir -p /home/vscode/.vscode-server-insiders/extensions \ + && chown -R vscode:vscode /home/vscode + +# The vscode user already exists in the base image +# Switch to non-root user +USER vscode diff --git a/.devcontainer/api/README.md b/.devcontainer/api/README.md new file mode 100644 index 0000000..ee2aab9 --- /dev/null +++ b/.devcontainer/api/README.md @@ -0,0 +1,95 @@ +# HFP Analytics API Development Container + +This devcontainer provides a complete development environment for the HFP Analytics FastAPI application. + +## What's Included + +- **Python 3.10** with all project dependencies +- **Azure Functions Core Tools** for Azure development (AMD64 only) +- **PostgreSQL client** for database operations +- **TimescaleDB** database instance +- **Azurite** for local Azure Storage emulation +- **VS Code extensions** for Python, Azure, Docker, and more +- **ARM64 (Apple Silicon) support** - works on M1/M2/M3 Macs + +## Getting Started + +1. **Open in Dev Container** + - Open this folder in VS Code + - Press `F1` and select "Dev Containers: Reopen in Container" + - Wait for the container to build and start + +2.
**Run the FastAPI App** + ```bash + cd python + uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload + ``` + + Or use the VS Code task: `Terminal > Run Task > Run FastAPI Development Server` + +3. **Access the Application** + - FastAPI: http://localhost:8000 + - API Docs: http://localhost:8000/docs + - PostgreSQL: localhost:5433 (user: postgres, password: postgres) + - Azurite Blob: http://localhost:10100 + - Azurite Queue: http://localhost:10101 + - Azurite Table: http://localhost:10102 + +## Debugging + +Use the built-in debugger: +1. Set breakpoints in your code +2. Press `F5` or go to "Run and Debug" +3. Select "Python: FastAPI" configuration + +## Database Setup + +Initialize the database schema: +```bash +psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql +``` + +Or use the VS Code task: `Terminal > Run Task > Database: Run Migrations` + +## Running Tests + +```bash +cd python +pytest +``` + +Or use the VS Code task: `Terminal > Run Task > Run Tests` + +## Tips + +- All changes to Python files trigger auto-reload +- Use `.env` file for environment variables +- The workspace is mounted at `/workspace` +- Extensions and settings are pre-configured + +## Ports + +| Service | Port | Description | +|---------|------|-------------| +| FastAPI | 8000 | Main API server | +| PostgreSQL | 5433 | TimescaleDB database (remapped to avoid conflicts) | +| Azurite Blob | 10100 | Azure Blob Storage emulator (remapped) | +| Azurite Queue | 10101 | Azure Queue Storage emulator (remapped) | +| Azurite Table | 10102 | Azure Table Storage emulator (remapped) | + +## Troubleshooting + +**Container won't start:** +- Make sure Docker is running +- Check if ports 5433, 8000, 10100, 10101, 10102 are available +- Port 5433 is used instead of 5432 to avoid conflicts with local PostgreSQL +- Ports 10100-10102 are used for Azurite to avoid conflicts with main docker-compose +- Try rebuilding: `F1 > Dev Containers: Rebuild Container` + +**Database 
connection issues:** +- Wait for the database health check to pass (about 10-15 seconds) +- Check connection string in `.env` file + +**Import errors:** +- Make sure PYTHONPATH is set correctly: `export PYTHONPATH=/workspace/python` +- Reinstall dependencies: `pip install -r python/requirements.txt` diff --git a/.devcontainer/api/devcontainer.json b/.devcontainer/api/devcontainer.json new file mode 100644 index 0000000..ec8ef54 --- /dev/null +++ b/.devcontainer/api/devcontainer.json @@ -0,0 +1,100 @@ +{ + "name": "HFP Analytics API (FastAPI)", + "dockerComposeFile": [ + "../docker-compose.yml" + ], + "service": "api", + "workspaceFolder": "/workspace", + "shutdownAction": "stopCompose", + + // Forward ports for the API, database, and Azurite + "forwardPorts": [ + 8000, // FastAPI + 5433, // PostgreSQL (remapped to avoid conflict with host) + 10100, // Azurite Blob (remapped to avoid conflict) + 10101, // Azurite Queue (remapped to avoid conflict) + 10102 // Azurite Table (remapped to avoid conflict) + ], + + "portsAttributes": { + "8000": { + "label": "FastAPI", + "onAutoForward": "notify" + }, + "5433": { + "label": "PostgreSQL" + }, + "10100": { + "label": "Azurite Blob" + }, + "10101": { + "label": "Azurite Queue" + }, + "10102": { + "label": "Azurite Table" + } + }, + + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.black-formatter", + "ms-python.isort", + "ms-python.debugpy", + "ms-azuretools.vscode-azurefunctions", + "ms-azuretools.vscode-docker", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "GitHub.copilot", + "esbenp.prettier-vscode" + ], + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": false, + "python.formatting.provider": "black", + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + 
"source.organizeImports": "explicit" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.python" + }, + "launch": { + "configurations": [], + "compounds": [] + }, + "tasks": { + "version": "2.0.0", + "tasks": [] + } + } + } + }, + + // Commands to run after container is created + "postCreateCommand": "pip install -r python/requirements.txt && pip install debugpy pytest pytest-asyncio httpx && mkdir -p /workspace/.vscode && cp /workspace/.devcontainer/shared/launch.json /workspace/.vscode/launch.json && cp /workspace/.devcontainer/shared/tasks.json /workspace/.vscode/tasks.json", + + // Commands to run when attaching to existing container + "postAttachCommand": "echo 'Container attached. Run: cd python && uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload'", + + // Features to add to the container + "features": { + "ghcr.io/devcontainers/features/git:1": {}, + "ghcr.io/devcontainers/features/github-cli:1": {} + }, + + // Set environment variables + "containerEnv": { + "PYTHONPATH": "/workspace/python", + "PYTHONUNBUFFERED": "1", + "TZ": "Europe/Helsinki" + }, + + // Run as non-root user + "remoteUser": "vscode" +} \ No newline at end of file diff --git a/.devcontainer/api/launch.json b/.devcontainer/api/launch.json new file mode 100644 index 0000000..74444f3 --- /dev/null +++ b/.devcontainer/api/launch.json @@ -0,0 +1,51 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: FastAPI", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "args": [ + "api.main:app", + "--host", + "0.0.0.0", + "--port", + "8000", + "--reload" + ], + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + }, + "jinja": true, + "justMyCode": false + }, + { + "name": "Python: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + 
"name": "Python: Pytest", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "args": [ + "-v", + "${file}" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + } + ] +} diff --git a/.devcontainer/api/tasks.json b/.devcontainer/api/tasks.json new file mode 100644 index 0000000..934352d --- /dev/null +++ b/.devcontainer/api/tasks.json @@ -0,0 +1,36 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Run FastAPI Development Server", + "type": "shell", + "command": "cd python && uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": { + "kind": "build", + "isDefault": true + } + }, + { + "label": "Run Tests", + "type": "shell", + "command": "cd python && pytest", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "shared" + } + }, + { + "label": "Database: Run Migrations", + "type": "shell", + "command": "psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql", + "problemMatcher": [] + } + ] +} diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 0000000..57b8b3d --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,79 @@ +services: + api: + build: + context: .. + dockerfile: .devcontainer/api/Dockerfile + volumes: + # Mount the workspace + - ..:/workspace:cached + # Mount vscode server extensions for persistence + - vscode-server-extensions:/home/vscode/.vscode-server/extensions + - vscode-server-insiders-extensions:/home/vscode/.vscode-server-insiders/extensions + command: sleep infinity + depends_on: + - db + - azurite + environment: + - PYTHONUNBUFFERED=1 + - TZ=Europe/Helsinki + env_file: + - ../.env + + importer: + build: + context: .. 
+ dockerfile: .devcontainer/importer/Dockerfile + volumes: + # Mount the workspace + - ..:/workspace:cached + # Mount vscode server extensions for persistence (paths must be under the home of the vscode user the importer image runs as) + - vscode-server-extensions-importer:/home/vscode/.vscode-server/extensions + - vscode-server-insiders-extensions-importer:/home/vscode/.vscode-server-insiders/extensions + command: sleep infinity + depends_on: + - db + - azurite + environment: + - PYTHONUNBUFFERED=1 + - TZ=Europe/Helsinki + - AzureWebJobsScriptRoot=/home/site/wwwroot + - WEBSITE_HOSTNAME=localhost:7072 + env_file: + - ../.env + + db: + image: timescale/timescaledb:latest-pg16 + restart: unless-stopped + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: analytics + ports: + - "127.0.0.1:5433:5432" + volumes: + - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + start_period: 5s + timeout: 20s + retries: 3 + + azurite: + image: mcr.microsoft.com/azure-storage/azurite + restart: unless-stopped + ports: + - "127.0.0.1:10100:10000" + - "127.0.0.1:10101:10001" + - "127.0.0.1:10102:10002" + volumes: + - azurite-data:/data + command: azurite --blobHost 0.0.0.0 --queueHost 0.0.0.0 --tableHost 0.0.0.0 + +volumes: + postgres-data: + azurite-data: + vscode-server-extensions: + vscode-server-insiders-extensions: + vscode-server-extensions-importer: + vscode-server-insiders-extensions-importer: diff --git a/.devcontainer/importer/Dockerfile b/.devcontainer/importer/Dockerfile new file mode 100644 index 0000000..ece17eb --- /dev/null +++ b/.devcontainer/importer/Dockerfile @@ -0,0 +1,71 @@ +# Use Python base image for development - using Debian Bookworm (stable) +FROM mcr.microsoft.com/devcontainers/python:3.10-bookworm + +# Set timezone +ENV TZ="Europe/Helsinki" +ENV PYTHONUNBUFFERED=1 + +# Install system dependencies including GDAL for geopandas +RUN apt-get update && apt-get install -y \ + postgresql-client \ + curl \ + wget \ + git \ + gdal-bin
\ + libgdal-dev \ + libgeos-dev \ + libproj-dev \ + libspatialindex-dev \ + build-essential \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js (needed for Azure Functions Core Tools on ARM64) +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt-get install -y nodejs && \ + apt-get clean -y && \ + rm -rf /var/lib/apt/lists/* + +# Install Azure Functions Core Tools +# - AMD64: Install from Microsoft repository (key is downloaded to a file with curl -f, not piped into gpg, so a failed download aborts the build) +# - ARM64: Install via npm (official method for ARM/Apple Silicon) +RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then \ + curl -fsSL https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc && \ + gpg --dearmor < /tmp/microsoft.asc > /etc/apt/trusted.gpg.d/microsoft.gpg && rm -f /tmp/microsoft.asc && \ + sh -c 'echo "deb [arch=amd64] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/dotnetdev.list' && \ + apt-get update && \ + apt-get install -y azure-functions-core-tools-4 && \ + apt-get clean -y && \ + rm -rf /var/lib/apt/lists/*; \ + else \ + echo "Installing Azure Functions Core Tools via npm for $ARCH architecture..."; \ + npm install -g azure-functions-core-tools@4 --unsafe-perm true; \ + fi + +# Set working directory +WORKDIR /workspace + +# Install Python dependencies (pip runs with --no-cache-dir below so its download cache is not stored in the image layer) +COPY python/requirements.txt /tmp/requirements.txt + +# Set GDAL environment variables for building Python packages +ENV GDAL_CONFIG=/usr/bin/gdal-config +ENV CPLUS_INCLUDE_PATH=/usr/include/gdal +ENV C_INCLUDE_PATH=/usr/include/gdal + +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir debugpy pytest pytest-asyncio httpx \ + && pip install --no-cache-dir -r /tmp/requirements.txt + +# Create .vscode-server directories with correct permissions +RUN mkdir -p /home/vscode/.vscode-server/bin \ + && mkdir -p /home/vscode/.vscode-server/data/Machine \ + && mkdir -p /home/vscode/.vscode-server/extensions \ + && mkdir -p /home/vscode/.vscode-server-insiders/bin \ + && mkdir -p /home/vscode/.vscode-server-insiders/extensions \ +
&& chown -R vscode:vscode /home/vscode + +# The vscode user already exists in the base image +# Switch to non-root user +USER vscode diff --git a/.devcontainer/importer/README.md b/.devcontainer/importer/README.md new file mode 100644 index 0000000..c6c36e5 --- /dev/null +++ b/.devcontainer/importer/README.md @@ -0,0 +1,208 @@ +# HFP Analytics Importer Development Container + +This devcontainer provides a complete development environment for the HFP Analytics Azure Functions (Importer) application. + +## What's Included + +- **Python 3.10** with all project dependencies +- **Azure Functions Core Tools v4** for local Azure Functions development +- **PostgreSQL client** for database operations +- **TimescaleDB** database instance +- **Azurite** for local Azure Storage emulation +- **VS Code extensions** for Python, Azure Functions, Docker, and more +- **ARM64 (Apple Silicon) support** - works on M1/M2/M3 Macs + +## Getting Started + +1. **Open in Dev Container** + - Open the `.devcontainer/importer` folder in VS Code + - Press `F1` and select "Dev Containers: Reopen in Container" + - Wait for the container to build and start + +2. **Start Azure Functions** + + **Method 1: Using VS Code Task (Recommended)** + - Press `Ctrl+Shift+P` (or `Cmd+Shift+P` on Mac) + - Select `Tasks: Run Task` + - Choose `Start Azure Functions` + + **Method 2: Using Terminal** + ```bash + cd python + func start --port 7071 + ``` + + **Method 3: Using the built-in command** + ```bash + cd python + func host start --port 7071 + ``` + +3. 
**Access the Application** + - Azure Functions: http://localhost:7071 + - Function Admin: http://localhost:7071/admin/functions + - PostgreSQL: localhost:5433 (user: postgres, password: postgres) - shared with API + - Azurite Blob: http://localhost:10100 - shared with API + - Azurite Queue: http://localhost:10101 - shared with API + - Azurite Table: http://localhost:10102 - shared with API + +## Available Functions + +The importer includes these Azure Functions: +- **httpPreprocess** - HTTP-triggered preprocessing function +- **importer** - Main import function +- **analyzer** - Analysis function +- **preprocess** - Preprocessing function +- **httpStart** (Durable) - Durable orchestration starter +- **orchestrator** (Durable) - Durable orchestrator +- **reclusterAnalysisActivity** (Durable) - Recluster analysis activity +- **setStatusActivity** (Durable) - Status setter activity +- **getStatusActivity** (Durable) - Status getter activity + +## Testing Functions + +### Test HTTP Function +```bash +# Test httpPreprocess +curl -X POST http://localhost:7071/httpPreprocess + +# Or use VS Code task: +# Ctrl+Shift+P -> Tasks: Run Task -> Trigger HTTP Function (httpPreprocess) +``` + +### View Function Logs +Azure Functions logs appear in the terminal where you ran `func start`. + +## Debugging + +### Debug Azure Functions: +1. Start Azure Functions with the task or terminal command +2. Set breakpoints in your Python code +3. Use `F5` or go to "Run and Debug" +4. Select "Attach to Python Functions" +5. Trigger your function via HTTP, timer, or queue + +### Debug Current File: +1. Open a Python file +2. Set breakpoints +3. Press `F5` +4. 
Select "Python: Current File" + +## Database Setup + +Initialize the database schema: +```bash +psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql +``` + +Or use the VS Code task: `Terminal > Run Task > Database: Run Migrations` + +## Running Tests + +```bash +cd python +pytest +``` + +Or use the VS Code task: `Terminal > Run Task > Run Tests` + +## Azure Functions Development Tips + +1. **Hot Reload**: Azure Functions automatically reload when you save Python files + +2. **View Function List**: + ```bash + cd python + func list + ``` + +3. **Check Function Status**: + ```bash + curl http://localhost:7071/admin/functions + ``` + +4. **Environment Variables**: Configure in `.env` file at workspace root + +5. **Function Configuration**: Each function has a `function.json` in its folder + +6. **Host Configuration**: Global settings in `python/host.json` + +## Ports + +| Service | Port | Description | +|---------|------|-------------| +| Azure Functions | 7071 | Main Functions host | +| PostgreSQL | 5433 | TimescaleDB database (shared with API) | +| Azurite Blob | 10100 | Azure Blob Storage emulator (shared with API) | +| Azurite Queue | 10101 | Azure Queue Storage emulator (shared with API) | +| Azurite Table | 10102 | Azure Table Storage emulator (shared with API) | + +## Troubleshooting + +**Container won't start:** +- Make sure Docker is running +- Check if ports 5433, 7071, 10100, 10101, 10102 are available +- Database and Azurite are shared with the API container +- Ports are remapped to avoid conflicts with main docker-compose +- Try rebuilding: `F1 > Dev Containers: Rebuild Container` + +**Functions won't start:** +- Verify you're in the `python` directory: `cd python` +- Check `host.json` exists in `python/` folder +- Ensure all function folders have `function.json` +- Check Azure Functions Core Tools: `func --version` + +**Database connection issues:** +- Wait for the database health check to pass (about 10-15 seconds) +- Check 
connection string in `.env` file +- Use port 5433 (not 5432) from host machine + +**Import errors:** +- Make sure PYTHONPATH is set: `export PYTHONPATH=/workspace/python` +- Reinstall dependencies: `pip install -r python/requirements.txt` + +**Storage connection errors:** +- Check Azurite is running: `docker ps | grep azurite` +- Verify storage connection string in `.env` uses correct ports (10100-10102) +- Default connection string: `DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10100/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10101/devstoreaccount1;TableEndpoint=http://127.0.0.1:10102/devstoreaccount1;` + +## Project Structure + +``` +python/ +├── host.json # Azure Functions host configuration +├── requirements.txt # Python dependencies +├── importer/ # Importer function +│ ├── function.json +│ └── __init__.py +├── analyzer/ # Analyzer function +├── preprocess/ # Preprocess function +├── httpPreprocess/ # HTTP preprocess function +├── durable/ # Durable functions +│ ├── httpStart/ +│ ├── orchestrator/ +│ └── *Activity/ +└── common/ # Shared code +``` + +## Useful Commands + +```bash +# List all functions +cd python && func list + +# Start with verbose logging +cd python && func start --port 7071 --verbose + +# Install new dependency (replace PACKAGE_NAME with the package to add) +pip install PACKAGE_NAME && pip freeze > python/requirements.txt + +# Run specific test file +cd python && pytest tests/test_importer.py -v + +# Check Azure Functions version +func --version + +# View function templates +func templates list +``` diff --git a/.devcontainer/importer/devcontainer.json b/.devcontainer/importer/devcontainer.json new file mode 100644 index 0000000..85b9c4f --- /dev/null +++ b/.devcontainer/importer/devcontainer.json @@ -0,0 +1,106 @@ +{ + "name": "HFP Analytics Importer (Azure Functions)", + "dockerComposeFile": [ + "../docker-compose.yml" + ], + "service": "importer", +
"workspaceFolder": "/workspace", + "shutdownAction": "stopCompose", + + // Forward ports for Azure Functions, database, and Azurite + // Note: Database and Azurite are shared with API container + "forwardPorts": [ + 7071, // Azure Functions + 5433, // PostgreSQL (shared with API) + 10100, // Azurite Blob (shared with API) + 10101, // Azurite Queue (shared with API) + 10102 // Azurite Table (shared with API) + ], + + "portsAttributes": { + "7071": { + "label": "Azure Functions", + "onAutoForward": "notify" + }, + "5433": { + "label": "PostgreSQL" + }, + "10100": { + "label": "Azurite Blob" + }, + "10101": { + "label": "Azurite Queue" + }, + "10102": { + "label": "Azurite Table" + } + }, + + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.black-formatter", + "ms-python.isort", + "ms-python.debugpy", + "ms-azuretools.vscode-azurefunctions@1.19.0", + "ms-azuretools.vscode-docker", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "GitHub.copilot", + "esbenp.prettier-vscode" + ], + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": false, + "python.formatting.provider": "black", + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.python" + }, + "azureFunctions.deploySubpath": "python", + "azureFunctions.projectRuntime": "~4", + "azureFunctions.projectLanguage": "Python", + "launch": { + "configurations": [], + "compounds": [] + }, + "tasks": { + "version": "2.0.0", + "tasks": [] + } + } + } + }, + + // Commands to run after container is created + "postCreateCommand": "pip install -r python/requirements.txt && pip install debugpy pytest pytest-asyncio httpx && mkdir -p /workspace/.vscode && cp 
/workspace/.devcontainer/shared/launch.json /workspace/.vscode/launch.json && cp /workspace/.devcontainer/shared/tasks.json /workspace/.vscode/tasks.json && cd /workspace/python && ln -sf durable/httpStart httpStart && ln -sf durable/orchestrator orchestrator && ln -sf durable/getStatusActivity getStatusActivity && ln -sf durable/setStatusActivity setStatusActivity && ln -sf durable/reclusterAnalysisActivity reclusterAnalysisActivity", + + // Commands to run when attaching to existing container + "postAttachCommand": "echo 'Container attached. To start Azure Functions:\n cd python\n func start --port 7071'", + + // Features to add to the container + "features": { + "ghcr.io/devcontainers/features/git:1": {}, + "ghcr.io/devcontainers/features/github-cli:1": {}, + "ghcr.io/devcontainers/features/azure-cli:1": {}, + // NOTE: the VS Code Azure Functions extension must be version 1.19.0 + }, + + // Set environment variables + "containerEnv": { + "PYTHONPATH": "/workspace/python", + "PYTHONUNBUFFERED": "1", + "TZ": "Europe/Helsinki" + }, + + // Run as non-root user + "remoteUser": "vscode" +} diff --git a/.devcontainer/importer/launch.json b/.devcontainer/importer/launch.json new file mode 100644 index 0000000..00d9aac --- /dev/null +++ b/.devcontainer/importer/launch.json @@ -0,0 +1,58 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Azure Functions: All Functions (Start All)", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "Start Azure Functions with Debugger", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + "remoteRoot": "/home/site/wwwroot" + } + ], + "justMyCode": false + }, + { + "name": "Python: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": "Python: Pytest", + "type": "debugpy", +
"request": "launch", + "module": "pytest", + "args": [ + "-v", + "${file}" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": "Attach to Python Functions dwys", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "func: host start" + } + ] +} diff --git a/.devcontainer/importer/tasks.json b/.devcontainer/importer/tasks.json new file mode 100644 index 0000000..af7248a --- /dev/null +++ b/.devcontainer/importer/tasks.json @@ -0,0 +1,100 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Start Azure Functions", + "type": "shell", + "command": "cd python && func start --port 7071", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": "none" + }, + { + "label": "Start Azure Functions (Debug Mode)", + "type": "shell", + "command": "cd python && func start --port 7071 --verbose", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true + }, + { + "label": "Start Azure Functions with Debugger", + "type": "shell", + "command": "cd python && func start --port 7071 --python-debug-port 9091", + "problemMatcher": [ + { + "pattern": [ + { + "regexp": ".", + "file": 1, + "location": 2, + "message": 3 + } + ], + "background": { + "activeOnStart": true, + "beginsPattern": ".*Azure Functions Core Tools.*", + "endsPattern": ".*(Worker process started and initialized|Host started|Job host started).*" + } + } + ], + "presentation": { + "reveal": "always", + "panel": "dedicated" + }, + "isBackground": true, + "runOptions": { + "instanceLimit": 1 + } + }, + { + "label": "Install Python Dependencies", + "type": "shell", + "command": "pip install -r python/requirements.txt", + "problemMatcher": [] + }, + { + "label": "Run Tests", + "type": "shell", + "command": "cd python && pytest", + 
"problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "shared" + } + }, + { + "label": "Database: Run Migrations", + "type": "shell", + "command": "psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql", + "problemMatcher": [] + }, + { + "label": "Trigger HTTP Function (httpPreprocess)", + "type": "shell", + "command": "curl -X POST http://localhost:7071/httpPreprocess", + "problemMatcher": [] + }, + { + "type": "func", + "label": "func: host start", + "command": "start", + "problemMatcher": "$func-python-watch", + "isBackground": true, + // "dependsOn": "pip install (functions)", + "options": { + "cwd": "${workspaceFolder}/python", + "env": { + "languageWorkers__python__arguments": "-m debugpy --listen 127.0.0.1:9091" + } + } + } + ] +} diff --git a/.devcontainer/shared/launch.json b/.devcontainer/shared/launch.json new file mode 100644 index 0000000..6573a63 --- /dev/null +++ b/.devcontainer/shared/launch.json @@ -0,0 +1,78 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: FastAPI", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "args": [ + "api.main:app", + "--host", + "0.0.0.0", + "--port", + "8000", + "--reload" + ], + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + }, + "jinja": true, + "justMyCode": false + }, + { + "name": "Azure Functions: All Functions (Start All)", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "Start Azure Functions with Debugger", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + "remoteRoot": "/home/site/wwwroot" + } + ], + "justMyCode": false + }, + { + "name": "Python: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": 
"Python: Pytest", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "args": [ + "-v", + "${file}" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": "Attach to Python Functions dwys", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "func: host start" + } + ] +} diff --git a/.devcontainer/shared/tasks.json b/.devcontainer/shared/tasks.json new file mode 100644 index 0000000..2044278 --- /dev/null +++ b/.devcontainer/shared/tasks.json @@ -0,0 +1,115 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Run FastAPI Development Server", + "type": "shell", + "command": "cd python && uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": { + "kind": "build", + "isDefault": true + } + }, + { + "label": "Start Azure Functions", + "type": "shell", + "command": "cd python && func start --port 7071", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": "none" + }, + { + "label": "Start Azure Functions (Debug Mode)", + "type": "shell", + "command": "cd python && func start --port 7071 --verbose", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true + }, + { + "label": "Start Azure Functions with Debugger", + "type": "shell", + "command": "cd python && func start --port 7071 --python-debug-port 9091", + "problemMatcher": [ + { + "pattern": [ + { + "regexp": ".", + "file": 1, + "location": 2, + "message": 3 + } + ], + "background": { + "activeOnStart": true, + "beginsPattern": ".*Azure Functions Core Tools.*", + "endsPattern": ".*(Worker process started and initialized|Host started|Job host started).*" + } + } + ], + "presentation": 
{ + "reveal": "always", + "panel": "dedicated" + }, + "isBackground": true, + "runOptions": { + "instanceLimit": 1 + } + }, + { + "label": "Install Python Dependencies", + "type": "shell", + "command": "pip install -r python/requirements.txt", + "problemMatcher": [] + }, + { + "label": "Run Tests", + "type": "shell", + "command": "cd python && pytest", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "shared" + } + }, + { + "label": "Database: Run Migrations", + "type": "shell", + "command": "psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql", + "problemMatcher": [] + }, + { + "label": "Trigger HTTP Function (httpPreprocess)", + "type": "shell", + "command": "curl -X POST http://localhost:7071/httpPreprocess", + "problemMatcher": [] + }, + { + "type": "func", + "label": "func: host start", + "command": "start", + "problemMatcher": "$func-python-watch", + "isBackground": true, + // "dependsOn": "pip install (functions)", + "options": { + "cwd": "${workspaceFolder}/python", + "env": { + "languageWorkers__python__arguments": "-m debugpy --listen 127.0.0.1:9091" + } + } + } + ] +} diff --git a/python/common/recluster.py b/python/common/recluster.py index 5533d8d..a9ecd85 100644 --- a/python/common/recluster.py +++ b/python/common/recluster.py @@ -12,13 +12,12 @@ import numpy as np import pandas as pd import zstandard as zstd -from sklearn.cluster import DBSCAN - from common.container_client import FlowAnalyticsContainerClient from common.database import pool from common.enums import ReclusterStatus from common.logger_util import CustomDbLogHandler from common.utils import get_season +from sklearn.cluster import DBSCAN logger = logging.getLogger("api") @@ -109,10 +108,16 @@ async def load_preprocess_files( dfs = [] decompressor = zstd.ZstdDecompressor() + + for r in results: compressed_data = r[0] decompressed_csv = decompressor.decompress(compressed_data) df = pd.read_csv(io.BytesIO(decompressed_csv), 
sep=";") + if "tst_median" in df.columns: + df["tst_median"] = pd.to_datetime(df["tst_median"], format="ISO8601").dt.tz_convert( + "UTC" + ) dfs.append(df) if not dfs: