From 09b2eb102be7776bcd040353e61e1f7aef3e89cf Mon Sep 17 00:00:00 2001 From: Damian Wysokinski Date: Mon, 1 Dec 2025 22:26:23 +0100 Subject: [PATCH 1/6] add devcontianer functionality for api --- .devcontainer/api/Dockerfile | 62 +++++++++++++++++++ .devcontainer/api/README.md | 95 +++++++++++++++++++++++++++++ .devcontainer/api/devcontainer.json | 92 ++++++++++++++++++++++++++++ .devcontainer/api/launch.json | 51 ++++++++++++++++ .devcontainer/api/tasks.json | 36 +++++++++++ 5 files changed, 336 insertions(+) create mode 100644 .devcontainer/api/Dockerfile create mode 100644 .devcontainer/api/README.md create mode 100644 .devcontainer/api/devcontainer.json create mode 100644 .devcontainer/api/launch.json create mode 100644 .devcontainer/api/tasks.json diff --git a/.devcontainer/api/Dockerfile b/.devcontainer/api/Dockerfile new file mode 100644 index 0000000..edfee93 --- /dev/null +++ b/.devcontainer/api/Dockerfile @@ -0,0 +1,62 @@ +# Use Python base image for development - using Debian Bookworm (stable) +FROM mcr.microsoft.com/devcontainers/python:3.10-bookworm + +# Set timezone +ENV TZ="Europe/Helsinki" +ENV PYTHONUNBUFFERED=1 + +# Install system dependencies including GDAL for geopandas +RUN apt-get update && apt-get install -y \ + postgresql-client \ + curl \ + wget \ + git \ + gdal-bin \ + libgdal-dev \ + libgeos-dev \ + libproj-dev \ + libspatialindex-dev \ + build-essential \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* + +# Install Azure Functions Core Tools (only on AMD64, skip on ARM64/Apple Silicon) +RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then \ + curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg && \ + mv microsoft.gpg /etc/apt/trusted.gpg.d/microsoft.gpg && \ + sh -c 'echo "deb [arch=amd64] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/dotnetdev.list' && \ + apt-get update && \ + apt-get install -y azure-functions-core-tools-4 && \ 
+ apt-get clean -y && \ + rm -rf /var/lib/apt/lists/*; \ + else \ + echo "Azure Functions Core Tools not available for $ARCH architecture. Skipping installation."; \ + fi + +# Set working directory +WORKDIR /workspace + +# Install Python dependencies +COPY python/requirements.txt /tmp/requirements.txt + +# Set GDAL environment variables for building Python packages +ENV GDAL_CONFIG=/usr/bin/gdal-config +ENV CPLUS_INCLUDE_PATH=/usr/include/gdal +ENV C_INCLUDE_PATH=/usr/include/gdal + +RUN pip install --upgrade pip \ + && pip install debugpy pytest pytest-asyncio httpx \ + && pip install -r /tmp/requirements.txt + +# Create .vscode-server directories with correct permissions +RUN mkdir -p /home/vscode/.vscode-server/bin \ + && mkdir -p /home/vscode/.vscode-server/data/Machine \ + && mkdir -p /home/vscode/.vscode-server/extensions \ + && mkdir -p /home/vscode/.vscode-server-insiders/bin \ + && mkdir -p /home/vscode/.vscode-server-insiders/extensions \ + && chown -R vscode:vscode /home/vscode + +# The vscode user already exists in the base image +# Switch to non-root user +USER vscode diff --git a/.devcontainer/api/README.md b/.devcontainer/api/README.md new file mode 100644 index 0000000..ee2aab9 --- /dev/null +++ b/.devcontainer/api/README.md @@ -0,0 +1,95 @@ +# HFP Analytics API Development Container + +This devcontainer provides a complete development environment for the HFP Analytics FastAPI application. + +## What's Included + +- **Python 3.10** with all project dependencies +- **Azure Functions Core Tools** for Azure development (AMD64 only) +- **PostgreSQL client** for database operations +- **TimescaleDB** database instance +- **Azurite** for local Azure Storage emulation +- **VS Code extensions** for Python, Azure, Docker, and more +- **ARM64 (Apple Silicon) support** - works on M1/M2/M3 Macs + +## Getting Started + +1. 
**Open in Dev Container** + - Open this folder in VS Code + - Press `F1` and select "Dev Containers: Reopen in Container" + - Wait for the container to build and start + +2. **Run the FastAPI App** + ```bash + cd python + uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload + ``` + + Or use the VS Code task: `Terminal > Run Task > Run FastAPI Development Server` + +3. **Access the Application** + - FastAPI: http://localhost:8000 + - API Docs: http://localhost:8000/docs + - PostgreSQL: localhost:5433 (user: postgres, password: postgres) + - Azurite Blob: http://localhost:10100 + - Azurite Queue: http://localhost:10101 + - Azurite Table: http://localhost:10102 + +## Debugging + +Use the built-in debugger: +1. Set breakpoints in your code +2. Press `F5` or go to "Run and Debug" +3. Select "Python: FastAPI" configuration + +## Database Setup + +Initialize the database schema: +```bash +psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql +``` + +Or use the VS Code task: `Terminal > Run Task > Database: Run Migrations` + +## Running Tests + +```bash +cd python +pytest +``` + +Or use the VS Code task: `Terminal > Run Task > Run Tests` + +## Tips + +- All changes to Python files trigger auto-reload +- Use `.env` file for environment variables +- The workspace is mounted at `/workspace` +- Extensions and settings are pre-configured + +## Ports + +| Service | Port | Description | +|---------|------|-------------| +| FastAPI | 8000 | Main API server | +| PostgreSQL | 5433 | TimescaleDB database (remapped to avoid conflicts) | +| Azurite Blob | 10100 | Azure Blob Storage emulator (remapped) | +| Azurite Queue | 10101 | Azure Queue Storage emulator (remapped) | +| Azurite Table | 10102 | Azure Table Storage emulator (remapped) | + +## Troubleshooting + +**Container won't start:** +- Make sure Docker is running +- Check if ports 5433, 8000, 10100, 10101, 10102 are available +- Port 5433 is used instead of 5432 to avoid conflicts with local 
PostgreSQL +- Ports 10100-10102 are used for Azurite to avoid conflicts with main docker-compose +- Try rebuilding: `F1 > Dev Containers: Rebuild Container` + +**Database connection issues:** +- Wait for the database health check to pass (about 10-15 seconds) +- Check connection string in `.env` file + +**Import errors:** +- Make sure PYTHONPATH is set correctly: `export PYTHONPATH=/workspace/python` +- Reinstall dependencies: `pip install -r python/requirements.txt` diff --git a/.devcontainer/api/devcontainer.json b/.devcontainer/api/devcontainer.json new file mode 100644 index 0000000..014000a --- /dev/null +++ b/.devcontainer/api/devcontainer.json @@ -0,0 +1,92 @@ +{ + "name": "HFP Analytics API (FastAPI)", + "dockerComposeFile": [ + "../docker-compose.yml" + ], + "service": "api", + "workspaceFolder": "/workspace", + "shutdownAction": "stopCompose", + + // Forward ports for the API, database, and Azurite + "forwardPorts": [ + 8000, // FastAPI + 5433, // PostgreSQL (remapped to avoid conflict with host) + 10100, // Azurite Blob (remapped to avoid conflict) + 10101, // Azurite Queue (remapped to avoid conflict) + 10102 // Azurite Table (remapped to avoid conflict) + ], + + "portsAttributes": { + "8000": { + "label": "FastAPI", + "onAutoForward": "notify" + }, + "5433": { + "label": "PostgreSQL" + }, + "10100": { + "label": "Azurite Blob" + }, + "10101": { + "label": "Azurite Queue" + }, + "10102": { + "label": "Azurite Table" + } + }, + + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.black-formatter", + "ms-python.isort", + "ms-python.debugpy", + "ms-azuretools.vscode-azurefunctions", + "ms-azuretools.vscode-docker", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "GitHub.copilot", + "esbenp.prettier-vscode" + ], + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": false, + 
"python.formatting.provider": "black", + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.python" + } + } + } + }, + + // Commands to run after container is created + "postCreateCommand": "pip install -r python/requirements.txt && pip install debugpy pytest pytest-asyncio httpx", + + // Commands to run when attaching to existing container + "postAttachCommand": "echo 'Container attached. Run: cd python && uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload'", + + // Features to add to the container + "features": { + "ghcr.io/devcontainers/features/git:1": {}, + "ghcr.io/devcontainers/features/github-cli:1": {} + }, + + // Set environment variables + "containerEnv": { + "PYTHONPATH": "/workspace/python", + "PYTHONUNBUFFERED": "1", + "TZ": "Europe/Helsinki" + }, + + // Run as non-root user + "remoteUser": "vscode" +} \ No newline at end of file diff --git a/.devcontainer/api/launch.json b/.devcontainer/api/launch.json new file mode 100644 index 0000000..74444f3 --- /dev/null +++ b/.devcontainer/api/launch.json @@ -0,0 +1,51 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: FastAPI", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "args": [ + "api.main:app", + "--host", + "0.0.0.0", + "--port", + "8000", + "--reload" + ], + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + }, + "jinja": true, + "justMyCode": false + }, + { + "name": "Python: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": "Python: Pytest", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "args": [ + "-v", + "${file}" + 
], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + } + ] +} diff --git a/.devcontainer/api/tasks.json b/.devcontainer/api/tasks.json new file mode 100644 index 0000000..934352d --- /dev/null +++ b/.devcontainer/api/tasks.json @@ -0,0 +1,36 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Run FastAPI Development Server", + "type": "shell", + "command": "cd python && uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": { + "kind": "build", + "isDefault": true + } + }, + { + "label": "Run Tests", + "type": "shell", + "command": "cd python && pytest", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "shared" + } + }, + { + "label": "Database: Run Migrations", + "type": "shell", + "command": "psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql", + "problemMatcher": [] + } + ] +} From ac1c3f0fa6a96393054949ebafb9c875907cfef3 Mon Sep 17 00:00:00 2001 From: Damian Wysokinski Date: Tue, 2 Dec 2025 09:37:29 +0100 Subject: [PATCH 2/6] added docker-compose for devcontainer --- .devcontainer/docker-compose.yml | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 .devcontainer/docker-compose.yml diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 0000000..7b500ec --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,56 @@ +services: + api: + build: + context: .. 
+ dockerfile: .devcontainer/api/Dockerfile + volumes: + # Mount the workspace + - ..:/workspace:cached + # Mount vscode server extensions for persistence + - vscode-server-extensions:/home/vscode/.vscode-server/extensions + - vscode-server-insiders-extensions:/home/vscode/.vscode-server-insiders/extensions + command: sleep infinity + network_mode: service:db + depends_on: + - db + - azurite + environment: + - PYTHONUNBUFFERED=1 + - TZ=Europe/Helsinki + env_file: + - ../.env + + db: + image: timescale/timescaledb:latest-pg16 + restart: unless-stopped + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: analytics + ports: + - "127.0.0.1:5433:5432" + volumes: + - postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + start_period: 5s + timeout: 20s + retries: 3 + + azurite: + image: mcr.microsoft.com/azure-storage/azurite + restart: unless-stopped + ports: + - "127.0.0.1:10100:10000" + - "127.0.0.1:10101:10001" + - "127.0.0.1:10102:10002" + volumes: + - azurite-data:/data + command: azurite --blobHost 0.0.0.0 --queueHost 0.0.0.0 --tableHost 0.0.0.0 + +volumes: + postgres-data: + azurite-data: + vscode-server-extensions: + vscode-server-insiders-extensions: From f61dba84229abd0e5dde5e588bece9ee3eaa51e5 Mon Sep 17 00:00:00 2001 From: Damian Wysokinski Date: Tue, 2 Dec 2025 13:14:35 +0100 Subject: [PATCH 3/6] added orchestrator config --- .devcontainer/api/devcontainer.json | 10 +- .devcontainer/docker-compose.yml | 25 ++- .devcontainer/importer/Dockerfile | 71 ++++++++ .devcontainer/importer/README.md | 208 +++++++++++++++++++++++ .devcontainer/importer/devcontainer.json | 105 ++++++++++++ .devcontainer/importer/launch.json | 99 +++++++++++ .devcontainer/importer/tasks.json | 86 ++++++++++ 7 files changed, 602 insertions(+), 2 deletions(-) create mode 100644 .devcontainer/importer/Dockerfile create mode 100644 .devcontainer/importer/README.md create mode 100644 
.devcontainer/importer/devcontainer.json create mode 100644 .devcontainer/importer/launch.json create mode 100644 .devcontainer/importer/tasks.json diff --git a/.devcontainer/api/devcontainer.json b/.devcontainer/api/devcontainer.json index 014000a..ec8ef54 100644 --- a/.devcontainer/api/devcontainer.json +++ b/.devcontainer/api/devcontainer.json @@ -63,13 +63,21 @@ }, "[python]": { "editor.defaultFormatter": "ms-python.python" + }, + "launch": { + "configurations": [], + "compounds": [] + }, + "tasks": { + "version": "2.0.0", + "tasks": [] } } } }, // Commands to run after container is created - "postCreateCommand": "pip install -r python/requirements.txt && pip install debugpy pytest pytest-asyncio httpx", + "postCreateCommand": "pip install -r python/requirements.txt && pip install debugpy pytest pytest-asyncio httpx && mkdir -p /workspace/.vscode && cp /workspace/.devcontainer/shared/launch.json /workspace/.vscode/launch.json && cp /workspace/.devcontainer/shared/tasks.json /workspace/.vscode/tasks.json", // Commands to run when attaching to existing container "postAttachCommand": "echo 'Container attached. Run: cd python && uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload'", diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 7b500ec..57b8b3d 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -10,7 +10,6 @@ services: - vscode-server-extensions:/home/vscode/.vscode-server/extensions - vscode-server-insiders-extensions:/home/vscode/.vscode-server-insiders/extensions command: sleep infinity - network_mode: service:db depends_on: - db - azurite @@ -20,6 +19,28 @@ services: env_file: - ../.env + importer: + build: + context: .. 
+ dockerfile: .devcontainer/importer/Dockerfile + volumes: + # Mount the workspace + - ..:/workspace:cached + # Mount vscode server extensions for persistence (must live under the home of the 'vscode' user the container runs as) + - vscode-server-extensions-importer:/home/vscode/.vscode-server/extensions + - vscode-server-insiders-extensions-importer:/home/vscode/.vscode-server-insiders/extensions + command: sleep infinity + depends_on: + - db + - azurite + environment: + - PYTHONUNBUFFERED=1 + - TZ=Europe/Helsinki + - AzureWebJobsScriptRoot=/home/site/wwwroot + - WEBSITE_HOSTNAME=localhost:7072 + env_file: + - ../.env + db: image: timescale/timescaledb:latest-pg16 restart: unless-stopped @@ -54,3 +75,5 @@ volumes: azurite-data: vscode-server-extensions: vscode-server-insiders-extensions: + vscode-server-extensions-importer: + vscode-server-insiders-extensions-importer: diff --git a/.devcontainer/importer/Dockerfile b/.devcontainer/importer/Dockerfile new file mode 100644 index 0000000..ece17eb --- /dev/null +++ b/.devcontainer/importer/Dockerfile @@ -0,0 +1,71 @@ +# Use Python base image for development - using Debian Bookworm (stable) +FROM mcr.microsoft.com/devcontainers/python:3.10-bookworm + +# Set timezone +ENV TZ="Europe/Helsinki" +ENV PYTHONUNBUFFERED=1 + +# Install system dependencies including GDAL for geopandas +RUN apt-get update && apt-get install -y \ + postgresql-client \ + curl \ + wget \ + git \ + gdal-bin \ + libgdal-dev \ + libgeos-dev \ + libproj-dev \ + libspatialindex-dev \ + build-essential \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* + +# Install Node.js (needed for Azure Functions Core Tools on ARM64) +RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ + apt-get install -y nodejs && \ + apt-get clean -y && \ + rm -rf /var/lib/apt/lists/* + +# Install Azure Functions Core Tools +# - AMD64: Install from Microsoft repository +# - ARM64: Install via npm (official method for ARM/Apple Silicon) +RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then \ + curl 
https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg && \ + mv microsoft.gpg /etc/apt/trusted.gpg.d/microsoft.gpg && \ + sh -c 'echo "deb [arch=amd64] https://packages.microsoft.com/debian/12/prod bookworm main" > /etc/apt/sources.list.d/dotnetdev.list' && \ + apt-get update && \ + apt-get install -y azure-functions-core-tools-4 && \ + apt-get clean -y && \ + rm -rf /var/lib/apt/lists/*; \ + else \ + echo "Installing Azure Functions Core Tools via npm for $ARCH architecture..."; \ + npm install -g azure-functions-core-tools@4 --unsafe-perm true; \ + fi + +# Set working directory +WORKDIR /workspace + +# Install Python dependencies +COPY python/requirements.txt /tmp/requirements.txt + +# Set GDAL environment variables for building Python packages +ENV GDAL_CONFIG=/usr/bin/gdal-config +ENV CPLUS_INCLUDE_PATH=/usr/include/gdal +ENV C_INCLUDE_PATH=/usr/include/gdal + +RUN pip install --upgrade pip \ + && pip install debugpy pytest pytest-asyncio httpx \ + && pip install -r /tmp/requirements.txt + +# Create .vscode-server directories with correct permissions +RUN mkdir -p /home/vscode/.vscode-server/bin \ + && mkdir -p /home/vscode/.vscode-server/data/Machine \ + && mkdir -p /home/vscode/.vscode-server/extensions \ + && mkdir -p /home/vscode/.vscode-server-insiders/bin \ + && mkdir -p /home/vscode/.vscode-server-insiders/extensions \ + && chown -R vscode:vscode /home/vscode + +# The vscode user already exists in the base image +# Switch to non-root user +USER vscode diff --git a/.devcontainer/importer/README.md b/.devcontainer/importer/README.md new file mode 100644 index 0000000..c6c36e5 --- /dev/null +++ b/.devcontainer/importer/README.md @@ -0,0 +1,208 @@ +# HFP Analytics Importer Development Container + +This devcontainer provides a complete development environment for the HFP Analytics Azure Functions (Importer) application. 
+ +## What's Included + +- **Python 3.10** with all project dependencies +- **Azure Functions Core Tools v4** for local Azure Functions development +- **PostgreSQL client** for database operations +- **TimescaleDB** database instance +- **Azurite** for local Azure Storage emulation +- **VS Code extensions** for Python, Azure Functions, Docker, and more +- **ARM64 (Apple Silicon) support** - works on M1/M2/M3 Macs + +## Getting Started + +1. **Open in Dev Container** + - Open the repository root folder in VS Code (the configuration lives in `.devcontainer/importer`) + - Press `F1`, select "Dev Containers: Reopen in Container", and choose "HFP Analytics Importer (Azure Functions)" + - Wait for the container to build and start + +2. **Start Azure Functions** + + **Method 1: Using VS Code Task (Recommended)** + - Press `Ctrl+Shift+P` (or `Cmd+Shift+P` on Mac) + - Select `Tasks: Run Task` + - Choose `Start Azure Functions` + + **Method 2: Using Terminal** + ```bash + cd python + func start --port 7071 + ``` + + **Method 3: Using the built-in command** + ```bash + cd python + func host start --port 7071 + ``` + +3. 
**Access the Application** + - Azure Functions: http://localhost:7071 + - Function Admin: http://localhost:7071/admin/functions + - PostgreSQL: localhost:5433 (user: postgres, password: postgres) - shared with API + - Azurite Blob: http://localhost:10100 - shared with API + - Azurite Queue: http://localhost:10101 - shared with API + - Azurite Table: http://localhost:10102 - shared with API + +## Available Functions + +The importer includes these Azure Functions: +- **httpPreprocess** - HTTP-triggered preprocessing function +- **importer** - Main import function +- **analyzer** - Analysis function +- **preprocess** - Preprocessing function +- **httpStart** (Durable) - Durable orchestration starter +- **orchestrator** (Durable) - Durable orchestrator +- **reclusterAnalysisActivity** (Durable) - Recluster analysis activity +- **setStatusActivity** (Durable) - Status setter activity +- **getStatusActivity** (Durable) - Status getter activity + +## Testing Functions + +### Test HTTP Function +```bash +# Test httpPreprocess +curl -X POST http://localhost:7071/httpPreprocess + +# Or use VS Code task: +# Ctrl+Shift+P -> Tasks: Run Task -> Trigger HTTP Function (httpPreprocess) +``` + +### View Function Logs +Azure Functions logs appear in the terminal where you ran `func start`. + +## Debugging + +### Debug Azure Functions: +1. Start Azure Functions with the task or terminal command +2. Set breakpoints in your Python code +3. Use `F5` or go to "Run and Debug" +4. Select "Attach to Python Functions" +5. Trigger your function via HTTP, timer, or queue + +### Debug Current File: +1. Open a Python file +2. Set breakpoints +3. Press `F5` +4. 
Select "Python: Current File" + +## Database Setup + +Initialize the database schema: +```bash +psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql +``` + +Or use the VS Code task: `Terminal > Run Task > Database: Run Migrations` + +## Running Tests + +```bash +cd python +pytest +``` + +Or use the VS Code task: `Terminal > Run Task > Run Tests` + +## Azure Functions Development Tips + +1. **Hot Reload**: Azure Functions automatically reload when you save Python files + +2. **View Function List**: + ```bash + cd python + func list + ``` + +3. **Check Function Status**: + ```bash + curl http://localhost:7071/admin/functions + ``` + +4. **Environment Variables**: Configure in `.env` file at workspace root + +5. **Function Configuration**: Each function has a `function.json` in its folder + +6. **Host Configuration**: Global settings in `python/host.json` + +## Ports + +| Service | Port | Description | +|---------|------|-------------| +| Azure Functions | 7071 | Main Functions host | +| PostgreSQL | 5433 | TimescaleDB database (shared with API) | +| Azurite Blob | 10100 | Azure Blob Storage emulator (shared with API) | +| Azurite Queue | 10101 | Azure Queue Storage emulator (shared with API) | +| Azurite Table | 10102 | Azure Table Storage emulator (shared with API) | + +## Troubleshooting + +**Container won't start:** +- Make sure Docker is running +- Check if ports 5433, 7071, 10100, 10101, 10102 are available +- Database and Azurite are shared with the API container +- Ports are remapped to avoid conflicts with main docker-compose +- Try rebuilding: `F1 > Dev Containers: Rebuild Container` + +**Functions won't start:** +- Verify you're in the `python` directory: `cd python` +- Check `host.json` exists in `python/` folder +- Ensure all function folders have `function.json` +- Check Azure Functions Core Tools: `func --version` + +**Database connection issues:** +- Wait for the database health check to pass (about 10-15 seconds) +- Check 
connection string in `.env` file +- Use port 5433 (not 5432) from host machine + +**Import errors:** +- Make sure PYTHONPATH is set: `export PYTHONPATH=/workspace/python` +- Reinstall dependencies: `pip install -r python/requirements.txt` + +**Storage connection errors:** +- Check Azurite is running: `docker ps | grep azurite` +- Verify storage connection string in `.env` uses correct ports (10100-10102) +- Default connection string: `DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10100/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10101/devstoreaccount1;TableEndpoint=http://127.0.0.1:10102/devstoreaccount1;` + +## Project Structure + +``` +python/ +├── host.json # Azure Functions host configuration +├── requirements.txt # Python dependencies +├── importer/ # Importer function +│ ├── function.json +│ └── __init__.py +├── analyzer/ # Analyzer function +├── preprocess/ # Preprocess function +├── httpPreprocess/ # HTTP preprocess function +├── durable/ # Durable functions +│ ├── httpStart/ +│ ├── orchestrator/ +│ └── *Activity/ +└── common/ # Shared code +``` + +## Useful Commands + +```bash +# List all functions +cd python && func list + +# Start with verbose logging +cd python && func start --port 7071 --verbose + +# Install new dependency (replace PACKAGE_NAME with the package to add) +pip install PACKAGE_NAME && pip freeze > python/requirements.txt + +# Run specific test file +cd python && pytest tests/test_importer.py -v + +# Check Azure Functions version +func --version + +# View function templates +func templates list +``` diff --git a/.devcontainer/importer/devcontainer.json b/.devcontainer/importer/devcontainer.json new file mode 100644 index 0000000..8985bdb --- /dev/null +++ b/.devcontainer/importer/devcontainer.json @@ -0,0 +1,105 @@ +{ + "name": "HFP Analytics Importer (Azure Functions)", + "dockerComposeFile": [ + "../docker-compose.yml" + ], + "service": "importer", + 
"workspaceFolder": "/workspace", + "shutdownAction": "stopCompose", + + // Forward ports for Azure Functions, database, and Azurite + // Note: Database and Azurite are shared with API container + "forwardPorts": [ + 7071, // Azure Functions + 5433, // PostgreSQL (shared with API) + 10100, // Azurite Blob (shared with API) + 10101, // Azurite Queue (shared with API) + 10102 // Azurite Table (shared with API) + ], + + "portsAttributes": { + "7071": { + "label": "Azure Functions", + "onAutoForward": "notify" + }, + "5433": { + "label": "PostgreSQL" + }, + "10100": { + "label": "Azurite Blob" + }, + "10101": { + "label": "Azurite Queue" + }, + "10102": { + "label": "Azurite Table" + } + }, + + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-python.black-formatter", + "ms-python.isort", + "ms-python.debugpy", + "ms-azuretools.vscode-azurefunctions", + "ms-azuretools.vscode-docker", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "GitHub.copilot", + "esbenp.prettier-vscode" + ], + "settings": { + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": false, + "python.formatting.provider": "black", + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.python" + }, + "azureFunctions.deploySubpath": "python", + "azureFunctions.projectRuntime": "~4", + "azureFunctions.projectLanguage": "Python", + "launch": { + "configurations": [], + "compounds": [] + }, + "tasks": { + "version": "2.0.0", + "tasks": [] + } + } + } + }, + + // Commands to run after container is created + "postCreateCommand": "pip install -r python/requirements.txt && pip install debugpy pytest pytest-asyncio httpx && mkdir -p /workspace/.vscode && cp 
/workspace/.devcontainer/shared/launch.json /workspace/.vscode/launch.json && cp /workspace/.devcontainer/shared/tasks.json /workspace/.vscode/tasks.json && cd /workspace/python && ln -sf durable/httpStart httpStart && ln -sf durable/orchestrator orchestrator && ln -sf durable/getStatusActivity getStatusActivity && ln -sf durable/setStatusActivity setStatusActivity && ln -sf durable/reclusterAnalysisActivity reclusterAnalysisActivity", + + // Commands to run when attaching to existing container + "postAttachCommand": "echo 'Container attached. To start Azure Functions:\n cd python\n func start --port 7071'", + + // Features to add to the container + "features": { + "ghcr.io/devcontainers/features/git:1": {}, + "ghcr.io/devcontainers/features/github-cli:1": {}, + "ghcr.io/devcontainers/features/azure-cli:1": {} + }, + + // Set environment variables + "containerEnv": { + "PYTHONPATH": "/workspace/python", + "PYTHONUNBUFFERED": "1", + "TZ": "Europe/Helsinki" + }, + + // Run as non-root user + "remoteUser": "vscode" +} diff --git a/.devcontainer/importer/launch.json b/.devcontainer/importer/launch.json new file mode 100644 index 0000000..4eafcd7 --- /dev/null +++ b/.devcontainer/importer/launch.json @@ -0,0 +1,99 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Azure Functions: All Functions (Start All)", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "Start Azure Functions with Debugger", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + "remoteRoot": "/home/site/wwwroot" + } + ], + "justMyCode": false + }, + { + "name": "Azure Functions: Debug", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "Start Azure Functions with Debugger", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + "remoteRoot": "/home/site/wwwroot" + } + ] + }, + { + "name": "Attach to Python 
Functions", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + "remoteRoot": "/home/site/wwwroot" + } + ] + }, + { + "name": "Azure Functions: Python (Local)", + "type": "debugpy", + "request": "launch", + "module": "azure_functions_worker", + "args": [ + "--host", + "127.0.0.1", + "--port", + "7071", + "--worker-id", + "test-worker" + ], + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python", + "AzureWebJobsScriptRoot": "${workspaceFolder}/python" + }, + "console": "integratedTerminal" + }, + { + "name": "Python: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": "Python: Pytest", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "args": [ + "-v", + "${file}" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + } + ] +} diff --git a/.devcontainer/importer/tasks.json b/.devcontainer/importer/tasks.json new file mode 100644 index 0000000..ea5ea3c --- /dev/null +++ b/.devcontainer/importer/tasks.json @@ -0,0 +1,86 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Start Azure Functions", + "type": "shell", + "command": "cd python && func start --port 7071", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": "none" + }, + { + "label": "Start Azure Functions (Debug Mode)", + "type": "shell", + "command": "cd python && func start --port 7071 --verbose", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true + }, + { + "label": "Start Azure Functions with Debugger", + "type": "shell", + "command": "cd 
python && func start --port 7071 --python-debug-port 9091", + "problemMatcher": [ + { + "pattern": [ + { + "regexp": ".", + "file": 1, + "location": 2, + "message": 3 + } + ], + "background": { + "activeOnStart": true, + "beginsPattern": ".*Azure Functions Core Tools.*", + "endsPattern": ".*(Worker process started and initialized|Host started|Job host started).*" + } + } + ], + "presentation": { + "reveal": "always", + "panel": "dedicated" + }, + "isBackground": true, + "runOptions": { + "instanceLimit": 1 + } + }, + { + "label": "Install Python Dependencies", + "type": "shell", + "command": "pip install -r python/requirements.txt", + "problemMatcher": [] + }, + { + "label": "Run Tests", + "type": "shell", + "command": "cd python && pytest", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "shared" + } + }, + { + "label": "Database: Run Migrations", + "type": "shell", + "command": "psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql", + "problemMatcher": [] + }, + { + "label": "Trigger HTTP Function (httpPreprocess)", + "type": "shell", + "command": "curl -X POST http://localhost:7071/httpPreprocess", + "problemMatcher": [] + } + ] +} From 137998fe9062450abf865411db667935ed0398f7 Mon Sep 17 00:00:00 2001 From: Damian Wysokinski Date: Thu, 4 Dec 2025 10:58:03 +0200 Subject: [PATCH 4/6] added debugger for importer --- .devcontainer/importer/launch.json | 10 ++++++++++ .devcontainer/importer/tasks.json | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/.devcontainer/importer/launch.json b/.devcontainer/importer/launch.json index 4eafcd7..4c4b3a7 100644 --- a/.devcontainer/importer/launch.json +++ b/.devcontainer/importer/launch.json @@ -94,6 +94,16 @@ "env": { "PYTHONPATH": "${workspaceFolder}/python" } + }, + { + "name": "Attach to Python Functions dwys", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "func: host start" 
} ] } diff --git a/.devcontainer/importer/tasks.json b/.devcontainer/importer/tasks.json index ea5ea3c..af7248a 100644 --- a/.devcontainer/importer/tasks.json +++ b/.devcontainer/importer/tasks.json @@ -81,6 +81,20 @@ "type": "shell", "command": "curl -X POST http://localhost:7071/httpPreprocess", "problemMatcher": [] + }, + { + "type": "func", + "label": "func: host start", + "command": "start", + "problemMatcher": "$func-python-watch", + "isBackground": true, + // "dependsOn": "pip install (functions)", + "options": { + "cwd": "${workspaceFolder}/python", + "env": { + "languageWorkers__python__arguments": "-m debugpy --listen 127.0.0.1:9091" + } + } } ] } From cf5dd9e00e9dd1f3f5ce33067531105c52fc77bb Mon Sep 17 00:00:00 2001 From: Damian Wysokinski Date: Thu, 4 Dec 2025 11:40:40 +0200 Subject: [PATCH 5/6] added shared launches --- .devcontainer/shared/launch.json | 129 +++++++++++++++++++++++++++++++ .devcontainer/shared/tasks.json | 115 +++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 .devcontainer/shared/launch.json create mode 100644 .devcontainer/shared/tasks.json diff --git a/.devcontainer/shared/launch.json b/.devcontainer/shared/launch.json new file mode 100644 index 0000000..310be36 --- /dev/null +++ b/.devcontainer/shared/launch.json @@ -0,0 +1,129 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: FastAPI", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "args": [ + "api.main:app", + "--host", + "0.0.0.0", + "--port", + "8000", + "--reload" + ], + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + }, + "jinja": true, + "justMyCode": false + }, + { + "name": "Azure Functions: All Functions (Start All)", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "Start Azure Functions with Debugger", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + 
"remoteRoot": "/home/site/wwwroot" + } + ], + "justMyCode": false + }, + { + "name": "Azure Functions: Debug", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "Start Azure Functions with Debugger", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + "remoteRoot": "/home/site/wwwroot" + } + ] + }, + { + "name": "Attach to Python Functions", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/python", + "remoteRoot": "/home/site/wwwroot" + } + ] + }, + { + "name": "Azure Functions: Python (Local)", + "type": "debugpy", + "request": "launch", + "module": "azure_functions_worker", + "args": [ + "--host", + "127.0.0.1", + "--port", + "7071", + "--worker-id", + "test-worker" + ], + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python", + "AzureWebJobsScriptRoot": "${workspaceFolder}/python" + }, + "console": "integratedTerminal" + }, + { + "name": "Python: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": "Python: Pytest", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "args": [ + "-v", + "${file}" + ], + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/python", + "env": { + "PYTHONPATH": "${workspaceFolder}/python" + } + }, + { + "name": "Attach to Python Functions dwys", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost", + "port": 9091 + }, + "preLaunchTask": "func: host start" + } + ] +} diff --git a/.devcontainer/shared/tasks.json b/.devcontainer/shared/tasks.json new file mode 100644 index 0000000..2044278 --- /dev/null +++ b/.devcontainer/shared/tasks.json @@ -0,0 +1,115 @@ +{ + 
"version": "2.0.0", + "tasks": [ + { + "label": "Run FastAPI Development Server", + "type": "shell", + "command": "cd python && uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": { + "kind": "build", + "isDefault": true + } + }, + { + "label": "Start Azure Functions", + "type": "shell", + "command": "cd python && func start --port 7071", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true, + "group": "none" + }, + { + "label": "Start Azure Functions (Debug Mode)", + "type": "shell", + "command": "cd python && func start --port 7071 --verbose", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "new" + }, + "isBackground": true + }, + { + "label": "Start Azure Functions with Debugger", + "type": "shell", + "command": "cd python && func start --port 7071 --python-debug-port 9091", + "problemMatcher": [ + { + "pattern": [ + { + "regexp": ".", + "file": 1, + "location": 2, + "message": 3 + } + ], + "background": { + "activeOnStart": true, + "beginsPattern": ".*Azure Functions Core Tools.*", + "endsPattern": ".*(Worker process started and initialized|Host started|Job host started).*" + } + } + ], + "presentation": { + "reveal": "always", + "panel": "dedicated" + }, + "isBackground": true, + "runOptions": { + "instanceLimit": 1 + } + }, + { + "label": "Install Python Dependencies", + "type": "shell", + "command": "pip install -r python/requirements.txt", + "problemMatcher": [] + }, + { + "label": "Run Tests", + "type": "shell", + "command": "cd python && pytest", + "problemMatcher": [], + "presentation": { + "reveal": "always", + "panel": "shared" + } + }, + { + "label": "Database: Run Migrations", + "type": "shell", + "command": "psql -h localhost -U postgres -d analytics -f db/sql/100_create_global_objects.sql", + "problemMatcher": [] + }, + { + "label": 
"Trigger HTTP Function (httpPreprocess)", + "type": "shell", + "command": "curl -X POST http://localhost:7071/httpPreprocess", + "problemMatcher": [] + }, + { + "type": "func", + "label": "func: host start", + "command": "start", + "problemMatcher": "$func-python-watch", + "isBackground": true, + // "dependsOn": "pip install (functions)", + "options": { + "cwd": "${workspaceFolder}/python", + "env": { + "languageWorkers__python__arguments": "-m debugpy --listen 127.0.0.1:9091" + } + } + } + ] +} From 4dbe9bd4b872e6441f4297d4326152ef1b96f0a3 Mon Sep 17 00:00:00 2001 From: Damian Wysokinski Date: Thu, 29 Jan 2026 13:20:40 +0200 Subject: [PATCH 6/6] added recluster tst_median change. And devcontainer for importer --- .devcontainer/importer/devcontainer.json | 5 ++- .devcontainer/importer/launch.json | 51 ------------------------ .devcontainer/shared/launch.json | 51 ------------------------ python/common/recluster.py | 12 +++--- 4 files changed, 10 insertions(+), 109 deletions(-) diff --git a/.devcontainer/importer/devcontainer.json b/.devcontainer/importer/devcontainer.json index 8985bdb..85b9c4f 100644 --- a/.devcontainer/importer/devcontainer.json +++ b/.devcontainer/importer/devcontainer.json @@ -44,7 +44,7 @@ "ms-python.black-formatter", "ms-python.isort", "ms-python.debugpy", - "ms-azuretools.vscode-azurefunctions", + "ms-azuretools.vscode-azurefunctions@1.19.0", "ms-azuretools.vscode-docker", "mtxr.sqltools", "mtxr.sqltools-driver-pg", @@ -90,7 +90,8 @@ "features": { "ghcr.io/devcontainers/features/git:1": {}, "ghcr.io/devcontainers/features/github-cli:1": {}, - "ghcr.io/devcontainers/features/azure-cli:1": {} + "ghcr.io/devcontainers/features/azure-cli:1": {}, + // VS Code Azure Functions extension must be pinned to 1.19.0 }, // Set environment variables diff --git a/.devcontainer/importer/launch.json b/.devcontainer/importer/launch.json index 4c4b3a7..00d9aac 100644 --- a/.devcontainer/importer/launch.json +++ b/.devcontainer/importer/launch.json @@ -18,57 +18,6 @@ ],
"justMyCode": false }, - { - "name": "Azure Functions: Debug", - "type": "debugpy", - "request": "attach", - "connect": { - "host": "localhost", - "port": 9091 - }, - "preLaunchTask": "Start Azure Functions with Debugger", - "pathMappings": [ - { - "localRoot": "${workspaceFolder}/python", - "remoteRoot": "/home/site/wwwroot" - } - ] - }, - { - "name": "Attach to Python Functions", - "type": "debugpy", - "request": "attach", - "connect": { - "host": "localhost", - "port": 9091 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}/python", - "remoteRoot": "/home/site/wwwroot" - } - ] - }, - { - "name": "Azure Functions: Python (Local)", - "type": "debugpy", - "request": "launch", - "module": "azure_functions_worker", - "args": [ - "--host", - "127.0.0.1", - "--port", - "7071", - "--worker-id", - "test-worker" - ], - "cwd": "${workspaceFolder}/python", - "env": { - "PYTHONPATH": "${workspaceFolder}/python", - "AzureWebJobsScriptRoot": "${workspaceFolder}/python" - }, - "console": "integratedTerminal" - }, { "name": "Python: Current File", "type": "debugpy", diff --git a/.devcontainer/shared/launch.json b/.devcontainer/shared/launch.json index 310be36..6573a63 100644 --- a/.devcontainer/shared/launch.json +++ b/.devcontainer/shared/launch.json @@ -38,57 +38,6 @@ ], "justMyCode": false }, - { - "name": "Azure Functions: Debug", - "type": "debugpy", - "request": "attach", - "connect": { - "host": "localhost", - "port": 9091 - }, - "preLaunchTask": "Start Azure Functions with Debugger", - "pathMappings": [ - { - "localRoot": "${workspaceFolder}/python", - "remoteRoot": "/home/site/wwwroot" - } - ] - }, - { - "name": "Attach to Python Functions", - "type": "debugpy", - "request": "attach", - "connect": { - "host": "localhost", - "port": 9091 - }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}/python", - "remoteRoot": "/home/site/wwwroot" - } - ] - }, - { - "name": "Azure Functions: Python (Local)", - "type": "debugpy", - "request": "launch", - 
"module": "azure_functions_worker", - "args": [ - "--host", - "127.0.0.1", - "--port", - "7071", - "--worker-id", - "test-worker" - ], - "cwd": "${workspaceFolder}/python", - "env": { - "PYTHONPATH": "${workspaceFolder}/python", - "AzureWebJobsScriptRoot": "${workspaceFolder}/python" - }, - "console": "integratedTerminal" - }, { "name": "Python: Current File", "type": "debugpy", diff --git a/python/common/recluster.py b/python/common/recluster.py index ae997ea..26f7708 100644 --- a/python/common/recluster.py +++ b/python/common/recluster.py @@ -12,13 +12,12 @@ import numpy as np import pandas as pd import zstandard as zstd -from sklearn.cluster import DBSCAN - from common.container_client import FlowAnalyticsContainerClient from common.database import pool from common.enums import ReclusterStatus from common.logger_util import CustomDbLogHandler from common.utils import get_season +from sklearn.cluster import DBSCAN logger = logging.getLogger("api") @@ -109,13 +108,16 @@ async def load_preprocess_files( dfs = [] decompressor = zstd.ZstdDecompressor() + + for r in results: compressed_data = r[0] decompressed_csv = decompressor.decompress(compressed_data) df = pd.read_csv(io.BytesIO(decompressed_csv), sep=";") - df["tst_median"] = pd.to_datetime(df["tst_median"], format="ISO8601").dt.tz_convert( - "UTC" - ) + if "tst_median" in df.columns: + df["tst_median"] = pd.to_datetime(df["tst_median"], format="ISO8601").dt.tz_convert( + "UTC" + ) dfs.append(df) if not dfs: