diff --git a/.gitignore b/.gitignore index 30d74d2584..c788155960 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,53 @@ -test \ No newline at end of file +# --- OS / IDE --- +.DS_Store +Thumbs.db +.vscode/ +.idea/ + +# --- Python --- +__pycache__/ +.pytest_cache/ +.ruff_cache/ +*.py[cod] +*.pyo +*.pyd +.venv/ +venv/ +.env + +# --- Terraform --- +.terraform/ +*.tfstate +*.tfstate.* +crash.log +crash.*.log +*.tfvars +*.tfvars.json +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# --- Pulumi --- +.pulumi/ +pulumi.*.log + +# Stack config files often contain values you don't want in git (and may contain secrets) +Pulumi.*.yaml +!Pulumi.yaml + +# --- Secrets / keys --- +**/key.json +**/*.pem +**/*.p12 +**/*.pfx +**/*passphrase* + +# Ansible +*.retry +.vault_pass +ansible/inventory/*.pyc +__pycache__/ + +# GitHub Actions self-hosted runner (local) +actions-runner/ \ No newline at end of file diff --git a/README.md b/README.md index 371d51f456..771bd2192f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # DevOps Engineering: Core Practices +[![Ansible Deployment](https://github.com/your-username/your-repo/actions/workflows/ansible-deploy.yml/badge.svg)](https://github.com/your-username/your-repo/actions/workflows/ansible-deploy.yml) + [![Labs](https://img.shields.io/badge/Labs-18-blue)](#labs) [![Exam](https://img.shields.io/badge/Exam-Optional-green)](#exam-alternative) [![Duration](https://img.shields.io/badge/Duration-18%20Weeks-lightgrey)](#course-roadmap) @@ -39,7 +41,7 @@ Master **production-grade DevOps practices** through hands-on labs. Build, conta | 16 | 16 | Cluster Monitoring | Kube-Prometheus, Init Containers | | — | **Exam Alternative Labs** | | | | 17 | 17 | Edge Deployment | Fly.io, Global Distribution | -| 18 | 18 | Decentralized Storage | 4EVERLAND, IPFS, Web3 | +| 18 | 18 | Reproducible Builds | Nix, Deterministic Builds, Flakes | --- @@ -61,7 +63,7 @@ Don't want to take the exam? 
Complete **both** bonus labs: | Lab | Topic | Points | |-----|-------|--------| | **Lab 17** | Fly.io Edge Deployment | 20 pts | -| **Lab 18** | 4EVERLAND & IPFS | 20 pts | +| **Lab 18** | Reproducible Builds with Nix | 20 pts | **Requirements:** - Complete both labs (17 + 18 = 40 pts, replaces exam) @@ -142,7 +144,7 @@ Each lab is worth **10 points** (main tasks) + **2.5 points** (bonus). - StatefulSets, Monitoring **Exam Alternative (Labs 17-18)** -- Fly.io, 4EVERLAND/IPFS +- Fly.io, Nix Reproducible Builds diff --git a/ansible/.vault_pass.sh b/ansible/.vault_pass.sh new file mode 100644 index 0000000000..6697c36319 --- /dev/null +++ b/ansible/.vault_pass.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +cat /mnt/c/Users/zagur/DevOps/DevOps-Core-Course/ansible/.vault_pass diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000000..d3d413f464 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,12 @@ +[defaults] +inventory = inventory/hosts.ini +roles_path = ./roles +host_key_checking = False +remote_user = liza +retry_files_enabled = False +vault_password_file = .vault_pass + +[privilege_escalation] +become = True +become_method = sudo +become_user = root diff --git a/ansible/docs/LAB05.md b/ansible/docs/LAB05.md new file mode 100644 index 0000000000..437b606c94 --- /dev/null +++ b/ansible/docs/LAB05.md @@ -0,0 +1,530 @@ +# LAB05 - Ansible Fundamentals + +> Repo: `ansible/` +> Control node: WSL (Ubuntu) +> Target node: VirtualBox VM (Ubuntu) + +--- + +## 1. 
Architecture overview + +### Versions / environment +- **Ansible:** `ansible [core 2.16.3]` +- **Target VM OS:** Ubuntu **24.04.4 LTS** +- **Docker Engine:** 29.2.1 (installed by Ansible) +- **Docker Python SDK on VM:** 5.0.3 (required for `community.docker` modules) + +### Project structure +``` +ansible/ +├── ansible.cfg +├── inventory/ +│ └── hosts.ini +├── group_vars/ +│ └── all.yml # encrypted via Ansible Vault +├── playbooks/ +│ ├── provision.yml # runs roles: common + docker +│ └── deploy.yml # runs role: app_deploy +├── roles/ +│ ├── common/ +│ ├── docker/ +│ └── app_deploy/ +└── docs/ + └── LAB05.md +``` +This structure follows the lab requirement to use **roles** instead of a monolithic playbook. + +### Why roles (instead of monolithic playbooks)? +Roles make automation: +- **Reusable** (same role can be reused across playbooks/projects), +- **Readable** (clear separation of concerns), +- **Maintainable** (each role can be changed/tested independently), +- **Scalable** (easy to compose `common + docker + app_deploy`). + +--- + +## 2. Inventory & configuration + +### Inventory (`inventory/hosts.ini`) +Example: +```ini +[webservers] +vm1 ansible_host=172.21.112.1 ansible_port=2222 ansible_user=liza +``` + +### Ansible config (`ansible.cfg`) +Key settings: +```ini +[defaults] +inventory = inventory/hosts.ini +roles_path = roles +host_key_checking = False +retry_files_enabled = False +remote_user = liza +vault_password_file = .vault_pass + +[privilege_escalation] +become = True +become_method = sudo +become_user = root +``` +> Note: `vault_password_file` is one of the recommended approaches in the lab. + +### Connectivity test +```bash +ansible all -m ping +ansible webservers -a "uname -a" +``` + +--- + +## 3. Roles documentation + +The lab requires 3 roles: **common**, **docker**, **app_deploy**, each with tasks/handlers/defaults as needed. + +### 3.1 Role: `common` + +**Purpose:** baseline VM provisioning (APT cache, essential packages, timezone). 
+ +**Key tasks (examples):** +- Update apt cache (`apt: update_cache=yes`) +- Install common packages (`apt: state=present`) +- Set timezone (optional) + +**Variables (defaults):** +- `common_packages`: list of packages to install (curl/git/vim/htop/python3-pip/etc.) + +**Handlers:** none (not needed). + +**Dependencies:** none. + +--- + +### 3.2 Role: `docker` + +**Purpose:** install and configure Docker Engine on Ubuntu, enable service, add user to docker group, install `python3-docker`. + +**Key tasks (examples):** +- Install prerequisites +- Add Docker GPG key +- Add Docker apt repository +- Install Docker packages +- Enable/start Docker service +- Add user to `docker` group +- Install `python3-docker` for Ansible Docker modules + +**Variables (defaults):** +- `docker_user`: user to add to docker group (e.g., `liza`) + +**Handlers:** +- `restart docker` (triggered when repo/key/packages change) + +**Dependencies:** none. + +--- + +### 3.3 Role: `app_deploy` + +**Purpose:** deploy a containerized Python app from Docker Hub: +- login to registry using vaulted credentials +- pull image +- run container with port mapping + restart policy +- verify readiness via HTTP health endpoint + +**Key tasks (implemented):** +1. `docker_login` (with `no_log: true`) +2. `docker_image` pull +3. `docker_container` run +4. short `pause` to avoid early health-check race +5. `uri` health check to `/health` + +**Variables (defaults + vaulted):** +- vaulted: `dockerhub_username`, `dockerhub_password` +- app: `docker_image`, `docker_image_tag`, `app_container_name` +- ports: `app_port`, `container_port` +- `restart_policy` (default: `unless-stopped`) +- `app_env` (optional env vars dict) + +**Handlers:** +- `restart app container` (used if container definition changes) + +**Dependencies:** +- Requires Docker installed (role `docker` should be applied first via `provision.yml`). + +--- + +## 4. 
Playbooks + +### 4.1 `playbooks/provision.yml` +```yaml +- name: Provision web servers + hosts: webservers + become: yes + roles: + - common + - docker +``` +This matches the lab requirement that provisioning logic must live in roles. + +### 4.2 `playbooks/deploy.yml` +Because `group_vars/all.yml` is stored at the project root, I explicitly include it: +```yaml +- name: Deploy application + hosts: webservers + become: yes + + vars_files: + - ../group_vars/all.yml + + roles: + - app_deploy +``` + +--- + +## 5. Idempotency demonstration (Provision) + +The lab requires running provisioning twice and documenting that the second run results in **zero changes**. + +### First run (`changed > 0`) +Command: +```bash +ansible-playbook playbooks/provision.yml +``` + +Result (summary): +- `vm1 : ok=13 changed=6 failed=0` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook playbooks/provision.yml + +PLAY [Provision web servers] ****************************************************************** + +TASK [Gathering Facts] ************************************************************************ +ok: [vm1] + +TASK [common : Update apt cache] ************************************************************** +ok: [vm1] + +TASK [common : Install common packages] ******************************************************* +ok: [vm1] + +TASK [common : Set timezone (optional)] ******************************************************* +ok: [vm1] + +TASK [docker : Install prerequisites] ********************************************************* +ok: [vm1] + +TASK [docker : Ensure /etc/apt/keyrings exists] *********************************************** +ok: [vm1] + +TASK [docker : Add Docker GPG key] ************************************************************ +changed: [vm1] + +TASK [docker : Add Docker apt repository] ***************************************************** +changed: [vm1] + +TASK [docker : Install Docker packages] 
******************************************************* +changed: [vm1] + +TASK [docker : Ensure Docker is enabled and running] ****************************************** +ok: [vm1] + +TASK [docker : Add user to docker group] ****************************************************** +changed: [vm1] + +TASK [docker : Install python docker SDK for Ansible docker modules] ************************** +changed: [vm1] + +RUNNING HANDLER [docker : restart docker] ***************************************************** +changed: [vm1] + +PLAY RECAP ************************************************************************************ +vm1 : ok=13 changed=6 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 +``` + +### Second run (`changed = 0`) +Command: +```bash +ansible-playbook playbooks/provision.yml +``` + +Result (summary): +- `vm1 : ok=12 changed=0 failed=0` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook playbooks/provision.yml + +PLAY [Provision web servers] ****************************************************************** + +TASK [Gathering Facts] ************************************************************************ +ok: [vm1] + +TASK [common : Update apt cache] ************************************************************** +ok: [vm1] + +TASK [common : Install common packages] ******************************************************* +ok: [vm1] + +TASK [common : Set timezone (optional)] ******************************************************* +ok: [vm1] + +TASK [docker : Install prerequisites] ********************************************************* +ok: [vm1] + +TASK [docker : Ensure /etc/apt/keyrings exists] *********************************************** +ok: [vm1] + +TASK [docker : Add Docker GPG key] ************************************************************ +ok: [vm1] + +TASK [docker : Add Docker apt repository] ***************************************************** +ok: [vm1] + +TASK [docker : Install Docker packages] 
******************************************************* +ok: [vm1] + +TASK [docker : Ensure Docker is enabled and running] ****************************************** +ok: [vm1] + +TASK [docker : Add user to docker group] ****************************************************** +ok: [vm1] + +TASK [docker : Install python docker SDK for Ansible docker modules] ************************** +ok: [vm1] + +PLAY RECAP ************************************************************************************ +vm1 : ok=12 changed=0 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 +``` + +### Why the second run has no changes +All tasks use **stateful modules** (e.g., `apt state=present`, `service state=started enabled=yes`) so Ansible converges the system to the desired state and then reports **ok** on repeated runs. + +### Why my roles are idempotent +- **State-driven modules (declarative)** + - `apt` is used with `state: present` (packages are installed only if missing). + - `service`/`systemd` is used with `state: started` and `enabled: yes` (service is started/enabled only if needed). + - `file` is used with `state: directory` (directory is created only if missing, and permissions are enforced). +- **Docker modules that converge to a desired state** + - `community.docker.docker_image` with `source: pull` ensures the image exists locally (it won’t “re-pull” unnecessarily when nothing changed). + - `community.docker.docker_container` ensures the container is in the desired state (`started`, correct image, ports, env, restart policy). +- **Handlers instead of unconditional restarts** + - Docker is restarted **only when notified** (e.g., after repo/key/package changes), not on every run. This keeps repeated runs stable and fast. +- **Explicit readiness checks (without forcing changes)** + - `wait_for`/`pause`/`uri` validate that the service is up, but they do not modify system state. They prevent race conditions without breaking idempotency. 
+ +### Evidence in logs +- `provision.yml` second run: `changed=0` (system is already converged). +- `deploy.yml` may show changes if a new image/container revision is applied; otherwise it should converge and remain stable. + +--- + +## 6. Ansible Vault usage + +The lab requires storing Docker Hub credentials in an encrypted file. + +### Vaulted variables file +File: `group_vars/all.yml` (encrypted) + +Example of encrypted header: +``` +$ANSIBLE_VAULT;1.1;AES256 +... +``` +![encrypted](/ansible/docs/screenshots/cat_vars.png) + +Decrypted contents (example / sanitized): +```yaml +dockerhub_username: "wkwtfigo" +dockerhub_password: "" + +app_name: "devops-info-service" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" +app_port: 5000 +container_port: 5000 +app_container_name: "{{ app_name }}" +``` + +### Vault password management +I use a local password file: +```bash +printf "my-vault-password" > .vault_pass +chmod 600 .vault_pass +``` + +`.vault_pass` is excluded from git: +``` +.vault_pass +*.retry +__pycache__/ +``` +This is aligned with the lab’s security guidance (do not commit secrets). + +Ansible Vault solves a practical security problem: we want to keep infrastructure code in Git (so it’s reviewable and reproducible), but we **must not** store secrets in plain text. + +### What Vault protects in this lab +- Docker Hub credentials: + - `dockerhub_username` + - `dockerhub_password` (recommended as an **access token**) + +### Why Vault matters +- **Prevents accidental secret leaks** + - Plain-text secrets can be leaked through git history, screenshots, CI logs, or shared archives. +- **Enables safe collaboration** + - The team can clone the repo and run playbooks, while only authorized people who have the vault password can decrypt secrets. +- **Audit-friendly** + - Secrets are encrypted at rest, while the rest of the automation stays transparent and reviewable in version control. 
+- **Supports secret rotation** + - If a token is rotated, you update the vaulted file once; roles/playbooks remain unchanged. + +### Best practices applied +- The vault password is stored locally in `.vault_pass` with `chmod 600`. +- `.vault_pass` is excluded from Git via `.gitignore`. +- `no_log: true` is used for the Docker login task so secrets don’t appear in Ansible output. +- If a token is exposed, it should be revoked immediately and replaced in Vault. + +--- + +## 7. Deployment verification + +### Deploy run +Command: +```bash +ansible-playbook playbooks/deploy.yml --vault-password-file .vault_pass +``` + +Result (summary): +- `vm1 : ok=7 changed=3 failed=0` +- Health check: `ok` (no retries after adding a small startup pause) + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook playbooks/deploy.yml --vault-password-file .vault_pass + +PLAY [Deploy application] ******************************************************************************************************************************************************************************************* + +TASK [Gathering Facts] ********************************************************************************************************************************************************************************************** +ok: [vm1] + +TASK [app_deploy : Login to Docker Hub] ***************************************************************************************************************************************************************************** +changed: [vm1] + +TASK [app_deploy : Pull application image] ************************************************************************************************************************************************************************** +ok: [vm1] + +TASK [app_deploy : Ensure app container is running] ***************************************************************************************************************************************************************** 
+changed: [vm1] + +TASK [app_deploy : Give app time to start] ************************************************************************************************************************************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [vm1] + +TASK [app_deploy : Health check] ************************************************************************************************************************************************************************************ +ok: [vm1] + +RUNNING HANDLER [app_deploy : restart app container] **************************************************************************************************************************************************************** +changed: [vm1] + +PLAY RECAP ********************************************************************************************************************************************************************************************************** +vm1 : ok=7 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 + +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ +``` + +### Container status +Command: +```bash +ansible webservers -a "docker ps" +``` + +Output: +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible webservers -a "docker ps" +vm1 | CHANGED | rc=0 >> +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +42e3cf173671 wkwtfigo/devops-info-service:latest "python app.py" 34 seconds ago Up 25 seconds (healthy) 0.0.0.0:5000->5000/tcp devops-info-service +``` + +### Health endpoint verification +Because the VM is accessed via **VirtualBox NAT + SSH port forwarding**, port `5000` is not directly reachable from the host without extra forwarding. 
+Verification options: + +**Option A (recommended in report): run curl on VM through Ansible** +```bash +ansible webservers -m shell -a "curl -sS -m 3 http://127.0.0.1:5000/health && echo" +ansible webservers -m shell -a "curl -sS -m 3 http://127.0.0.1:5000/ && echo" +``` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible webservers -m shell -a "curl -sS -m 3 http://127.0.0.1:5000/health && echo" +vm1 | CHANGED | rc=0 >> +{"status":"healthy","timestamp":"2026-02-20T14:29:09.512Z","uptime_seconds":63} +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ +``` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible webservers -m shell -a "curl -sS -m 3 http://127.0.0.1:5000 && echo" +vm1 | CHANGED | rc=0 >> +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"42e3cf173671","platform":"Linux","platform_version":"#14~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Jan 15 15:52:10 UTC 2","architecture":"x86_64","cpu_count":3,"python_version":"3.13.11"},"runtime":{"uptime_seconds":90,"uptime_human":"0 hours, 1 minutes","current_time":"2026-02-20T14:29:36.772Z","timezone":"UTC"},"request":{"client_ip":"172.17.0.1","user_agent":"curl/8.5.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ +``` + +**Option B: SSH tunnel** +```bash +ssh -p 2222 -L 5000:127.0.0.1:5000 liza@172.21.112.1 +# in another terminal: +curl http://127.0.0.1:5000/health +curl http://127.0.0.1:5000/ +``` + +--- + +## 8. Key decisions (short answers) + +**Why use roles instead of plain playbooks?** +Roles enforce a clean structure, encourage reuse, and keep playbooks minimal and readable. 
+ +**How do roles improve reusability?** +Each role encapsulates one responsibility (baseline setup, Docker, app deploy) and can be reused across environments by changing variables. + +**What makes a task idempotent?** +A task is idempotent when re-running it keeps the system in the same desired state and Ansible reports `ok` instead of `changed`. + +**How do handlers improve efficiency?** +Handlers run only when notified (e.g., restart Docker only if packages/repo change), reducing unnecessary restarts. + +**Why is Ansible Vault necessary?** +It allows committing configuration to VCS while keeping secrets encrypted and safe, meeting security requirements. + +--- + +## 9. Challenges & solutions (brief) + +- **Ansible ignored `ansible.cfg` (“world writable directory”)** + Moved project to WSL Linux filesystem and fixed permissions (`chmod go-w`) so Ansible loads config. + +- **`dockerhub_username` undefined in deploy role** + `group_vars/all.yml` was not auto-loaded due to playbook path; resolved by `vars_files: ../group_vars/all.yml`. + +- **Health check had a retry on first attempt** + Added a short `pause` before HTTP health check to avoid race condition during app startup. + +--- + +## 10. 
How to reproduce + +```bash +# 1) Provision VM +ansible-playbook playbooks/provision.yml + +# 2) Run again to show idempotency +ansible-playbook playbooks/provision.yml + +# 3) Deploy app +ansible-playbook playbooks/deploy.yml --vault-password-file .vault_pass + +# 4) Verify +ansible webservers -a "docker ps" +ansible webservers -m shell -a "curl -sS -m 3 http://127.0.0.1:5000/health && echo" +``` \ No newline at end of file diff --git a/ansible/docs/LAB06.md b/ansible/docs/LAB06.md new file mode 100644 index 0000000000..1105569969 --- /dev/null +++ b/ansible/docs/LAB06.md @@ -0,0 +1,1030 @@ +# LAB06 — Advanced Ansible & CI/CD + +> **Tech stack:** Ansible, Ansible Vault, Docker, Docker Compose (v2), GitHub Actions +> **Goal:** Refactor roles using **blocks + tags**, migrate deployment to **Docker Compose**, add **role dependencies**, implement **wipe logic**, and automate deployment with **CI/CD**. + +--- + +## 1). Overview + +In this lab I improved my Ansible project with advanced practices: + +- Refactored `common` and `docker` roles using **block / rescue / always**. +- Implemented a consistent **tag strategy** to run or skip specific parts of playbooks. +- Migrated deployment from `docker_container` (“docker run”-style) to **Docker Compose** using a Jinja2 template. +- Added **role dependency** so `web_app` automatically runs `docker` first. +- Implemented **wipe logic** (safe cleanup) controlled by **both** a variable and a tag. +- Added **GitHub Actions** workflow for linting + automated deployment. + +Repository structure (key parts): + +```text +ansible/ +├── inventory/ +│ ├── group_vars/ +│ └── hosts.ini +├── playbooks/ +│ ├── provision.yml +│ ├── deploy.yml +│ └── site.yml +└── roles/ + ├── common/ + ├── docker/ + └── web_app/ +``` + +## 2). 
Blocks & Tags + +### 2.1 Tag Strategy + +| Tag | Scope | Purpose | +| ---------------- | -------------------- | ---------------------------------------- | +| `common` | role-level | Run/skip entire `common` role | +| `packages` | block-level | Install OS packages | +| `users` | block-level | User management | +| `docker` | role-level | Run/skip entire `docker` role | +| `docker_install` | block-level | Install Docker + Compose | +| `docker_config` | block-level | Docker post-install config | +| `web_app` | role-level | Run/skip entire `web_app` role | +| `app_deploy` | block-level | Application deployment tasks | +| `compose` | block-level | Docker Compose template + up | +| `web_app_wipe` | tasks include + wipe | Wipe tasks (only with variable gate too) | + +### 2.2 `common` role refactor + +**Where:** `roles/common/tasks/main.yml` + +Implementation highlights: + +- Packages are grouped inside a block tagged `packages`. + + ```bash + zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook playbooks/provision.yml --tags packages + + PLAY [Provision web servers] *************************************************************************** + + TASK [Gathering Facts] ********************************************************************************* + ok: [vm1] + + TASK [common : Common | Update apt cache] ************************************************************** + ok: [vm1] + + TASK [common : Common | Install common packages] ******************************************************* + ok: [vm1] + + TASK [common : Mark common packages done] ************************************************************** + ok: [vm1] + + PLAY RECAP ********************************************************************************************* + vm1 : ok=4 changed=0 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 + ``` + + ![tags packages](/ansible/docs/screenshots/tags_packages.png) + +- User management tasks are grouped inside a block tagged `users`. 
+ + ```bash + zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook playbooks/provision.yml --tags users + + PLAY [Provision web servers] ******************************************************************** + + TASK [Gathering Facts] ************************************************************************** + ok: [vm1] + + TASK [common : Ensure users exist] ************************************************************** + skipping: [vm1] + + TASK [common : Mark common users done] ********************************************************** + ok: [vm1] + + PLAY RECAP ************************************************************************************** + vm1 : ok=2 changed=0 unreachable=0 failed=0 skipped=1 rescued=0 ignored=0 + ``` + + ![tags users](/ansible/docs/screenshots/tags_users.png) + +- `rescue` handles apt cache issues using `apt-get update --fix-missing`. +- `always` writes a small marker/log file under `/tmp` to confirm block completion. + +### 2.3 `docker` role refactor + +**Where:** `roles/docker/tasks/main.yml` + +Implementation highlights: + +- Installation is grouped in a block tagged `docker_install`. 
+ + ```bash + zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook playbooks/provision.yml --tags docker_ + install + + PLAY [Provision web servers] ******************************************************************** + + TASK [Gathering Facts] ************************************************************************** + ok: [vm1] + + TASK [docker : Install prerequisites] *********************************************************** + ok: [vm1] + + TASK [docker : Ensure /etc/apt/keyrings exists] ************************************************* + ok: [vm1] + + TASK [docker : Add Docker GPG key] ************************************************************** + ok: [vm1] + + TASK [docker : Add Docker repo] ***************************************************************** + ok: [vm1] + + TASK [docker : Install Docker packages] ********************************************************* + ok: [vm1] + + TASK [docker : Ensure Docker service enabled and running] *************************************** + ok: [vm1] + + PLAY RECAP ************************************************************************************** + vm1 : ok=7 changed=0 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 + ``` + + ![docker install](/ansible/docs/screenshots/tags_docker_install.png) + +- Configuration is grouped in a block tagged `docker_config`. 
+ + ```bash + zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook playbooks/provision.yml --tags docker_ + config + + PLAY [Provision web servers] ******************************************************************** + + TASK [Gathering Facts] ************************************************************************** + ok: [vm1] + + TASK [docker : Add user to docker group] ******************************************************** + ok: [vm1] + + TASK [docker : Install python docker SDK for Ansible docker modules] **************************** + ok: [vm1] + + PLAY RECAP ************************************************************************************** + vm1 : ok=3 changed=0 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 + ``` + + ![tags docker config](/ansible/docs/screenshots/tags_docker_config.png) + +- `rescue` retries apt update in case of GPG/network flakiness. + + ![](/ansible/docs/screenshots/rescue.png) + +- `always` ensures Docker service is enabled and running. + + ![docker status](/ansible/docs/screenshots/docker_status_vm.png) + +### 2.4 Commands used for tag testing + +```bash +# Run only docker role +ansible-playbook playbooks/provision.yml --tags "docker" + +# Skip common role +ansible-playbook playbooks/provision.yml --skip-tags "common" + +# Install packages only across all roles +ansible-playbook playbooks/provision.yml --tags "packages" + +# Check mode (dry-run) +ansible-playbook playbooks/provision.yml --tags "docker" --check + +# Run only docker installation tasks +ansible-playbook playbooks/provision.yml --tags "docker_install" + +# List all tags +ansible-playbook playbooks/provision.yml --list-tags +``` + +![tags docker](/ansible/docs/screenshots/tags_docker.png) + +![skip tags common](/ansible/docs/screenshots/skip_tags_common.png) + +![tags packages](/ansible/docs/screenshots/tags_packages.png) + +![tags docker check](/ansible/docs/screenshots/tags_docker_check.png) + +![tags docker 
install](/ansible/docs/screenshots/tags_docker_install.png) + +![list tags](/ansible/docs/screenshots/list_tags.png) + +Rescue block triggered output: + +![rescue triggered](/ansible/docs/screenshots/rescue.png) + +## 3). Docker Compose Migration + +### 3.1 Role rename and playbook updates + +- Renamed role: `app_deploy` -> `web_app`. +- Updated references in playbooks to use `web_app`. + +### 3.2 Docker Compose template + +![docker outputs](/ansible/docs/screenshots/docker_vm.png) + +**Where:** `roles/web_app/templates/docker-compose.yml.j2`. + +Template supports: + +- dynamic `app_name` +- image `docker_image` + tag +- ports (`app_port` <-> `container_port`) +- `app_env` environment map +- restart policy + +### 3.3 Role Dependency + +**Where:** `roles/web_app/meta/main.yml`. + +`web_app` depends on `docker`, so running only the deploy playbook still installs Docker first. + +Test: + +```bash +ansible-playbook playbooks/deploy.yml +# docker role runs automatically before web_app +``` + +![](/ansible/docs/screenshots/docker_test_web.png) + +### 3.4 Deployment with docker_compose module + +**Where:** `roles/web_app/tasks/main.yml`. + +Deployment steps: + +1. Create project dir (e.g., `/opt/{{ app_name }}`). +2. Render `docker-compose.yml`. +3. Run `docker compose up` via Ansible module. +4. Health-check the app endpoint. + +Required tags applied: + +- `app_deploy` +- `compose` + +### 3.5 Variables & Secrets (Vault) + +Configuration is stored in `inventory/group_vars/main.yml` (and role defaults). Sensitive values are stored using Ansible Vault. 
+ +![](/ansible/docs/screenshots/cat_vars.png) + +### 3.6 Full deployment and idempotency + +```bash +# Full deployment +ansible-playbook playbooks/deploy.yml --vault-password-file .vault_pass + +# Idempotency check: second run should show no changes (or minimal/no-op changes) +ansible-playbook playbooks/deploy.yml --vault-password-file .vault_pass + +# Verify on target VM +curl -fsS "http://127.0.0.1:5000/health" +``` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook -i inventory/hosts.ini playbooks/deploy.yml --vault-password-file .vault_pass.sh + +PLAY [Deploy application] *********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************** +ok: [vm1] + +TASK [docker : Install prerequisites] *********************************************************************** +ok: [vm1] + +TASK [docker : Ensure /etc/apt/keyrings exists] ************************************************************* +ok: [vm1] + +TASK [docker : Add Docker GPG key] ************************************************************************** +ok: [vm1] + +TASK [docker : Add Docker repo] ***************************************************************************** +ok: [vm1] + +TASK [docker : Install Docker packages] ********************************************************************* +ok: [vm1] + +TASK [docker : Ensure Docker service enabled and running] *************************************************** +ok: [vm1] + +TASK [docker : Add user to docker group] ******************************************************************** +ok: [vm1] + +TASK [docker : Install python docker SDK for Ansible docker modules] **************************************** +ok: [vm1] + +TASK [web_app : Include wipe tasks] ************************************************************************* +included: /home/zagur/projects/ansible/roles/web_app/tasks/wipe.yml for vm1 + +TASK 
[web_app : Remove old container by name if exists] ***************************************************** +skipping: [vm1] + +TASK [web_app : Gather running containers info] ************************************************************* +skipping: [vm1] + +TASK [web_app : Compute containers publishing the app port] ************************************************* +skipping: [vm1] + +TASK [web_app : Remove containers publishing the app port] ************************************************** +skipping: [vm1] + +TASK [web_app : Stop and remove compose stack] ************************************************************** +skipping: [vm1] + +TASK [web_app : Remove compose default network if exists] *************************************************** +skipping: [vm1] + +TASK [web_app : Remove docker-compose.yml] ****************************************************************** +skipping: [vm1] + +TASK [web_app : Remove application directory] *************************************************************** +skipping: [vm1] + +TASK [web_app : Log wipe completion] ************************************************************************ +skipping: [vm1] + +TASK [web_app : Login to Docker Hub] ************************************************************************ +changed: [vm1] + +TASK [web_app : Ensure compose project directory exists] **************************************************** +ok: [vm1] + +TASK [web_app : Template docker-compose.yml] **************************************************************** +ok: [vm1] + +TASK [web_app : Deploy via Docker Compose v2] *************************************************************** +changed: [vm1] + +TASK [web_app : Health check] ******************************************************************************* +ok: [vm1] + +TASK [web_app : Log deploy completion marker] *************************************************************** +ok: [vm1] + +RUNNING HANDLER [web_app : Wait for app to start] 
*********************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [vm1] + +PLAY RECAP ************************************************************************************************** +vm1 : ok=17 changed=2 unreachable=0 failed=0 skipped=9 rescued=0 ignored=0 +``` +I ran the deployment playbook twice to validate idempotency: + +- **1st run:** the application was deployed successfully. +- **2nd run:** the playbook finished successfully again and the application health check remained **OK**. + +On the second run Ansible still reported a small number of `changed` tasks (**changed=2**). This is **expected** in my implementation and does not indicate configuration drift: + +1) **Docker Hub login task** +- I use `community.docker.docker_login` with `reauthorize: true`, which refreshes the authentication and can be reported as a change on every run. +- This keeps the deployment robust (avoids failures due to expired credentials), but it is not strictly “no-op” in Ansible reporting. + +2) **Docker Compose deployment task** +- I deploy via `community.docker.docker_compose_v2` and use an image tag such as `latest`. +- Docker Compose may perform a pull/check step on each run and Ansible can report it as `changed` even when the service state is effectively the same. + +**Conclusion:** The playbook is idempotent in terms of the final system state: the service remains running, configuration stays consistent, and the health endpoint returns `200`. The observed `changed=2` is an acceptable change caused by authentication refresh and image update checks, not by repeated resource recreation. 
+ +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook -i inventory/hosts.ini playbooks/deploy.yml --vault-password-file .vault_pass.sh + +PLAY [Deploy application] *********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************** +ok: [vm1] + +TASK [docker : Install prerequisites] *********************************************************************** +ok: [vm1] + +TASK [docker : Ensure /etc/apt/keyrings exists] ************************************************************* +ok: [vm1] + +TASK [docker : Add Docker GPG key] ************************************************************************** +ok: [vm1] + +TASK [docker : Add Docker repo] ***************************************************************************** +ok: [vm1] + +TASK [docker : Install Docker packages] ********************************************************************* +ok: [vm1] + +TASK [docker : Ensure Docker service enabled and running] *************************************************** +ok: [vm1] + +TASK [docker : Add user to docker group] ******************************************************************** +ok: [vm1] + +TASK [docker : Install python docker SDK for Ansible docker modules] **************************************** +ok: [vm1] + +TASK [web_app : Include wipe tasks] ************************************************************************* +included: /home/zagur/projects/ansible/roles/web_app/tasks/wipe.yml for vm1 + +TASK [web_app : Remove old container by name if exists] ***************************************************** +skipping: [vm1] + +TASK [web_app : Gather running containers info] ************************************************************* +skipping: [vm1] + +TASK [web_app : Compute containers publishing the app port] ************************************************* +skipping: [vm1] + +TASK [web_app : Remove containers 
publishing the app port] ************************************************** +skipping: [vm1] + +TASK [web_app : Stop and remove compose stack] ************************************************************** +skipping: [vm1] + +TASK [web_app : Remove compose default network if exists] *************************************************** +skipping: [vm1] + +TASK [web_app : Remove docker-compose.yml] ****************************************************************** +skipping: [vm1] + +TASK [web_app : Remove application directory] *************************************************************** +skipping: [vm1] + +TASK [web_app : Log wipe completion] ************************************************************************ +skipping: [vm1] + +TASK [web_app : Login to Docker Hub] ************************************************************************ +changed: [vm1] + +TASK [web_app : Ensure compose project directory exists] **************************************************** +ok: [vm1] + +TASK [web_app : Template docker-compose.yml] **************************************************************** +ok: [vm1] + +TASK [web_app : Deploy via Docker Compose v2] *************************************************************** +changed: [vm1] + +TASK [web_app : Health check] ******************************************************************************* +ok: [vm1] + +TASK [web_app : Log deploy completion marker] *************************************************************** +ok: [vm1] + +RUNNING HANDLER [web_app : Wait for app to start] *********************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [vm1] + +PLAY RECAP ************************************************************************************************** +vm1 : ok=17 changed=2 unreachable=0 failed=0 skipped=9 rescued=0 ignored=0 +``` + +![curl on vm](/ansible/docs/screenshots/curl_on_vm.png) + +## 4). 
Wipe Logic Implementation + +### 4.1 Requirements satisfied + +Wipe logic is: + +- controlled by a variable: `web_app_wipe: true` +- gated by tag: `web_app_wipe` +- does not use the special `never` tag +- default behavior: wipe does not run unless explicitly requested + +### 4.2 How it works + +- Wipe tasks are included at the top of `web_app` execution. +- Wipe removes the application stack (Compose down/absent) and related files/dirs. +- Deployment continues normally after wipe (for “clean install”), unless running wipe-only. + +### 4.3 Commands used + +```bash +# Wipe only (remove application completely) +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe + +# Clean install: wipe first, then deploy +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" + +# Normal deploy: wipe tasks skipped (default) +ansible-playbook playbooks/deploy.yml +``` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook -i inventory/hosts.ini playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe --vault-password-file .vault_pass.sh + +PLAY [Deploy application] ****************************************************************************** + +TASK [Gathering Facts] ********************************************************************************* +ok: [vm1] + +TASK [../roles/web_app : Include wipe tasks] *********************************************************** +included: /home/zagur/projects/ansible/roles/web_app/tasks/wipe.yml for vm1 + +TASK [../roles/web_app : Remove old container by name if exists] *************************************** +changed: [vm1] + +TASK [../roles/web_app : Find containers publishing app_port] ****************************************** +ok: [vm1] + +TASK [../roles/web_app : Remove containers publishing app_port (wipe)] ********************************* +skipping: [vm1] + +TASK [../roles/web_app : Remove old container by name if exists] *************************************** +ok: [vm1] + +TASK 
[../roles/web_app : Stop and remove compose stack] ************************************************ +changed: [vm1] + +TASK [../roles/web_app : Remove compose default network if exists] ************************************* +ok: [vm1] + +TASK [../roles/web_app : Remove docker-compose.yml] **************************************************** +changed: [vm1] + +TASK [../roles/web_app : Remove application directory] ************************************************* +changed: [vm1] + +TASK [../roles/web_app : Log wipe completion] ********************************************************** +ok: [vm1] => { + "msg": "Application devops-info-service wiped successfully" +} + +PLAY RECAP ********************************************************************************************* +vm1 : ok=10 changed=4 unreachable=0 failed=0 skipped=1 rescued=0 ignored=0 +``` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook -i inventory/hosts.ini playbooks/deploy.yml -e "web_app_wipe=true" --vault-password-file .vault_pass.sh + +PLAY [Deploy application] ****************************************************************************** + +TASK [Gathering Facts] ********************************************************************************* +ok: [vm1] + +TASK [docker : Docker | Install prerequisites] ********************************************************* +ok: [vm1] + +TASK [docker : Docker | Ensure /etc/apt/keyrings exists] *********************************************** +ok: [vm1] + +TASK [docker : Docker | Add Docker GPG key] ************************************************************ +ok: [vm1] + +TASK [docker : Docker | Add Docker apt repository] ***************************************************** +ok: [vm1] + +TASK [docker : Docker | Install Docker packages] ******************************************************* +ok: [vm1] + +TASK [docker : Docker | Ensure Docker service enabled+running (always)] ******************************** +ok: [vm1] + +TASK [docker : 
Docker | Add user to docker group] ****************************************************** +ok: [vm1] + +TASK [docker : Docker | Install python docker SDK for Ansible modules] ********************************* +ok: [vm1] + +TASK [../roles/web_app : Include wipe tasks] *********************************************************** +included: /home/zagur/projects/ansible/roles/web_app/tasks/wipe.yml for vm1 + +TASK [../roles/web_app : Remove old container by name if exists] *************************************** +ok: [vm1] + +TASK [../roles/web_app : Find containers publishing app_port] ****************************************** +ok: [vm1] + +TASK [../roles/web_app : Remove containers publishing app_port (wipe)] ********************************* +skipping: [vm1] + +TASK [../roles/web_app : Remove old container by name if exists] *************************************** +ok: [vm1] + +TASK [../roles/web_app : Stop and remove compose stack] ************************************************ +fatal: [vm1]: FAILED! 
=> {"changed": false, "msg": "\"/opt/devops-info-service\" is not a directory"} +...ignoring + +TASK [../roles/web_app : Remove compose default network if exists] ************************************* +ok: [vm1] + +TASK [../roles/web_app : Remove docker-compose.yml] **************************************************** +ok: [vm1] + +TASK [../roles/web_app : Remove application directory] ************************************************* +ok: [vm1] + +TASK [../roles/web_app : Log wipe completion] ********************************************************** +ok: [vm1] => { + "msg": "Application devops-info-service wiped successfully" +} + +TASK [../roles/web_app : Login to Docker Hub] ********************************************************** +changed: [vm1] + +TASK [../roles/web_app : Ensure compose project directory exists] ************************************** +changed: [vm1] + +TASK [../roles/web_app : Template docker-compose.yml] ************************************************** +changed: [vm1] + +TASK [../roles/web_app : Deploy via Docker Compose v2] ************************************************* +changed: [vm1] + +TASK [../roles/web_app : Give app time to start] ******************************************************* +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [vm1] + +TASK [../roles/web_app : Health check] ***************************************************************** +ok: [vm1] + +TASK [../roles/web_app : Log deploy completion marker] ************************************************* +ok: [vm1] + +PLAY RECAP ********************************************************************************************* +vm1 : ok=25 changed=4 unreachable=0 failed=0 skipped=1 rescued=0 +ignored=1 +``` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook -i inventory/hosts.ini playbooks/deploy.yml - +-tags web_app_wipe --vault-password-file .vault_pass.sh + +PLAY [Deploy application] 
****************************************************************************** + +TASK [Gathering Facts] ********************************************************************************* +ok: [vm1] + +TASK [../roles/web_app : Include wipe tasks] *********************************************************** +included: /home/zagur/projects/ansible/roles/web_app/tasks/wipe.yml for vm1 + +TASK [../roles/web_app : Remove old container by name if exists] *************************************** +skipping: [vm1] + +TASK [../roles/web_app : Find containers publishing app_port] ****************************************** +skipping: [vm1] + +TASK [../roles/web_app : Remove containers publishing app_port (wipe)] ********************************* +skipping: [vm1] + +TASK [../roles/web_app : Remove old container by name if exists] *************************************** +skipping: [vm1] + +TASK [../roles/web_app : Stop and remove compose stack] ************************************************ +skipping: [vm1] + +TASK [../roles/web_app : Remove compose default network if exists] ************************************* +skipping: [vm1] + +TASK [../roles/web_app : Remove docker-compose.yml] **************************************************** +skipping: [vm1] + +TASK [../roles/web_app : Remove application directory] ************************************************* +skipping: [vm1] + +TASK [../roles/web_app : Log wipe completion] ********************************************************** +skipping: [vm1] + +PLAY RECAP ********************************************************************************************* +vm1 : ok=2 changed=0 unreachable=0 failed=0 skipped=9 rescued=0 ignored=0 +``` + +```bash +zagur@LAPTOP-JONCQBVT:~/projects/ansible$ ansible-playbook -i inventory/hosts.ini playbooks/deploy.yml --vault-pa +ssword-file .vault_pass.sh + +PLAY [Deploy application] *************************************************************************************** + +TASK [Gathering Facts] 
****************************************************************************************** +ok: [vm1] + +TASK [docker : Install prerequisites] *************************************************************************** +ok: [vm1] + +TASK [docker : Ensure /etc/apt/keyrings exists] ***************************************************************** +ok: [vm1] + +TASK [docker : Add Docker GPG key] ****************************************************************************** +ok: [vm1] + +TASK [docker : Add Docker repo] ********************************************************************************* +ok: [vm1] + +TASK [docker : Install Docker packages] ************************************************************************* +ok: [vm1] + +TASK [docker : Ensure Docker service enabled and running] ******************************************************* +ok: [vm1] + +TASK [docker : Add user to docker group] ************************************************************************ +ok: [vm1] + +TASK [docker : Install python docker SDK for Ansible docker modules] ******************************************** +ok: [vm1] + +TASK [web_app : Include wipe tasks] ***************************************************************************** +included: /home/zagur/projects/ansible/roles/web_app/tasks/wipe.yml for vm1 + +TASK [web_app : Remove old container by name if exists] ********************************************************* +skipping: [vm1] + +TASK [web_app : Gather running containers info] ***************************************************************** +skipping: [vm1] + +TASK [web_app : Compute containers publishing the app port] ***************************************************** +skipping: [vm1] + +TASK [web_app : Remove containers publishing the app port] ****************************************************** +skipping: [vm1] + +TASK [web_app : Stop and remove compose stack] ****************************************************************** +skipping: [vm1] + +TASK 
[web_app : Remove compose default network if exists] ******************************************************* +skipping: [vm1] + +TASK [web_app : Remove docker-compose.yml] ********************************************************************** +skipping: [vm1] + +TASK [web_app : Remove application directory] ******************************************************************* +skipping: [vm1] + +TASK [web_app : Log wipe completion] **************************************************************************** +skipping: [vm1] + +TASK [web_app : Login to Docker Hub] **************************************************************************** +changed: [vm1] + +TASK [web_app : Ensure compose project directory exists] ******************************************************** +ok: [vm1] + +TASK [web_app : Template docker-compose.yml] ******************************************************************** +ok: [vm1] + +TASK [web_app : Deploy via Docker Compose v2] ******************************************************************* +changed: [vm1] + +TASK [web_app : Health check] *********************************************************************************** +ok: [vm1] + +TASK [web_app : Log deploy completion marker] ******************************************************************* +ok: [vm1] + +RUNNING HANDLER [web_app : Wait for app to start] *************************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [vm1] + +PLAY RECAP ****************************************************************************************************** +vm1 : ok=17 changed=2 unreachable=0 failed=0 skipped=9 rescued=0 ignored=0 +``` + +## 5). CI/CD Integration (GitHub Actions) + +### 5.1 Workflow summary + +CI/CD pipeline: + +```code +Push/PR → ansible-lint → run ansible-playbook (deploy) → verify health endpoint +``` + +### 5.2 Setup steps + +1. Add workflow file: `.github/workflows/ansible-deploy.yml` +2. 
Configure repository secrets: + - `ANSIBLE_VAULT_PASSWORD` + - `SSH_PRIVATE_KEY` + - `VM_HOST` + - `VM_USER` +3. Push to `lab06` (or `main` / `master`) to trigger CI. + +![lint](/ansible/docs/screenshots/lint_github.png) + +```bash +Run . .venv/bin/activate + +PLAY [Deploy application] ****************************************************** +TASK [Gathering Facts] ********************************************************* +Warning: : Host 'vm1' is using the discovered Python interpreter at '/usr/bin/python3.12', but future installation of another Python interpreter could cause a different interpreter to be discovered. See https://docs.ansible.com/ansible-core/2.20/reference_appendices/interpreter_discovery.html for more information. +ok: [vm1] +TASK [docker : Install prerequisites] ****************************************** +ok: [vm1] +TASK [docker : Ensure /etc/apt/keyrings exists] ******************************** +ok: [vm1] +TASK [docker : Add Docker GPG key] ********************************************* +ok: [vm1] +TASK [docker : Add Docker repo] ************************************************ +Warning: : Deprecation warnings can be disabled by setting `deprecation_warnings=False` in ansible.cfg. +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. This feature will be removed from ansible-core version 2.24. +Origin: /home/zagur/actions-runner/_work/DevOps-Core-Course/DevOps-Core-Course/ansible/roles/docker/tasks/main.yml:29:15 +27 - name: Add Docker repo +28 ansible.builtin.apt_repository: +29 repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{... + ^ column 15 +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. 
+ok: [vm1] +TASK [docker : Install Docker packages] **************************************** +ok: [vm1] +TASK [docker : Ensure Docker service enabled and running] ********************** +ok: [vm1] +TASK [docker : Add user to docker group] *************************************** +ok: [vm1] +TASK [docker : Install python docker SDK for Ansible docker modules] *********** +ok: [vm1] +TASK [web_app : Include wipe tasks] ******************************************** +included: /home/zagur/actions-runner/_work/DevOps-Core-Course/DevOps-Core-Course/ansible/roles/web_app/tasks/wipe.yml for vm1 +TASK [web_app : Remove old container by name if exists] ************************ +skipping: [vm1] +TASK [web_app : Gather running containers info] ******************************** +skipping: [vm1] +TASK [web_app : Compute containers publishing the app port] ******************** +skipping: [vm1] +TASK [web_app : Remove containers publishing the app port] ********************* +skipping: [vm1] +TASK [web_app : Stop and remove compose stack] ********************************* +skipping: [vm1] +TASK [web_app : Remove compose default network if exists] ********************** +skipping: [vm1] +TASK [web_app : Remove docker-compose.yml] ************************************* +skipping: [vm1] +TASK [web_app : Remove application directory] ********************************** +skipping: [vm1] +TASK [web_app : Log wipe completion] ******************************************* +skipping: [vm1] +TASK [web_app : Login to Docker Hub] ******************************************* +changed: [vm1] +TASK [web_app : Ensure compose project directory exists] *********************** +ok: [vm1] +TASK [web_app : Template docker-compose.yml] *********************************** +ok: [vm1] +TASK [web_app : Deploy via Docker Compose v2] ********************************** +ok: [vm1] +TASK [web_app : Health check] ************************************************** +ok: [vm1] +TASK [web_app : Log deploy completion marker] 
********************************** +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. This feature will be removed from ansible-core version 2.24. +Origin: /home/zagur/actions-runner/_work/DevOps-Core-Course/DevOps-Core-Course/ansible/roles/web_app/tasks/main.yml:76:18 +74 ansible.builtin.copy: +75 dest: "/tmp/web_app_deploy_done" +76 content: "web_app deploy completed on {{ ansible_date_time.iso8601 }}\n" + ^ column 18 +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. +ok: [vm1] +PLAY RECAP ********************************************************************* +vm1 : ok=16 changed=1 unreachable=0 failed=0 skipped=9 rescued=0 ignored=0 +``` + +## 6). Testing results + +### 6.1 Tags & selective execution + +I verified that tags allow running only specific parts of the automation: + +- `--tags packages` executes only the package installation block in `common` +- `--tags users` executes only the user management block in `common` +- `--tags docker_install` executes only Docker installation tasks +- `--tags docker_config` executes only Docker post-install configuration +- `--list-tags` shows all available tags for the project + +Evidence: terminal outputs and screenshots are attached in `docs/screenshots/` for each command. 
+ +### 6.2 Docker Compose deployment + +I confirmed that the application is deployed via Docker Compose (v2 module): + +- `docker-compose.yml` is rendered to `/opt/{{ app_name }}/docker-compose.yml` +- the compose stack is started with `community.docker.docker_compose_v2` +- health endpoint `/health` returns HTTP 200 + +Evidence attached: +- successful `ansible-playbook deploy.yml` output (previous sections) +- curl output `/health` from inside the VM + +![](/ansible/docs/screenshots/curl_on_vm.png) + +### 6.3 Idempotency check + +I ran the deploy playbook twice: + +- Run #1: successful deployment +- Run #2: the final system state is unchanged (service stays running and healthy) + +On the second run Ansible still reported `changed=2`. This is expected in my implementation: +- Docker Hub login refresh (`reauthorize: true`) can be reported as `changed` +- Docker Compose may re-check/pull images (e.g., tag `latest`) and be reported as `changed` + +Despite these small changes, the deployment is idempotent in terms of **resulting state**: +the service remains running and passes the health check. + +### 6.4 Wipe logic test scenarios + +I validated all wipe scenarios required by the lab: + +1) Normal deploy: wipe tasks are skipped by default +2) Wipe-only: `-e web_app_wipe=true --tags web_app_wipe` removes the stack and files +3) Clean reinstall: `-e web_app_wipe=true` performs wipe first, then deploys again +4) Safety check: `--tags web_app_wipe` with default variable `false` does not wipe anything + +Evidence: terminal outputs for each scenario are included in this report. + +## 7). Challenges & Solutions + +### 1. Docker container name conflict +**Challenge:** +Deployment failed with `Conflict. The container name "/devops-info-service" is already in use`, because an old container with the same name already existed on the VM (leftovers from a previous deployment not managed by the current Compose project). 
+ +**Solution:** +Implemented a **wipe** procedure (double-gated by `web_app_wipe=true` and the `web_app_wipe` tag) that removes leftovers before redeploy: +- force-removes the legacy container by name (`docker_container: state=absent`) +- removes the Compose stack (`docker_compose_v2: state=absent, remove_orphans=true`) +- deletes the project directory and `docker-compose.yml` from the VM + +This ensured the Compose deployment could create resources without naming conflicts. + +### 2. CI failed because `.vault_pass` was missing on GitHub Actions +**Challenge:** `ansible-lint` triggers `ansible-playbook --syntax-check`, and Ansible tried to read `vault_password_file = .vault_pass` from `ansible.cfg`. On GitHub runners this file does not exist, so syntax-check failed before linting even started. + +**Solution:** Provide the vault password file dynamically in the workflow and point Ansible to it via `ANSIBLE_VAULT_PASSWORD_FILE` (or remove `vault_password_file` from `ansible.cfg` and pass `--vault-password-file` explicitly in CI). + +### 3. `ignore_errors` is discouraged and flagged by lint +**Challenge:** Wipe tasks used `ignore_errors: true` in multiple places. ansible-lint flags this as unpredictable because it hides failures and makes runs harder to debug. + +**Solution:** Replace `ignore_errors` with explicit error handling: +- Use `failed_when: false` for "best-effort cleanup" tasks. +- Optionally keep `changed_when: false` to avoid noisy diffs. +This keeps playbooks reliable and lint-compliant. + +## 8). Research Answers + +### Blocks & Tags + +**Q: What happens if the rescue block also fails?** +If a task inside `rescue:` fails, the whole block fails and the playbook stops (unless errors are ignored). `always:` still runs (because it executes regardless of success/failure). In production, rescue should be conservative (log context, retry safely, and fail with a clear message if recovery is impossible). + +**Q: Can you have nested blocks?** +Yes. 
Blocks can be nested to structure complex flows (e.g., “install → configure → verify”), each with its own rescue/always. This can improve readability, but too much nesting can make playbooks hard to follow. + +**Q: How do tags inherit to tasks within blocks?** +Tags applied at the block level apply to all tasks inside the block. Tasks may also define their own tags. When running with `--tags X`, tasks run if they match tag `X` either directly or via inheritance. + +### Docker Compose + +**Q: What’s the difference between `restart: always` and `restart: unless-stopped`?** +- `always`: the container always restarts after failure and also after daemon restarts, even if the user manually stopped it (it will start again on daemon restart). +- `unless-stopped`: it restarts on failure/daemon restart, but if the container was manually stopped, it will not be restarted automatically. + +**Q: How do Docker Compose networks differ from Docker bridge networks?** +Compose creates project-scoped networks automatically and connects services by service name (internal DNS). A “bridge network” is a Docker network driver type; Compose typically creates a bridge network for the project, but it is isolated and named per project. Compose makes multi-container networking reproducible and consistent. + +**Q: Can you reference Ansible Vault variables in the template?** +Yes. Vault-encrypted variables are decrypted at runtime (with the vault password) and can be used like normal Ansible variables in Jinja2 templates, including docker-compose.yml.j2. + +### Wipe logic + +**Q: Why use both variable AND tag (double gating)?** +This provides two independent safety checks: +- the variable indicates intent (“I really want to wipe”) +- the tag indicates scope (“run only wipe logic now”) +Accidental wipe becomes much less likely. 
+ +**Q: What’s the difference between using `never` tag and this approach?** +`never` prevents tasks from running unless explicitly tagged, but it’s a special tag with a particular behavior. The lab explicitly requests NOT using it. Double gating (var + tag) achieves safety without relying on `never`. + +**Q: Why must wipe logic come BEFORE deployment in main.yml?** +Because the “clean reinstall” scenario requires deterministic order: remove old deployment first, then deploy fresh. If wipe was after deployment, it could delete the newly deployed stack. + +**Q: When would you want clean reinstall vs rolling update?** +- Clean reinstall: corrupted state, major reconfiguration, troubleshooting, or decommissioning. +- Rolling update: normal upgrades where downtime should be minimized, and state should be preserved. + +**Q: How would you extend wipe to remove images/volumes too?** +You could add optional tasks guarded by another variable (e.g., `web_app_wipe_images=true`) to: +- remove specific images (`community.docker.docker_image state: absent`) +- remove volumes (`docker volume rm ...` or compose down with volume removal) +This should be carefully gated because it is destructive and affects disk usage/state. + +### CI/CD (GitHub Actions) + +**Q: What are the security implications of storing SSH keys in GitHub Secrets?** +Secrets are encrypted and not visible in logs by default, but any workflow with write access could potentially misuse them. 
Best practices: +- use least-privileged SSH keys (restricted user, limited commands if possible) +- rotate keys periodically +- restrict who can modify workflows/merge to main +- prefer self-hosted runner + private network when possible + +**Q: How would you implement a staging → production pipeline?** +Use two environments (staging/prod) with separate inventories and secrets, and add manual approval for production: +- on push → deploy to staging automatically +- on release tag / manual approval → deploy to production +GitHub Environments can enforce approvals and secret separation. + +**Q: What would you add to make rollbacks possible?** +Pin images to immutable tags (version/SHA) instead of `latest`. Keep previous versions available, and allow redeploying with an older tag. Optionally store deployment metadata and provide a “rollback” workflow that redeploys last known good version. + +**Q: How does a self-hosted runner improve security compared to GitHub-hosted?** +A self-hosted runner can live inside the same private network as the target VM, avoiding exposing SSH to the internet and reducing key distribution. It also allows tighter control over the execution environment, but it requires maintaining runner security and updates. 
\ No newline at end of file diff --git a/ansible/docs/screenshots/cat_vars.png b/ansible/docs/screenshots/cat_vars.png new file mode 100644 index 0000000000..b2ddf14fb0 Binary files /dev/null and b/ansible/docs/screenshots/cat_vars.png differ diff --git a/ansible/docs/screenshots/curl_on_vm.png b/ansible/docs/screenshots/curl_on_vm.png new file mode 100644 index 0000000000..79eaca0f9c Binary files /dev/null and b/ansible/docs/screenshots/curl_on_vm.png differ diff --git a/ansible/docs/screenshots/docker_status_vm.png b/ansible/docs/screenshots/docker_status_vm.png new file mode 100644 index 0000000000..01ca8ab226 Binary files /dev/null and b/ansible/docs/screenshots/docker_status_vm.png differ diff --git a/ansible/docs/screenshots/docker_test_web.png b/ansible/docs/screenshots/docker_test_web.png new file mode 100644 index 0000000000..5d3518b193 Binary files /dev/null and b/ansible/docs/screenshots/docker_test_web.png differ diff --git a/ansible/docs/screenshots/docker_vm.png b/ansible/docs/screenshots/docker_vm.png new file mode 100644 index 0000000000..e81979645e Binary files /dev/null and b/ansible/docs/screenshots/docker_vm.png differ diff --git a/ansible/docs/screenshots/lint_github.png b/ansible/docs/screenshots/lint_github.png new file mode 100644 index 0000000000..41ca4e3bbd Binary files /dev/null and b/ansible/docs/screenshots/lint_github.png differ diff --git a/ansible/docs/screenshots/list_tags.png b/ansible/docs/screenshots/list_tags.png new file mode 100644 index 0000000000..dc6e5e5ae2 Binary files /dev/null and b/ansible/docs/screenshots/list_tags.png differ diff --git a/ansible/docs/screenshots/rescue.png b/ansible/docs/screenshots/rescue.png new file mode 100644 index 0000000000..b1b4b6991e Binary files /dev/null and b/ansible/docs/screenshots/rescue.png differ diff --git a/ansible/docs/screenshots/skip_tags_common.png b/ansible/docs/screenshots/skip_tags_common.png new file mode 100644 index 0000000000..4618f2d64a Binary files /dev/null and 
b/ansible/docs/screenshots/skip_tags_common.png differ diff --git a/ansible/docs/screenshots/tags_docker.png b/ansible/docs/screenshots/tags_docker.png new file mode 100644 index 0000000000..c665730603 Binary files /dev/null and b/ansible/docs/screenshots/tags_docker.png differ diff --git a/ansible/docs/screenshots/tags_docker_check.png b/ansible/docs/screenshots/tags_docker_check.png new file mode 100644 index 0000000000..5e46be0121 Binary files /dev/null and b/ansible/docs/screenshots/tags_docker_check.png differ diff --git a/ansible/docs/screenshots/tags_docker_config.png b/ansible/docs/screenshots/tags_docker_config.png new file mode 100644 index 0000000000..d8ae1368c4 Binary files /dev/null and b/ansible/docs/screenshots/tags_docker_config.png differ diff --git a/ansible/docs/screenshots/tags_docker_install.png b/ansible/docs/screenshots/tags_docker_install.png new file mode 100644 index 0000000000..244c629f05 Binary files /dev/null and b/ansible/docs/screenshots/tags_docker_install.png differ diff --git a/ansible/docs/screenshots/tags_packages.png b/ansible/docs/screenshots/tags_packages.png new file mode 100644 index 0000000000..a54770e944 Binary files /dev/null and b/ansible/docs/screenshots/tags_packages.png differ diff --git a/ansible/docs/screenshots/tags_users.png b/ansible/docs/screenshots/tags_users.png new file mode 100644 index 0000000000..2fbd27e292 Binary files /dev/null and b/ansible/docs/screenshots/tags_users.png differ diff --git a/ansible/inventory/group_vars/all/main.yml b/ansible/inventory/group_vars/all/main.yml new file mode 100644 index 0000000000..dea63a270f --- /dev/null +++ b/ansible/inventory/group_vars/all/main.yml @@ -0,0 +1,19 @@ +$ANSIBLE_VAULT;1.1;AES256 +39346665336332616661636239656232323763303165653531633466396135373332363038653731 +6436393430666665393666313737376163336361333234610a383765323162656532613134323165 +36396439303733313562623132386332336231346533633830393165396532346166643632633930 
+3261363865383238620a326635343166333139323230373137653466386538656532386131366431 +63386239393530366538613439306335666161663136333161343365663664633735323166303364 +62616436623238646664336139363662396538616566306236316566616566303832636463316230 +37396633386233626136396630306139356635326266333139653264386436633662346139643938 +33366135353862306565363437666139323063363033386332666166306461383963636361373262 +39663134376638663833396164393365326336326131623366653430376132613366323963663665 +62373362663334373735303331313863373033383539393966336566343661313135373164663036 +37343862376432316666623833643639613430356662626435613365323262343033303438653630 +39663933373264353135383636653662623761656330396430616639393639623434623763306633 +66663030323435616537616437643434313964366264356432333564646666616264323963363038 +30626536353036386465343131613038393461383132386538386534383233616136616435326339 +36653032323730316237306431313366656530633464356163333030386461666164626166313064 +36633965303730626163303730376535303636376133343336363336623834366130643439346431 +36653766383865633365343235643563663061613737313433386336326135653563393030363462 +6634323939613939333030313530313264623033326430653439 diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..2c23cf7655 --- /dev/null +++ b/ansible/inventory/hosts.ini @@ -0,0 +1,2 @@ +[webservers] +vm1 ansible_host=172.21.112.1 ansible_port=2222 ansible_user=liza ansible_ssh_private_key_file=/home/zagur/.ssh/id_ed25519 \ No newline at end of file diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..4f62cf5f6d --- /dev/null +++ b/ansible/playbooks/deploy.yml @@ -0,0 +1,6 @@ +--- +- name: Deploy application + hosts: webservers + become: true + roles: + - role: web_app diff --git a/ansible/playbooks/provision.yml b/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..7cc2e6678d --- /dev/null +++ 
b/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/ansible/playbooks/site.yml b/ansible/playbooks/site.yml new file mode 100644 index 0000000000..37a716fe9b --- /dev/null +++ b/ansible/playbooks/site.yml @@ -0,0 +1,13 @@ +--- +- name: Provision servers + hosts: all + become: true + roles: + - common + - docker + +- name: Deploy web application + hosts: webservers + become: true + roles: + - web_app diff --git a/ansible/requirements-ci.txt b/ansible/requirements-ci.txt new file mode 100644 index 0000000000..25d2bc2768 --- /dev/null +++ b/ansible/requirements-ci.txt @@ -0,0 +1,2 @@ +ansible==13.4.0 +ansible-lint==26.2.0 \ No newline at end of file diff --git a/ansible/roles/common/defaults/main.yml b/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..8f562681d4 --- /dev/null +++ b/ansible/roles/common/defaults/main.yml @@ -0,0 +1,10 @@ +--- +common_packages: + - curl + - git + - vim + - htop + - python3-pip + +common_user: liza +common_timezone: "Europe/Moscow" diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..816e6967cd --- /dev/null +++ b/ansible/roles/common/tasks/main.yml @@ -0,0 +1,70 @@ +--- +# roles/common/tasks/main.yml +# Purpose: +# Baseline OS setup: packages + users. +# +# Tags: +# common - all common role tasks +# packages - apt packages tasks +# users - user management tasks +# +# Notes: +# - Blocks include rescue/always to satisfy LAB06 requirements. +# - Marker files in /tmp are used as lab evidence and reduce ambiguity. 
+ +- name: Common | Packages + become: true + tags: [common, packages] + block: + - name: Common | Update apt cache + # cache_valid_time avoids updating too often (idempotency-friendly) + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + + - name: Common | Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + + rescue: + - name: Common | Retry apt cache update + # Simple retry in case of transient networking/repo issues + ansible.builtin.apt: + update_cache: true + + - name: Common | Retry install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + + always: + - name: Mark common packages done + # LAB evidence marker; keep changed_when=false to reduce noise + ansible.builtin.copy: + dest: /tmp/common_packages_done + content: "common packages finished at {{ ansible_date_time.iso8601 }}\n" + mode: "0644" + changed_when: false + +- name: Common | Users + become: true + tags: [common, users] + block: + - name: Ensure users exist + # common_users is expected to be a list of dicts: + # - { name: "user1", groups: "sudo", shell: "/bin/bash" } + ansible.builtin.user: + name: "{{ item.name }}" + groups: "{{ item.groups | default(omit) }}" + shell: "{{ item.shell | default('/bin/bash') }}" + state: present + loop: "{{ common_users | default([]) }}" + + always: + - name: Mark common users done + ansible.builtin.copy: + dest: /tmp/common_users_done + content: "common users finished at {{ ansible_date_time.iso8601 }}\n" + mode: "0644" + changed_when: false diff --git a/ansible/roles/docker/defaults/main.yml b/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..445d4f2cde --- /dev/null +++ b/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,8 @@ +--- +docker_user: "{{ ansible_user | default('ubuntu') }}" +docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin diff --git 
a/ansible/roles/docker/handlers/main.yml b/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..501c89b42f --- /dev/null +++ b/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,9 @@ +--- +# roles/docker/handlers/main.yml +# Purpose: +# Handlers for docker role. Triggered by notify when docker config changes. + +- name: Restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..36742659b3 --- /dev/null +++ b/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,101 @@ +--- +# roles/docker/tasks/main.yml +# Purpose: +# Install and configure Docker Engine + Docker Compose plugin on Ubuntu. +# +# Tags: +# docker - all docker tasks +# docker_install - repository + packages + service ensure +# docker_config - user group + python docker SDK +# +# Notes: +# - Uses docker-compose-plugin (Compose v2) required by community.docker.docker_compose_v2 + +- name: Docker | Install + become: true + tags: [docker, docker_install] + block: + + - name: Install prerequisites + # Packages needed to add Docker apt repo + GPG key + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + update_cache: true + + - name: Ensure /etc/apt/keyrings exists + # Modern Ubuntu practice: store third-party keys in /etc/apt/keyrings + ansible.builtin.file: + path: /etc/apt/keyrings + state: directory + mode: "0755" + + - name: Add Docker GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + + - name: Add Docker repo + ansible.builtin.apt_repository: + repo: "deb [arch=amd64 signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + state: present + + - name: Install Docker packages + # docker-compose-plugin provides "docker compose" (Compose v2) + 
ansible.builtin.apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + state: present + update_cache: true + + rescue: + - name: RESCUE | Triggered after failure inside Docker install block + # For lab: show that rescue exists and executes when a task fails + ansible.builtin.debug: + msg: "Rescue block executed (rescue_demo={{ rescue_demo | default(false) }})" + tags: [docker, docker_install] + + - name: Wait and retry apt update (network/gpg flake) + # Typical transient failures: temporary network issues, repo propagation, etc. + ansible.builtin.pause: + seconds: 10 + tags: [docker, docker_install] + + - name: Retry apt update + ansible.builtin.apt: + update_cache: true + tags: [docker, docker_install] + + always: + - name: Ensure Docker service enabled and running + # ALWAYS block: we want docker started if install succeeded partially. + ansible.builtin.service: + name: docker + state: started + enabled: true + tags: [docker, docker_install] + +- name: Docker | Config + become: true + tags: [docker, docker_config] + block: + - name: Add user to docker group + # Allows running docker without sudo for this user + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true + + - name: Install python docker SDK for Ansible docker modules + # Needed for community.docker modules on the target in some setups + ansible.builtin.apt: + name: python3-docker + state: present diff --git a/ansible/roles/web_app/defaults/main.yml b/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 0000000000..945d8a3ab7 --- /dev/null +++ b/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,42 @@ +--- +# roles/web_app/defaults/main.yml +# Purpose: +# Default variables for the web_app role. 
+# How to override: +# - group_vars/all.yml (recommended) +# - or CLI: -e "var=value" +# +# NOTE: +# Destructive wipe requires BOTH: +# -e "web_app_wipe=true" AND --tags web_app_wipe + +# Host port exposed on the VM (used in health check) +web_app_app_port: 5000 + +# Port inside the container +web_app_container_port: 5000 + +# Docker image tag (e.g., latest, v1.2.3) +web_app_docker_image_tag: latest + +# Container name (for cleanup / backward compatibility) +web_app_container_name: "{{ app_name }}" + +# Restart policy: no | always | on-failure | unless-stopped +web_app_restart_policy: unless-stopped + +# Environment variables passed to the container (dictionary) +# Example: +# web_app_env: +# LOG_LEVEL: "info" +# FEATURE_X: "true" +web_app_env: {} + +# Where docker-compose.yml is placed on the target host +web_app_compose_project_dir: "/opt/{{ app_name }}" + +# Compose file version +web_app_docker_compose_version: "3.8" + +# Wipe gate (destructive). Must be explicitly enabled + tag selected. +web_app_wipe: false diff --git a/ansible/roles/web_app/handlers/main.yml b/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..f81bbf50bb --- /dev/null +++ b/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,4 @@ +--- +- name: Wait for app to start + ansible.builtin.pause: + seconds: 5 diff --git a/ansible/roles/web_app/meta/main.yml b/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..cb7d8e0460 --- /dev/null +++ b/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: docker diff --git a/ansible/roles/web_app/tasks/main.yml b/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..0f718d75ed --- /dev/null +++ b/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,97 @@ +--- +# roles/web_app/tasks/main.yml +# Purpose: +# Deploy application using Docker Compose v2 (docker-compose-plugin). 
+# Safety: +# Destructive wipe is isolated in wipe.yml and is double-gated: +# - must run with --tags web_app_wipe +# - AND web_app_wipe=true +# Tags: +# web_app, compose, app_deploy, web_app_wipe + +- name: Include wipe tasks (double-gated inside wipe.yml) + # NOTE: Tag here allows "wipe-only" runs: --tags web_app_wipe + tags: [web_app_wipe] + ansible.builtin.include_tasks: wipe.yml + +- name: Deploy web application with Docker Compose + # This block does "happy path" deploy; failures go into rescue for debug. + tags: [web_app, compose, app_deploy] + block: + - name: Login to Docker Hub + # no_log hides credentials by default; enable debug by setting debug_docker_login=true. + tags: [app_deploy] + community.docker.docker_login: + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + registry_url: "https://index.docker.io/v1/" + reauthorize: true + no_log: "{{ not (debug_docker_login | default(false)) }}" + + - name: Ensure compose project directory exists + # Stores docker-compose.yml on the target host (idempotent directory creation). + tags: [compose, app_deploy] + ansible.builtin.file: + path: "{{ web_app_compose_project_dir }}" + state: directory + mode: "0755" + + - name: Template docker-compose.yml + # Render compose file from Jinja template using documented vars in the template header. + tags: [compose, app_deploy] + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ web_app_compose_project_dir }}/docker-compose.yml" + mode: "0644" + + - name: Deploy via Docker Compose v2 + # pull=always makes deploy pick up new images even if tag is unchanged (e.g., latest). + tags: [compose, app_deploy] + community.docker.docker_compose_v2: + project_src: "{{ web_app_compose_project_dir }}" + pull: always + state: present + recreate: auto + register: web_app_compose_up + notify: + - Wait for app to start + + - name: Health check + # Localhost check avoids networking issues (NAT / firewall); retries allow container warm-up. 
+ tags: [app_deploy] + ansible.builtin.uri: + url: "http://127.0.0.1:{{ web_app_app_port }}/health" + status_code: 200 + register: web_app_health + retries: 10 + delay: 3 + until: web_app_health.status == 200 + + rescue: + - name: Show compose services on failure + # Debug help: show container state even if compose-up failed mid-way. + tags: [compose, app_deploy] + ansible.builtin.command: docker compose -f "{{ web_app_compose_project_dir }}/docker-compose.yml" ps + register: web_app_compose_ps + changed_when: false + failed_when: false + + - name: Debug compose output + tags: [compose, app_deploy] + ansible.builtin.debug: + var: web_app_compose_ps.stdout_lines + + - name: Fail deployment explicitly + tags: [app_deploy] + ansible.builtin.fail: + msg: "Deployment failed. See docker compose ps output above." + + always: + - name: Log deploy completion marker + # Non-functional marker file for lab evidence; should not create 'changed' noise. + tags: [app_deploy] + ansible.builtin.copy: + dest: "/tmp/web_app_deploy_done" + content: "web_app deploy completed on {{ ansible_date_time.iso8601 }}\n" + mode: "0644" + changed_when: false diff --git a/ansible/roles/web_app/tasks/wipe.yml b/ansible/roles/web_app/tasks/wipe.yml new file mode 100644 index 0000000000..876bb3a959 --- /dev/null +++ b/ansible/roles/web_app/tasks/wipe.yml @@ -0,0 +1,113 @@ +--- +# roles/web_app/tasks/wipe.yml +# Purpose: +# Destructive cleanup of the application deployment: +# - stop/remove old containers +# - remove compose stack +# - delete compose file and project directory +# +# SAFETY MECHANISM (LAB06): +# Wipe is "double-gated" to prevent accidents: +# 1) Variable gate: web_app_wipe=true (explicit user intent) +# 2) Tag gate: --tags web_app_wipe (explicit wipe selection) +# +# This prevents accidental wipe during normal "deploy" or CI/CD runs. 
+ +- name: SAFETY GUARD | Refuse wipe unless --tags web_app_wipe is provided + # If someone sets web_app_wipe=true but runs playbook without tags, + # ansible_run_tags is ["all"] (the default when --tags is not passed), so the assert fails and we refuse to wipe. + ansible.builtin.assert: + that: + - "'web_app_wipe' in (ansible_run_tags | default([]))" + fail_msg: > + Refusing to wipe because --tags web_app_wipe was not provided. + Run: ansible-playbook ... -e "web_app_wipe=true" --tags web_app_wipe + when: web_app_wipe | bool + tags: [web_app_wipe] + +- name: Wipe web application (Compose + leftovers) + # Variable gate: destructive actions only when explicitly enabled + when: web_app_wipe | bool + tags: [web_app_wipe] + block: + + # 0) Remove standalone container (if it was not created by compose) + - name: Remove old container by name if exists + community.docker.docker_container: + name: "{{ web_app_container_name }}" + state: absent + force_kill: true + # We don't want wipe to fail if container doesn't exist. + failed_when: false + + # 1) Find containers exposing the same host port (defensive cleanup) + - name: Gather running containers info + community.docker.docker_host_info: + containers: true + register: web_app_docker_info + changed_when: false + failed_when: false + + - name: Compute containers publishing the app port + # Defensive step: if something else binds the same port, remove it. + # NOTE: Structure of "Ports" varies by Docker version; we handle missing fields safely.
+ ansible.builtin.set_fact: + web_app_port_containers: >- + {{ + (web_app_docker_info.containers | default([])) + | selectattr('Ports', 'defined') + | selectattr('Ports', 'ne', none) + | selectattr('Ports', 'length', '>', 0) + | selectattr( + 'Ports', + 'select', + 'match', + ('^' ~ (web_app_app_port | string) ~ '/') + ) + | map(attribute='Id') + | list + }} + changed_when: false + + - name: Remove containers publishing the app port + community.docker.docker_container: + name: "{{ item }}" + state: absent + force_kill: true + loop: "{{ web_app_port_containers }}" + when: web_app_port_containers | length > 0 + failed_when: false + + # 2) Remove compose stack if it exists + - name: Stop and remove compose stack + community.docker.docker_compose_v2: + project_src: "{{ web_app_compose_project_dir }}" + state: absent + remove_orphans: true + # If compose file is already gone, we still want wipe to continue. + failed_when: false + + # 3) Remove leftover default network (optional, defensive) + - name: Remove compose default network if exists + # Network name is typically "<compose_project_name>_default" + community.docker.docker_network: + name: "{{ app_name }}_default" + state: absent + failed_when: false + + # 4) Remove files on disk + - name: Remove docker-compose.yml + ansible.builtin.file: + path: "{{ web_app_compose_project_dir }}/docker-compose.yml" + state: absent + failed_when: false + + - name: Remove application directory + ansible.builtin.file: + path: "{{ web_app_compose_project_dir }}" + state: absent + failed_when: false + + - name: Log wipe completion + ansible.builtin.debug: + msg: "Application {{ app_name }} wiped successfully" diff --git a/ansible/roles/web_app/templates/docker-compose.yml.j2 b/ansible/roles/web_app/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..0da9beb9eb --- /dev/null +++ b/ansible/roles/web_app/templates/docker-compose.yml.j2 @@ -0,0 +1,24 @@ +{# roles/web_app/templates/docker-compose.yml.j2 #} +{#
------------------------------------------------------------------------- + Template: docker-compose.yml + Purpose: + Compose definition for {{ app_name }} service. + Variables (expected): + app_name (str) - service/container base name + docker_image (str) - image repo, e.g. "user/devops-info-service" + web_app_docker_image_tag (str) - image tag, e.g. "latest" + web_app_app_port (int/str) - host port, e.g. 5000 + web_app_container_port (int/str) - container port, e.g. 5000 + web_app_restart_policy (str) - e.g. "unless-stopped" + web_app_env (dict) - env vars map, e.g. {KEY: "value"} + Notes: + environment is rendered as YAML mapping for readability and idempotency. +------------------------------------------------------------------------- #} + +services: + {{ app_name }}: + image: "{{ docker_image }}:{{ web_app_docker_image_tag }}" + ports: + - "{{ web_app_app_port }}:{{ web_app_container_port }}" + environment: {{ web_app_env | to_nice_yaml(indent=6) | trim }} + restart: "{{ web_app_restart_policy }}" diff --git a/app_python/.coverage b/app_python/.coverage new file mode 100644 index 0000000000..9c87632b2d Binary files /dev/null and b/app_python/.coverage differ diff --git a/app_python/.dockerignore b/app_python/.dockerignore new file mode 100644 index 0000000000..1ec2bf5614 --- /dev/null +++ b/app_python/.dockerignore @@ -0,0 +1,28 @@ +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +.pytest_cache/ +.mypy_cache/ + +# Virtual envs +venv/ +.venv/ + +# VCS / IDE +.git/ +.idea/ +.vscode/ + +# OS junk +.DS_Store +Thumbs.db + +# Project docs/tests not needed at runtime +docs/ +tests/ + +# Markdown/images not needed at runtime +*.md +*.png diff --git a/app_python/.gitignore b/app_python/.gitignore new file mode 100644 index 0000000000..a8ba50671d --- /dev/null +++ b/app_python/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +*.pyc +.env +.venv/ +venv/ +.idea/ +.vscode/ +.DS_Store diff --git a/app_python/Dockerfile b/app_python/Dockerfile new file mode 100644 index 
0000000000..14b07d8b48 --- /dev/null +++ b/app_python/Dockerfile @@ -0,0 +1,54 @@ +# syntax=docker/dockerfile:1.7 + +############################ +# Stage 1: build wheels +############################ +FROM python:3.13-slim AS builder + +WORKDIR /build + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=1 + +COPY requirements.txt . + +RUN python -m pip install --upgrade pip \ + && python -m pip wheel --wheel-dir /wheels -r requirements.txt + + +############################ +# Stage 2: runtime +############################ +FROM python:3.13-slim AS runtime + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=1 \ + HOST=0.0.0.0 \ + PORT=5000 \ + DEBUG=FALSE + +WORKDIR /app + +RUN addgroup --system app \ + && adduser --system --ingroup app --home /home/app --shell /usr/sbin/nologin app + +COPY --from=builder /wheels /wheels +COPY requirements.txt . + +RUN python -m pip install --no-index --find-links=/wheels -r requirements.txt \ + && rm -rf /wheels + +COPY app.py . + +USER app + +EXPOSE 5000 + +HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:5000/health').read()" || exit 1 + +CMD ["python", "app.py"] diff --git a/app_python/README.md b/app_python/README.md new file mode 100644 index 0000000000..efff171dd2 --- /dev/null +++ b/app_python/README.md @@ -0,0 +1,167 @@ +# DevOps Info Service + +![Python CI](https://github.com/wkwtfigo/DevOps-Core-Course/actions/workflows/python-ci.yaml/badge.svg) + +## Overview +The **DevOps Info Service** is a simple web service built with **FastAPI** that provides system information, including: +- Service metadata (name, version, description) +- System details (hostname, platform, CPU count, etc.) 
+- Runtime details (uptime, current time, timezone) +- A health check endpoint for monitoring the application's status + +## Prerequisites +- **Python version:** 3.8 or higher +- **Dependencies:** See `requirements.txt` for the exact package versions. + +## Installation +To set up the project locally, follow these steps: + +1. **Create a virtual environment:** + ```bash + python -m venv venv + ``` + +2. **Activate the virtual environment:** + - On Windows: + ```bash + .\venv\Scripts\activate + ``` + - On Mac/Linux: + ```bash + source venv/bin/activate + ``` + +3. **Install dependencies:** + for running the app: + ```bash + pip install -r requirements.txt + ``` + for tests/lint: + ```bash + pip install -r requirements.txt -r requirements-dev.txt + ``` + +## Running the Application +To run the app locally on the default host and port (binds to `0.0.0.0:5000`, reachable at `http://127.0.0.1:5000`): + +```bash +python app.py +``` + +or with a custom port: + +```bash +$env:PORT="8080"; python app.py # PowerShell +export PORT=8080 && python app.py # bash (Linux/Mac) +``` + +## API Endpoints + +GET / + +Returns comprehensive information about the service, system, and runtime.
+ +Example response: + +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "FastAPI" + }, + "system": { + "hostname": "my-laptop", + "platform": "Linux", + "platform_version": "Ubuntu 24.04", + "architecture": "x86_64", + "cpu_count": 8, + "python_version": "3.9.6" + }, + "runtime": { + "uptime_seconds": 3600, + "uptime_human": "1 hour, 0 minutes", + "current_time": "2026-01-07T14:30:00.000Z", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/7.81.0", + "method": "GET", + "path": "/" + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] +} +``` + +GET /health + +Returns a simple health status, useful for monitoring and Kubernetes probes. + +Example response: + +```json +{ + "status": "healthy", + "timestamp": "2024-01-15T14:30:00.000Z", + "uptime_seconds": 3600 +} +``` + +## Configuration + +The following environment variables can be set to change the application behavior: + +|Variable| Description| Default Value| +|------|---|----| +|HOST |Host IP address| 0.0.0.0| +|PORT |Port number| 5000| +|DEBUG |Enable/Disable debug mode| False| + +## Testing + +Install dev dependencies: +```bash +pip install -r requirements.txt -r requirements-dev.txt +``` + +Run unit tests: + +```bash +pytest -q +``` + +## Docker + +### Build locally +Pattern: +- Build an image from the Dockerfile in this directory: + - `docker build -t <image>:<tag> .` + +Notes: +- Local builds do not require Docker Hub naming. Example image name: `devops-info-service:lab02`.
+ +### Run locally +Pattern: +- Run and publish the container port: + - `docker run --rm --name <container> -p <host_port>:5000 <image>:<tag>` + +Example: +- `docker run --rm --name devops-info -p 5000:5000 devops-info-service:lab02` + +Optional environment overrides (if needed): +- `-e HOST=<host> -e DEBUG=<true|false>` + +Note: +- The container is designed to listen on port **5000** by default. It is recommended to keep the internal port unchanged and only remap the host port using `-p`. + +### Pull from Docker Hub +Pattern: +- Pull: + - `docker pull <dockerhub_user>/<image>:<tag>` +- Run: + - `docker run --rm -p <host_port>:5000 <dockerhub_user>/<image>:<tag>` diff --git a/app_python/app.py b/app_python/app.py new file mode 100644 index 0000000000..0e1c27bfc7 --- /dev/null +++ b/app_python/app.py @@ -0,0 +1,254 @@ +import os +import time +import platform +import logging +import socket +import uvicorn +import sys +from pythonjsonlogger import jsonlogger +from datetime import datetime, timezone + +from fastapi import FastAPI, Request, HTTPException +from fastapi.responses import JSONResponse + +# Configure logging + +class DefaultFieldsFilter(logging.Filter): + def filter(self, record: logging.LogRecord) -> bool: + if not hasattr(record, "service"): + record.service = os.getenv("SERVICE_NAME", "devops-info-service") + if not hasattr(record, "version"): + record.version = os.getenv("SERVICE_VERSION", "1.0.0") + if not hasattr(record, "hostname"): + record.hostname = socket.gethostname() + + for key in ("method", "path", "status_code", "client_ip", "duration_ms"): + if not hasattr(record, key): + setattr(record, key, None) + + return True + + +def setup_json_logging() -> None: + handler = logging.StreamHandler(sys.stdout) + formatter = jsonlogger.JsonFormatter( + "%(asctime)s %(levelname)s %(name)s %(message)s " + "%(service)s %(version)s %(hostname)s " + "%(method)s %(path)s %(status_code)s %(client_ip)s %(duration_ms)s" + ) + handler.setFormatter(formatter) + handler.addFilter(DefaultFieldsFilter()) + + root = logging.getLogger() + root.handlers = [handler] +
root.setLevel(logging.INFO) + + for name in ("uvicorn", "uvicorn.error", "uvicorn.access"): + log = logging.getLogger(name) + log.handlers = [handler] + log.propagate = False + log.setLevel(logging.INFO) + +setup_json_logging() +logger = logging.getLogger(__name__) + +logger.info('Application starting...') + +# Configuration from environment variables +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", "5000")) +DEBUG = os.getenv("DEBUG", "FALSE").lower() == "true" + +SERVICE_NAME = os.getenv("SERVICE_NAME", "devops-info-service") +SERVICE_VERSION = os.getenv("SERVICE_VERSION", "1.0.0") +SERVICE_DESCRIPTION = os.getenv("SERVICE_DESCRIPTION", + "DevOps course info service") +FRAMEWORK = "FastAPI" + +START_TIME = time.time() + +app = FastAPI( + title=SERVICE_NAME, + version=SERVICE_VERSION, + description=SERVICE_DESCRIPTION, +) + + +@app.middleware("http") +async def access_log_middleware(request: Request, call_next): + start = time.perf_counter() + client_ip = request.client.host if request.client else "unknown" + + try: + response = await call_next(request) + status_code = response.status_code + return response + except Exception: + duration_ms = int((time.perf_counter() - start) * 1000) + logger.exception( + "unhandled_exception", + extra={ + "service": SERVICE_NAME, + "version": SERVICE_VERSION, + "hostname": socket.gethostname(), + "method": request.method, + "path": request.url.path, + "status_code": 500, + "client_ip": client_ip, + "duration_ms": duration_ms, + }, + ) + raise + finally: + if "status_code" in locals(): + duration_ms = int((time.perf_counter() - start) * 1000) + logger.info( + "http_request", + extra={ + "service": SERVICE_NAME, + "version": SERVICE_VERSION, + "hostname": socket.gethostname(), + "method": request.method, + "path": request.url.path, + "status_code": status_code, + "client_ip": client_ip, + "duration_ms": duration_ms, + }, + ) + + +def get_uptime_seconds(): + """ + Calculate the uptime of the application. 
+ Returns a dictionary with total seconds and human-readable format. + """ + delta = time.time() - START_TIME + seconds = int(delta) + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + return { + 'seconds': seconds, + 'human': f"{hours} hours, {minutes} minutes" + } + + +def iso_utc_now() -> str: + """ + Get current time in ISO 8601 UTC format. + """ + dt = datetime.now(timezone.utc) + return dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + + +def system_info() -> dict: + """ + Get system information such as platform, version, CPU count. + """ + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.version(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + + +def client_ip_from_request(request: Request) -> str: + """ + Extract client IP address from request. + """ + if request.client and request.client.host: + return request.client.host + return "unknown" + + +@app.get("/", response_class=JSONResponse) +async def root(request: Request): + """ + Main endpoint that returns service, system, and runtime information. 
+ """ + up = get_uptime_seconds() + + return { + "service": { + "name": SERVICE_NAME, + "version": SERVICE_VERSION, + "description": SERVICE_DESCRIPTION, + "framework": FRAMEWORK, + }, + "system": system_info(), + "runtime": { + "uptime_seconds": up['seconds'], + "uptime_human": up['human'], + "current_time": iso_utc_now(), + "timezone": "UTC", + }, + "request": { + "client_ip": client_ip_from_request(request), + "user_agent": request.headers.get("user-agent", "unknown"), + "method": request.method, + "path": request.url.path, + }, + "endpoints": [ + { + "path": "/", + "method": "GET", + "description": "Service information" + }, + { + "path": "/health", + "method": "GET", + "description": "Health check" + }, + ], + } + + +@app.get("/health", response_class=JSONResponse) +async def health(request: Request): + """ + Health check endpoint for monitoring. + """ + up = get_uptime_seconds() + + return { + "status": "healthy", + "timestamp": iso_utc_now(), + "uptime_seconds": up['seconds'], + } + + +@app.exception_handler(500) +async def internal_server_error(request: Request, exc: HTTPException): + logger.error(f"500 Error: {str(exc)} for {request.url.path}") + return JSONResponse( + status_code=500, + content={"message": "Internal server error", "error": str(exc)}, + ) + + +@app.exception_handler(404) +async def not_found_exception(request: Request, exc: HTTPException): + logger.warning( + "not_found", + extra={ + "timestamp": iso_utc_now(), + "level": "WARNING", + "service": SERVICE_NAME, + "version": SERVICE_VERSION, + "hostname": socket.gethostname(), + "method": request.method, + "path": request.url.path, + "status_code": 404, + "client_ip": request.client.host if request.client else "unknown", + }, + ) + return JSONResponse( + status_code=404, + content={"message": "Endpoint not found", "error": str(exc)}, + ) + + +if __name__ == "__main__": + uvicorn.run("app:app", host=HOST, port=PORT, reload=DEBUG) diff --git a/app_python/docs/LAB01.md b/app_python/docs/LAB01.md 
new file mode 100644 index 0000000000..5d93dc5749 --- /dev/null +++ b/app_python/docs/LAB01.md @@ -0,0 +1,272 @@ +# LAB01 -- DevOps Info Service Implementation + +## Framework Selection + +### Choice: **FastAPI** + +I chose FastAPI because it's a lightweight and efficient framework specifically designed for building APIs, which is exactly what the lab requires. Since this is a DevOps course, a full-featured framework like Django would be overkill, as it includes many features we don't need, like an ORM for databases. FastAPI is fast, easy to learn, and supports asynchronous programming, making it ideal for scalable, performance-oriented services in a DevOps context. Additionally, it provides automatic API documentation, which is very useful for rapid development and testing. + +### Comparison Table + +| Feature | **FastAPI** | Flask | Django | +|------------------------------------|----------------------------------|---------------------------------|--------------------------------| +| **Type** | micro-web framework| micro-web framework| full-stack web framework| +| **Asynchronous support** | built-in async support | limited (with extensions) | possible with the help of asyncio, but slower | +| **Automatic API documentation** | Yes (Swagger, ReDoc) | Requires extensions (Flask-RESTful, Flask-OpenAPI) | Yes (DRF with auto docs) | +| **Performance** | very fast for building APIs and microservices| slower because of manual validation and synchronous programming | fast for building large web applications | +| **Learning curve** | Easy | Moderate | Complex | +| **Flexibility** | High | High | Low (more opinionated) | +| **Documentation** | clear, but smaller | large |extensive | + +--- + +## Best Practices Applied + +### 1. **Clean Code Organization** + +- **Clear function names**: Functions are named descriptively to improve readability and maintainability. 
+ + Example: + ```python + def get_uptime_seconds(): + """Calculate the uptime of the application.""" + ``` +- **Grouping imports**: All imports are grouped into standard library imports, followed by third-party and local imports. + + Example: + ```python + import os + import socket + import platform + from datetime import datetime, timezone + ``` + +- **Comments and docstrings**: Comments are minimal, used only where the code is not self-explanatory, and function docstrings are added to describe their purpose. + + Example: + ```python + def iso_utc_now() -> str: + """Returns the current time in ISO 8601 UTC format.""" + dt = datetime.now(timezone.utc) + return dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + ``` + +### 2. **Error Handling** + +- FastAPI handles errors automatically, but custom error handling can be added for cases like 404 (Not Found) and 500 (Internal Server Error). + + Example: + ```python + @app.exception_handler(404) + async def not_found_exception(request: Request, exc: HTTPException): + return JSONResponse( + status_code=404, + content={"message": "Endpoint not found", "error": str(exc)}, + ) + ``` + + This allows us to give more informative and controlled responses for errors. + +### 3. **Logging** + +- Implemented logging to capture important application events and errors. + + Example: + ```python + import logging + + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + logger = logging.getLogger(__name__) + + logger.info('Application starting...') + ``` + +Logging helps in tracing application behavior and troubleshooting issues. + +### 4. **PEP 8 Compliance** + +- Adhered to Python’s **PEP 8** guidelines, including consistent naming conventions, indentation, and spacing. 
+ + Example: + ```python + def system_info() -> dict: + """Returns system information like hostname, platform, and architecture.""" + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.version(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + ``` + +--- + +## API Documentation + +### Request/Response Examples + +#### `GET /` + +**Request Example:** +```bash +curl http://127.0.0.1:5000/ +``` + +Response Example: + +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "FastAPI" + }, + "system": { + "hostname": "my-laptop", + "platform": "Linux", + "platform_version": "Ubuntu 24.04", + "architecture": "x86_64", + "cpu_count": 8, + "python_version": "3.9.6" + }, + "runtime": { + "uptime_seconds": 3600, + "uptime_human": "1 hour, 0 minutes", + "current_time": "2026-01-07T14:30:00.000Z", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/7.81.0", + "method": "GET", + "path": "/" + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] +} +``` + +#### `GET /health` + +Returns a simple health status, useful for monitoring and Kubernetes probes. + +**Request Example:** +```bash +curl http://127.0.0.1:5000/health +``` + +Example response: + +```json +{ + "status": "healthy", + "timestamp": "2024-01-15T14:30:00.000Z", + "uptime_seconds": 3600 +} +``` + +### Testing commands + +1. Test the `/` endpoint: + + ```bash + curl -s http://127.0.0.1:5000/ | python -m json.tool + ``` + +2. Test the `/health` endpoint: + + ```bash + curl -s http://127.0.0.1:5000/health | python -m json.tool + ``` + +## Testing evidence + +### Screenshots showing endpoints work + +1. 
`01-main-endpoint`: +![01-main-endpoint](screenshots/01-main-endpoint.png) + +2. `02-health-check`: +![02-health-check](screenshots/02-health-check.png) + +3. `03-formatted-output`: +![03-formatted-output](screenshots/03-formatted-output.png) + +### Terminal output showing successful response + +1. 01-main-endpoint + +```bash +zagur@LAPTOP-JONCQBVT:/mnt/c/Users/zagur/DevOps/DevOps-Core-Course$ curl -s http://127.0.0.1:3000/ | python3 -m json.tool +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "FastAPI" + }, + "system": { + "hostname": "LAPTOP-JONCQBVT", + "platform": "Linux", + "platform_version": "#1 SMP PREEMPT_DYNAMIC Thu Jun 5 18:30:46 UTC 2025", + "architecture": "x86_64", + "cpu_count": 8, + "python_version": "3.12.3" + }, + "runtime": { + "uptime_seconds": 3, + "uptime_human": "0 hours, 0 minutes", + "current_time": "2026-01-24T18:38:52.811Z", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/8.5.0", + "method": "GET", + "path": "/" + }, + "endpoints": [ + { + "path": "/", + "method": "GET", + "description": "Service information" + }, + { + "path": "/health", + "method": "GET", + "description": "Health check" + } + ] +} +``` + +2. 02-health-check + +```bash +zagur@LAPTOP-JONCQBVT:/mnt/c/Users/zagur/DevOps/DevOps-Core-Course$ curl -s http://127.0.0.1:3000/health | python3 -m json.tool +{ + "status": "healthy", + "timestamp": "2026-01-24T18:39:57.365Z", + "uptime_seconds": 67 +} +``` + +## Challenges & Solutions + +**Problem:** Since I'm not very familiar with the syntax of FastAPI and Python, I encountered several syntax errors while writing the code, such as incorrect function definitions or missing imports. These errors caused issues like the app failing to start or returning unexpected results. + +**Solution:** To resolve this, I carefully referred to the FastAPI documentation and Python's official documentation. 
I also ran the app frequently during development to catch any issues early. Debugging with error messages and checking online resources helped me understand and correct mistakes in the syntax. + +## GitHub Community + +1. **Why stars matter in open source:** Stars are a way to discover, bookmark, and show appreciation for interesting and promising projects. The more stars a repo has, the more popular and trusted it is. This helps projects get more visibility and attract more contributors. + +2. **How followers can help:** Following developers lets you stay updated on their work and learn from them. It also helps you find potential teammates for future projects and stay aware of new technologies. \ No newline at end of file diff --git a/app_python/docs/LAB02.md b/app_python/docs/LAB02.md new file mode 100644 index 0000000000..01f0643650 --- /dev/null +++ b/app_python/docs/LAB02.md @@ -0,0 +1,312 @@ +# LAB02 -- Docker Containerization + +## 1. Docker Best Practices Applied + +### 1.1 Specific base image version +Used: `python:3.13-slim` + +Why it matters: +- Pinned major/minor version improves reproducibility (same interpreter + base libs) +- `slim` reduces image size vs full Debian images + +Dockerfile snippet: +```dockerfile +FROM python:3.13-slim AS builder +FROM python:3.13-slim AS runtime +``` + +### 1.2 Non-root user + +Implemented with adduser/addgroup and USER app + +Why it matters: +- Reduces impact if the app is compromised +- Prevents accidental writes to system paths + +Snippet: +```dockerfile +RUN addgroup --system app \ + && adduser --system --ingroup app --home /home/app --shell /usr/sbin/nologin app +USER app +``` + +### 1.3 Proper layer ordering (dependency caching) + +Copied requirements.txt before the code + +Why it matters: +- Docker cache can reuse the dependency layer if only the app code changes +- Faster rebuilds during development + +### 1.4 Copy only necessary files + +The runtime image contains only: +- requirements.txt (for install step) +- app.py 
(runtime code) + +Why it matters: +- Smaller image +- Less attack surface (no tests/docs/dev artifacts) + +### 1.5 .dockerignore + +Excluded: caches, venvs, docs, tests + +Why it matters: +- Smaller build context → faster builds +- Avoid leaking local artifacts into the build + +### 1.6 Multi-stage build (optimization) + +- Stage 1 builds wheels +- Stage 2 installs from wheels with --no-index + +Why it matters: +- Cleaner runtime stage +- More reproducible installs (install exactly built artifacts) +- Better separation of responsibilities + +## 2. Image Information & Decisions + +### 2.1 Base image choice + +Chosen: python:3.13-slim + +Justification: Small footprint, still compatible with most Python packages + +### 2.2 Final image size + +![image_size](screenshots/image_size.png) +Image size: 165MB + +Assessment: +- **165MB** is reasonable for a FastAPI service on `python:3.13-slim` because the final size includes: + - the Python runtime + Debian slim base layers + - installed dependencies (FastAPI, Uvicorn, etc.) 
+- The image is already optimized by: + - using `slim` instead of full image + - multi-stage build (wheels built in builder stage) + - not shipping tests/docs and using `.dockerignore` +- Further possible reductions (optional): + - switch to a smaller runtime base (e.g., distroless python) if compatible + - ensure only runtime deps are installed (no dev/test deps) + - audit dependencies to reduce transitive packages + +### 2.3 Layer structure + +Command: +```bash +docker history devops-info-service:lab02 +``` + +```powershell +PS C:\Users\zagur\DevOps\DevOps-Core-Course\app_python> docker history devops-info-service:lab02 +IMAGE CREATED CREATED BY SIZE COMMENT +8eb6994eaeaa 14 seconds ago CMD ["python" "app.py"] 0B buildkit.dockerfile.v0 + 14 seconds ago HEALTHCHECK &{["CMD-SHELL" "python -c \"impo… 0B buildkit.dockerfile.v0 + 14 seconds ago EXPOSE map[5000/tcp:{}] 0B buildkit.dockerfile.v0 + 14 seconds ago USER app 0B buildkit.dockerfile.v0 + 14 seconds ago COPY app.py . # buildkit 4.63kB buildkit.dockerfile.v0 + 14 seconds ago RUN /bin/sh -c python -m pip install --no-in… 38.1MB buildkit.dockerfile.v0 + 17 seconds ago COPY requirements.txt . 
# buildkit 43B buildkit.dockerfile.v0 + 17 seconds ago COPY /wheels /wheels # buildkit 9.48MB buildkit.dockerfile.v0 + 28 seconds ago RUN /bin/sh -c addgroup --system app && add… 4.3kB buildkit.dockerfile.v0 + 24 hours ago WORKDIR /app 0B buildkit.dockerfile.v0 + 24 hours ago ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFER… 0B buildkit.dockerfile.v0 + 2 weeks ago CMD ["python3"] 0B buildkit.dockerfile.v0 + 2 weeks ago RUN /bin/sh -c set -eux; for src in idle3 p… 36B buildkit.dockerfile.v0 + 2 weeks ago RUN /bin/sh -c set -eux; savedAptMark="$(a… 35.2MB buildkit.dockerfile.v0 + 2 weeks ago ENV PYTHON_SHA256=16ede7bb7cdbfa895d11b0642f… 0B buildkit.dockerfile.v0 + 2 weeks ago ENV PYTHON_VERSION=3.13.11 0B buildkit.dockerfile.v0 + 2 weeks ago ENV GPG_KEY=7169605F62C751356D054A26A821E680… 0B buildkit.dockerfile.v0 + 2 weeks ago RUN /bin/sh -c set -eux; apt-get update; a… 3.81MB buildkit.dockerfile.v0 + 2 weeks ago ENV PATH=/usr/local/bin:/usr/local/sbin:/usr… 0B buildkit.dockerfile.v0 + 2 weeks ago # debian.sh --arch 'amd64' out/ 'trixie' '@1… 78.6MB debuerreotype 0.17 +``` + +**Explanation:** + +- **Stable layers (change rarely):** + - Base image layers from `python:3.13-slim` (Debian OS + Python runtime). + - Creation of the non-root user (`addgroup` / `adduser`). + - Dependency installation layer (`pip install --no-index --find-links=/wheels`) **as long as `requirements.txt` does not change**. + +- **Frequently changing layer:** + - `COPY app.py .` — any change in the application code invalidates only this layer and the layers above it, which makes rebuilds fast. + +- **Why the layer order matters:** + - `requirements.txt` is copied and dependencies are installed **before** copying the application code. + - This allows Docker to reuse cached dependency layers when only the source code changes. + - As a result, rebuilds are significantly faster during development. + +- **Size observation:** + - The largest custom layer in the image is the dependency installation (~38MB). 
+ - The application code itself is very small (~4.6KB), showing that most of the image size comes from the base image and Python packages, not from the app. + + +## 3. Build & Run Process +### 3.1 Build output + +```bash +PS C:\Users\zagur\DevOps\DevOps-Core-Course\app_python> docker build -t devops-info-service:lab02 . +>> +[+] Building 31.0s (16/16) FINISHED docker:desktop-linux + => [internal] load build definition from Dockerfile 0.0s + => => transferring dockerfile: 1.13kB 0.0s + => WARN: FromAsCasing: 'as' and 'FROM' keywords' casing do not match 0.0s + => [internal] load metadata for docker.io/library/python:3.13-slim 2.6s + => [auth] library/python:pull token for registry-1.docker.io 0.0s + => [internal] load .dockerignore 0.1s + => => transferring context: 342B 0.0s + => [internal] load build context 0.1s + => => transferring context: 4.75kB 0.0s + => [builder 1/4] FROM docker.io/library/python:3.13-slim@sha256:51e1a 5.3s + => => resolve docker.io/library/python:3.13-slim@sha256:51e1a0a317fdb 0.0s + => => sha256:51e1a0a317fdb6e170dc791bbeae63fac5272c 10.37kB / 10.37kB 0.0s + => => sha256:fbc43b66207d7e2966b5f06e86f2bc46aa4b10f3 1.75kB / 1.75kB 0.0s + => => sha256:dd4049879a507d6f4bb579d2d94b591135b95daa 5.53kB / 5.53kB 0.0s + => => sha256:119d43eec815e5f9a47da3a7d59454581b1e20 29.77MB / 29.77MB 1.8s + => => sha256:8843ea38a07e15ac1b99c72108fbb492f7370329 1.29MB / 1.29MB 1.7s + => => sha256:0bee50492702eb5d822fbcbac8f545a25f5fe1 11.79MB / 11.79MB 2.7s + => => sha256:36b6de65fd8d6bd36071ea9efa7d078ebdc11ecc23d2 249B / 249B 2.1s + => => extracting sha256:119d43eec815e5f9a47da3a7d59454581b1e204b0c34d 1.7s + => => extracting sha256:8843ea38a07e15ac1b99c72108fbb492f737032986cc0 0.3s + => => extracting sha256:0bee50492702eb5d822fbcbac8f545a25f5fe173ec803 0.9s + => => extracting sha256:36b6de65fd8d6bd36071ea9efa7d078ebdc11ecc23d24 0.0s + => [runtime 2/7] WORKDIR /app 0.3s + => [builder 2/4] WORKDIR /build 0.3s + => [runtime 3/7] RUN addgroup --system app && 
adduser --system --in 1.0s + => [builder 3/4] COPY requirements.txt . 0.1s + => [builder 4/4] RUN python -m pip install --upgrade pip && python 15.5s + => [runtime 4/7] COPY --from=builder /wheels /wheels 0.1s + => [runtime 5/7] COPY requirements.txt . 0.1s + => [runtime 6/7] RUN python -m pip install --no-index --find-links=/w 5.8s + => [runtime 7/7] COPY app.py . 0.0s + => exporting to image 0.3s + => => exporting layers 0.3s + => => writing image sha256:2ef988bd86b19dd70ff6a63c17cf3c2d78aec55343 0.0s + => => naming to docker.io/library/devops-info-service:lab02 0.0s + +View build details: docker-desktop://dashboard/build/desktop-linux/desktop-linux/lpcyojni4xwvqtruz2h4lyk77 + +What's next: + View a summary of image vulnerabilities and recommendations → docker scout quickview +``` + +### 3.2 Run output (container logs) + +Commands: +```bash +docker run -d --name devops-info -p 5000:5000 devops-info-service:lab02 +docker ps +docker logs devops-info +``` + +![image ps](screenshots/image_ps.png) + +![image logs](screenshots/image_logs.png) + +### 3.3 Endpoint tests + +Commands: + +curl http://localhost:5000/ +curl http://localhost:5000/health + + +Outputs: + +```bash +PS C:\Users\zagur\DevOps\DevOps-Core-Course> curl http://localhost:5000/health +{"status":"healthy","timestamp":"2026-02-01T09:25:27.205Z","uptime_seconds":102} +PS C:\Users\zagur\DevOps\DevOps-Core-Course> curl http://localhost:5000 +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"e84c8b5f4d68","platform":"Linux","platform_version":"#1 SMP PREEMPT_DYNAMIC Thu Jun 5 18:30:46 UTC 2025","architecture":"x86_64","cpu_count":8,"python_version":"3.13.11"},"runtime":{"uptime_seconds":111,"uptime_human":"0 hours, 1 
minutes","current_time":"2026-02-01T09:25:36.565Z","timezone":"UTC"},"request":{"client_ip":"172.17.0.1","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} +``` + +![image_curl](screenshots/image_curl.png) + +### 3.4 Docker Hub + +Repository: `wkwtfigo/devops-info-service` + +Docker Hub URL: +```text +https://hub.docker.com/r/wkwtfigo/devops-info-service +``` + +Push output: + +```bash +PS C:\Users\zagur\DevOps\DevOps-Core-Course> docker push wkwtfigo/devops-info-service:lab02 +The push refers to repository [docker.io/wkwtfigo/devops-info-service] +6a094d39e043: Pushed +8b8714259814: Pushed +0c5a41331b54: Pushed +7a93808411ef: Pushed +5b31ce4e2f66: Pushed +014fb08f1599: Pushed +a915d0aa80cd: Pushed +ad1b18dd62d2: Pushed +d85cc8d16465: Mounted from library/python +e50a58335e13: Mounted from library/python +lab02: digest: sha256:5936cd71eb78ae5a3b17cd47b34156c6c061cc549ce06d46d9c365234cc2d2a2 size: 2411 +PS C:\Users\zagur\DevOps\DevOps-Core-Course> docker push wkwtfigo/devops-info-service:latest +The push refers to repository [docker.io/wkwtfigo/devops-info-service] +6a094d39e043: Layer already exists +8b8714259814: Layer already exists +0c5a41331b54: Layer already exists +7a93808411ef: Layer already exists +5b31ce4e2f66: Layer already exists +014fb08f1599: Layer already exists +a915d0aa80cd: Layer already exists +ad1b18dd62d2: Layer already exists +d85cc8d16465: Layer already exists +e50a58335e13: Layer already exists +latest: digest: sha256:5936cd71eb78ae5a3b17cd47b34156c6c061cc549ce06d46d9c365234cc2d2a2 size: 2411 +PS C:\Users\zagur\DevOps\DevOps-Core-Course> docker push wkwtfigo/devops-info-service:1.0.0 +The push refers to repository [docker.io/wkwtfigo/devops-info-service] +6a094d39e043: Layer already exists +8b8714259814: Layer already exists +0c5a41331b54: Layer already exists +7a93808411ef: Layer already exists 
+d85cc8d16465: Layer already exists +e50a58335e13: Layer already exists +1.0.0: digest: sha256:5936cd71eb78ae5a3b17cd47b34156c6c061cc549ce06d46d9c365234cc2d2a2 size: 2411 +``` + +## 4. Technical Analysis +### 4.1 Why this Dockerfile works + +CMD ["python", "app.py"] matches local workflow: app.py starts uvicorn and reads env vars. EXPOSE 5000 documents the port used by the app. Dependencies are installed before copying application code to maximize caching. + +### 4.2 What happens if layer order changes? + +If we copy all files before installing dependencies: +- Any code change invalidates the cache +- Docker will reinstall dependencies on every build → much slower builds + +### 4.3 Security considerations + +- Non-root runtime user +- Minimal base image (slim) +- No extra tools installed (e.g., curl) → smaller attack surface + +### 4.4 How .dockerignore improves the build + +- Reduces amount of data sent to Docker daemon +- Prevents accidental inclusion of venv/caches/tests/docs +- Speeds up build and reduces image clutter + +## 5. Challenges & Solutions + +### 5.1 Docker Hub authentication/network issue during build +- **Issue:** Docker failed to fetch OAuth token from Docker Hub (`auth.docker.io/token`) and could not pull the base image. +- **Debug:** Verified that `docker pull python:3.13-slim` failed, checked network/DNS connectivity. +- **Fix:** Restarted Docker Desktop / WSL networking and adjusted network settings (DNS/proxy) until pull worked. +- **Lesson learned:** Docker builds depend on registry access; when base images cannot be pulled, the issue is usually network/DNS/proxy rather than Dockerfile syntax. + +### 5.2 Dockerfile linter warning (FROM/AS casing) +- **Issue:** Build produced warning `FromAsCasing: 'as' and 'FROM' keywords' casing do not match`. +- **Fix:** Replaced `as` with `AS` in multi-stage `FROM ... AS ...` lines. +- **Lesson learned:** Even non-fatal warnings should be fixed to keep the build output clean and professional. 
+ diff --git a/app_python/docs/LAB03.md b/app_python/docs/LAB03.md new file mode 100644 index 0000000000..031071b4df --- /dev/null +++ b/app_python/docs/LAB03.md @@ -0,0 +1,276 @@ +# LAB03 — CI/CD with GitHub Actions + +## 1. Overview + +This lab implements a CI/CD pipeline for the **DevOps Info Service** using **GitHub Actions**. + +- **Testing framework:** `pytest` +- **Linting:** `ruff` +- **Container build & publish:** Docker Buildx → Docker Hub +- **Security scanning:** Snyk CLI (dependency scan from `requirements.txt`) + +### Testing framework choice + +Two common Python testing frameworks were considered: + +- **unittest** — built-in Python testing framework with class-based test structure. +- **pytest** — third-party framework with simpler syntax, fixtures, and better readability. + +**Chosen framework: pytest** + +**Justification:** +- simpler test syntax (plain functions instead of classes) +- powerful fixtures and ecosystem +- widely used in modern Python projects +- better failure output and developer experience +- easy integration with CI pipelines + +For this project, pytest provides a clean and maintainable way to test API endpoints. + +**What is tested:** +- `GET /` returns a valid JSON structure with required sections (`service`, `system`, `runtime`, `request`, `endpoints`) +- `GET /health` returns `"status": "healthy"` and includes `timestamp` and `uptime_seconds` +- unknown endpoints return a JSON 404 response + +**Workflow triggers:** +- Runs on `push` and `pull_request` for branches `master` and `lab03` +- Runs only when files in `app_python/**` or the workflow file change (path filter) + +**Versioning strategy (Docker tags):** +- **CalVer** tag: `YYYY.MM.DD-SHORT_SHA`, e.g. `2026.02.09-1d708b5` (traceable build version) +- Branch tag: `lab03` (easy to pull the latest build from this branch) + +--- + +## 2. 
Workflow Evidence + +### 2.1 GitHub Actions run +- Workflow file: `.github/workflows/python-ci.yaml` +- Successful run link: **** + +### 2.2 Test + lint output (CI) + +![lint and test evidence](/app_python/docs/screenshots/lint+test.png) + +### 2.3 Docker image publish +Docker Hub repository: +- `wkwtfigo/devops-info-service` +- URL: +```text +https://hub.docker.com/r/wkwtfigo/devops-info-service +``` + +Published tags from CI: +- `lab03` +- `2026.02.09-1d708b5` + +```bash +#21 pushing ***/devops-info-service:lab03 with docker +#21 pushing layer bf48bca45dca +#21 pushing layer 220bf4a7cb08 +#21 pushing layer 473bf974db40 +#21 pushing layer f73c1c8fba85 +#21 pushing layer 96c05063c739 +#21 pushing layer 241fcae5008f +#21 pushing layer 61e0df330e38 +#21 pushing layer 1dfdd9260fd4 +#21 pushing layer 0ae7ca672022 +#21 pushing layer a8ff6f8cbdfd +#21 pushing layer 473bf974db40 512B / 42B 0.2s +#21 pushing layer bf48bca45dca 7.17kB / 4.72kB 0.3s +#21 pushing layer 473bf974db40 2.56kB / 42B 0.3s +#21 pushing layer f73c1c8fba85 1.65MB / 9.48MB 0.4s +#21 pushing layer 96c05063c739 11.78kB / 3.87kB 0.3s +#21 pushing layer f73c1c8fba85 5.40MB / 9.48MB 0.5s +#21 pushing layer 220bf4a7cb08 2.31MB / 38.07MB 0.5s +#21 pushing layer f73c1c8fba85 8.84MB / 9.48MB 0.6s +#21 pushing layer 220bf4a7cb08 8.64MB / 38.07MB 0.7s +#21 pushing layer f73c1c8fba85 9.49MB / 9.48MB 0.6s +#21 pushing layer 220bf4a7cb08 11.77MB / 38.07MB 0.8s +#21 pushing layer 220bf4a7cb08 14.52MB / 38.07MB 0.9s +#21 pushing layer 220bf4a7cb08 17.65MB / 38.07MB 1.0s +#21 pushing layer 220bf4a7cb08 20.40MB / 38.07MB 1.2s +#21 pushing layer 220bf4a7cb08 22.76MB / 38.07MB 1.3s +#21 pushing layer 96c05063c739 1.3s done +#21 pushing layer 220bf4a7cb08 26.30MB / 38.07MB 1.4s +#21 pushing layer 473bf974db40 1.3s done +#21 pushing layer bf48bca45dca 1.4s done +#21 pushing layer 220bf4a7cb08 33.37MB / 38.07MB 1.6s +#21 pushing layer 220bf4a7cb08 36.51MB / 38.07MB 1.7s +#21 pushing layer 220bf4a7cb08 38.92MB / 38.07MB 1.8s 
+#21 pushing layer f73c1c8fba85 2.1s done +#21 pushing layer 241fcae5008f 2.5s done +#21 pushing layer 220bf4a7cb08 3.1s done +#21 pushing layer 0ae7ca672022 5.9s done +#21 pushing layer 61e0df330e38 5.9s done +#21 pushing layer 1dfdd9260fd4 5.9s done +#21 pushing layer a8ff6f8cbdfd 5.9s done +#21 DONE 6.0s + +#22 pushing ***/devops-info-service:2026.02.09-1d708b5 with docker +#22 pushing layer a8ff6f8cbdfd 1.9s done +#22 pushing layer bf48bca45dca 1.9s done +#22 pushing layer 220bf4a7cb08 1.9s done +#22 pushing layer 473bf974db40 1.9s done +#22 pushing layer f73c1c8fba85 1.9s done +#22 pushing layer 96c05063c739 1.9s done +#22 pushing layer 241fcae5008f 1.9s done +#22 pushing layer 61e0df330e38 1.9s done +#22 pushing layer 1dfdd9260fd4 1.9s done +#22 pushing layer 0ae7ca672022 1.9s done +#22 DONE 1.9s +``` + +![docker hub](/app_python/docs/screenshots/dockerhub.png) + +## 3. Best Practices Implemented +### 3.1 Separation of concerns (jobs) + +- `test` job runs lint + unit tests +- `docker` job runs only after tests succeed (`needs: test`) +- Result: images are published only from verified code. + +### 3.2 Matrix testing + +Tests run on Python 3.12 and 3.13, which increases confidence that the service works across supported versions. + +### 3.3 Dependency caching + +Dependency caching is implemented using `actions/setup-python` with pip cache enabled: + +```yaml +cache: pip +cache-dependency-path: + - app_python/requirements.txt + - app_python/requirements-dev.txt +``` + +Cache behavior was verified using two workflow runs: +- Without cache: dependencies were downloaded from PyPI +- With cache: pip reused locally cached wheels ("Using cached ...") + +In this project, dependency installation time remained at about 9 seconds in both cases because the dependency set is small and installs quickly. + +However, caching still prevents repeated downloads and becomes significantly more beneficial for larger projects with heavy dependencies. 
+ +`pip` logs without cache: +```bash +Downloading httpx-0.28.1-py3-none-any.whl (73 kB) +Downloading httpcore-1.0.9-py3-none-any.whl (78 kB) +Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB) +Downloading click-8.3.1-py3-none-any.whl (108 kB) +Downloading coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (254 kB) +Downloading h11-0.16.0-py3-none-any.whl (37 kB) +Downloading httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (517 kB) +Downloading idna-3.11-py3-none-any.whl (71 kB) +Downloading iniconfig-2.3.0-py3-none-any.whl (7.5 kB) +Downloading packaging-26.0-py3-none-any.whl (74 kB) +Downloading pygments-2.19.2-py3-none-any.whl (1.2 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2 MB 89.3 MB/s 0:00:00 +Downloading python_dotenv-1.2.1-py3-none-any.whl (21 kB) +Downloading pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (807 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 807.9/807.9 kB 68.2 MB/s 0:00:00 +Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB) +Downloading typing_inspection-0.4.2-py3-none-any.whl (14 kB) +Downloading uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (4.4 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/4.4 MB 118.8 MB/s 0:00:00 +Downloading watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (456 kB) +Downloading websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (184 kB) +Downloading certifi-2026.1.4-py3-none-any.whl (152 kB) +``` + +`pip` logs with cache: +```bash +Using cached fastapi-0.115.0-py3-none-any.whl (94 kB) +Using cached uvicorn-0.30.6-py3-none-any.whl (62 kB) +Using cached pydantic-2.12.5-py3-none-any.whl (463 kB) +Using cached pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB) +Using cached 
starlette-0.38.6-py3-none-any.whl (71 kB) +Using cached anyio-4.12.1-py3-none-any.whl (113 kB) +Using cached pytest-9.0.2-py3-none-any.whl (374 kB) +Using cached pluggy-1.6.0-py3-none-any.whl (20 kB) +Using cached pytest_cov-7.0.0-py3-none-any.whl (22 kB) +Using cached ruff-0.15.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.1 MB) +Using cached httpx-0.28.1-py3-none-any.whl (73 kB) +Using cached httpcore-1.0.9-py3-none-any.whl (78 kB) +Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB) +Using cached click-8.3.1-py3-none-any.whl (108 kB) +Downloading coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (254 kB) +Using cached h11-0.16.0-py3-none-any.whl (37 kB) +Using cached httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (517 kB) +Using cached idna-3.11-py3-none-any.whl (71 kB) +Using cached iniconfig-2.3.0-py3-none-any.whl (7.5 kB) +Using cached packaging-26.0-py3-none-any.whl (74 kB) +Using cached pygments-2.19.2-py3-none-any.whl (1.2 MB) +Using cached python_dotenv-1.2.1-py3-none-any.whl (21 kB) +Using cached pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (807 kB) +Using cached typing_extensions-4.15.0-py3-none-any.whl (44 kB) +Using cached typing_inspection-0.4.2-py3-none-any.whl (14 kB) +Using cached uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (4.4 MB) +Using cached watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (456 kB) +Using cached websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (184 kB) +Using cached certifi-2026.1.4-py3-none-any.whl (152 kB) +``` + +### 3.4 Path filtering + +Workflow runs only when app_python/** or the workflow file changes. +This avoids running CI for unrelated edits (docs/lectures/etc.). 
+ +### 3.5 Concurrency control + +The workflow uses: + +```yaml +concurrency: + group: python-ci-${{ github.ref }} + cancel-in-progress: true +``` + +This prevents wasting minutes on outdated runs when multiple commits are pushed quickly. + +## 4. Security (Snyk) + +Snyk is executed using the Snyk CLI and scans the runtime dependency file: + +- Target file: `app_python/requirements.txt` +- Command: + +```bash +snyk test --file=app_python/requirements.txt --severity-threshold=high --skip-unresolved +``` + +**Result:** + +- Snyk findings: `0 high-severity vulnerabilities` +- Action taken: + - No vulnerabilities were found in the project. + +![snyk output](/app_python/docs/screenshots/snyk.png) + +## 5. Key Decisions + +- Ruff instead of flake8: faster and simpler configuration; good for CI. +- CalVer tags for Docker images: easy to see when an image was produced and link it to a commit (shortSHA). +- Tests validate JSON structure, not exact host values: values like `hostname` and platform differ between local machine and CI runner. + +## 6. Challenges & Fixes +### 6.1 Workflow not triggering + +**Cause:** incorrect path filter (`.yml` vs `.yaml`) prevented workflow changes from triggering runs. + +**Fix:** updated paths to match the real filename: `.github/workflows/python-ci.yaml`. + +### 6.2 Snyk action failing due to missing packages + +**Cause:** `snyk/actions/python@master` runs in a container and didn’t see installed packages. + +**Fix:** switched to Snyk CLI installed via `npm`, scanning `requirements.txt` directly. + +### 6.3 Dependency separation + +**Cause:** dev tools (pytest/ruff/httpx) were mixed into runtime requirements, increasing image size and scan noise. 
+ +**Fix:** split dependencies into: +- `requirements.txt` (runtime) +- `requirements-dev.txt` (dev/test tools) \ No newline at end of file diff --git a/app_python/docs/screenshots/01-main-endpoint-browser.png b/app_python/docs/screenshots/01-main-endpoint-browser.png new file mode 100644 index 0000000000..97c8c63098 Binary files /dev/null and b/app_python/docs/screenshots/01-main-endpoint-browser.png differ diff --git a/app_python/docs/screenshots/01-main-endpoint.png b/app_python/docs/screenshots/01-main-endpoint.png new file mode 100644 index 0000000000..b294763257 Binary files /dev/null and b/app_python/docs/screenshots/01-main-endpoint.png differ diff --git a/app_python/docs/screenshots/02-health-check-browser.png b/app_python/docs/screenshots/02-health-check-browser.png new file mode 100644 index 0000000000..231d343e16 Binary files /dev/null and b/app_python/docs/screenshots/02-health-check-browser.png differ diff --git a/app_python/docs/screenshots/02-health-check.png b/app_python/docs/screenshots/02-health-check.png new file mode 100644 index 0000000000..0bb1796299 Binary files /dev/null and b/app_python/docs/screenshots/02-health-check.png differ diff --git a/app_python/docs/screenshots/03-formatted-output.png b/app_python/docs/screenshots/03-formatted-output.png new file mode 100644 index 0000000000..7b63a8a3bc Binary files /dev/null and b/app_python/docs/screenshots/03-formatted-output.png differ diff --git a/app_python/docs/screenshots/dockerhub.png b/app_python/docs/screenshots/dockerhub.png new file mode 100644 index 0000000000..8643388483 Binary files /dev/null and b/app_python/docs/screenshots/dockerhub.png differ diff --git a/app_python/docs/screenshots/image_curl.png b/app_python/docs/screenshots/image_curl.png new file mode 100644 index 0000000000..43ebc8aa99 Binary files /dev/null and b/app_python/docs/screenshots/image_curl.png differ diff --git a/app_python/docs/screenshots/image_logs.png b/app_python/docs/screenshots/image_logs.png new file 
mode 100644 index 0000000000..13fa5f73c0 Binary files /dev/null and b/app_python/docs/screenshots/image_logs.png differ diff --git a/app_python/docs/screenshots/image_ps.png b/app_python/docs/screenshots/image_ps.png new file mode 100644 index 0000000000..b829cef600 Binary files /dev/null and b/app_python/docs/screenshots/image_ps.png differ diff --git a/app_python/docs/screenshots/image_size.png b/app_python/docs/screenshots/image_size.png new file mode 100644 index 0000000000..dbd57747ee Binary files /dev/null and b/app_python/docs/screenshots/image_size.png differ diff --git a/app_python/docs/screenshots/lint+test.png b/app_python/docs/screenshots/lint+test.png new file mode 100644 index 0000000000..e443474de9 Binary files /dev/null and b/app_python/docs/screenshots/lint+test.png differ diff --git a/app_python/docs/screenshots/pytest_q.png b/app_python/docs/screenshots/pytest_q.png new file mode 100644 index 0000000000..d88dfc5d7e Binary files /dev/null and b/app_python/docs/screenshots/pytest_q.png differ diff --git a/app_python/docs/screenshots/pytest_q_cov.png b/app_python/docs/screenshots/pytest_q_cov.png new file mode 100644 index 0000000000..032ff8dc0a Binary files /dev/null and b/app_python/docs/screenshots/pytest_q_cov.png differ diff --git a/app_python/docs/screenshots/ruff_check.png b/app_python/docs/screenshots/ruff_check.png new file mode 100644 index 0000000000..802dab01ea Binary files /dev/null and b/app_python/docs/screenshots/ruff_check.png differ diff --git a/app_python/docs/screenshots/snyk.png b/app_python/docs/screenshots/snyk.png new file mode 100644 index 0000000000..0f2474d663 Binary files /dev/null and b/app_python/docs/screenshots/snyk.png differ diff --git a/app_python/requirements-dev.txt b/app_python/requirements-dev.txt new file mode 100644 index 0000000000..ff4670fdf0 --- /dev/null +++ b/app_python/requirements-dev.txt @@ -0,0 +1,4 @@ +pytest>=8.0.0 +pytest-cov>=5.0.0 +ruff>=0.6.0 +httpx>=0.27.0 \ No newline at end of file diff 
--git a/app_python/requirements.txt b/app_python/requirements.txt new file mode 100644 index 0000000000..b226e0ed45 --- /dev/null +++ b/app_python/requirements.txt @@ -0,0 +1,3 @@ +fastapi==0.115.0 +uvicorn[standard]==0.30.6 +python-json-logger>=4.0.0 diff --git a/app_python/tests/__init__.py b/app_python/tests/__init__.py new file mode 100644 index 0000000000..82c0a8f9c5 --- /dev/null +++ b/app_python/tests/__init__.py @@ -0,0 +1 @@ +# lab 3 task \ No newline at end of file diff --git a/app_python/tests/test_app.py b/app_python/tests/test_app.py new file mode 100644 index 0000000000..c839e8e0cc --- /dev/null +++ b/app_python/tests/test_app.py @@ -0,0 +1,52 @@ +from fastapi.testclient import TestClient + +from app import app + +client = TestClient(app) + + +def test_root_ok_structure(): + r = client.get("/", headers={"user-agent": "pytest"}) + assert r.status_code == 200 + data = r.json() + + # top-level keys + for key in ("service", "system", "runtime", "request", "endpoints"): + assert key in data + + # service section + assert "name" in data["service"] + assert "version" in data["service"] + assert "framework" in data["service"] + + # system section (values depend on runner, so only validate presence/types) + assert "hostname" in data["system"] + assert "cpu_count" in data["system"] + assert isinstance(data["system"]["cpu_count"], int) + + # runtime section + assert "uptime_seconds" in data["runtime"] + assert isinstance(data["runtime"]["uptime_seconds"], int) + assert "current_time" in data["runtime"] + assert data["runtime"]["timezone"] == "UTC" + + # request section + assert data["request"]["path"] == "/" + assert data["request"]["method"] == "GET" + + +def test_health_ok(): + r = client.get("/health") + assert r.status_code == 200 + data = r.json() + assert data["status"] == "healthy" + assert "timestamp" in data + assert "uptime_seconds" in data + assert isinstance(data["uptime_seconds"], int) + + +def test_404_returns_json_message(): + r = 
client.get("/no-such-endpoint") + assert r.status_code == 404 + data = r.json() + assert "message" in data diff --git a/docs/LAB04.md b/docs/LAB04.md new file mode 100644 index 0000000000..e780669f1c --- /dev/null +++ b/docs/LAB04.md @@ -0,0 +1,1087 @@ +# Lab 4 + +## Cloud Provider & Infrastructure + +**Cloud provider chosen:** Yandex Cloud +**Rationale:** free-tier, simple console + good Terraform provider support, suitable for quick VM+VPC lab. + +**Region/zone selected:** `ru-central1-a` + +**Instance type/size (smallest practical):** +- Platform: `standard-v1` +- vCPU: `2` +- RAM: `2 GB` +- Core fraction: `20%` +- Boot disk: `20 GB` (`network-hdd`) +Reason: minimal resources while still stable for SSH + future app/Ansible. + +**Total cost:** at the time of work the console balance showed `0 ₽` (lab run). + +**Resources created (Terraform + Pulumi, equivalent set):** +- VPC Network: `lab-vm-net` +- Subnet: `lab-vm-subnet` (`10.10.0.0/24`, zone `ru-central1-a`) +- Security Group: `lab-vm-sg` + - Ingress: SSH `22/tcp` from **my IP /32** + - Ingress: HTTP `80/tcp` from `0.0.0.0/0` + - Ingress: Custom `5000/tcp` from `0.0.0.0/0` + - Egress: `ANY` to `0.0.0.0/0` +- Static Public IPv4 address: `lab-vm-ip` +- Compute instance: `lab-vm` (NAT enabled, static public IP attached) + + +## Terraform Implementation + +**Terraform version used:** Terraform v1.14.x (HashiCorp package) +**Provider:** `yandex-cloud/yandex` (used version seen during init: `v0.186.0`) + +**Project structure (terraform/):** +- `versions.tf` (required providers) +- `main.tf` (network/subnet/sg/address/compute) +- `variables.tf` (cloud_id, folder_id, zone, sizes, paths, CIDR, labels) +- `outputs.tf` (public_ip, ssh_command) +- `terraform.tfvars` (values; no secrets committed) +- `.gitignore` (excludes state + key.json) + +**Key configuration decisions:** +- Authentication via Service Account JSON key (`key.json`) and variables `cloud_id`, `folder_id`, `zone`. 
+- Ubuntu 24.04 image via `data.yandex_compute_image` family `ubuntu-2404-lts`. +- Security Group implements required firewall rules 22/80/5000. +- Labels: `project=lab04` for identification. + +**Challenges encountered (Terraform):** +- Initially WSL could not download providers from Terraform Registry (`Invalid provider registry host`), so Terraform commands were executed from Ubuntu VM where registry access worked. +- `ssh_public_key_path` issue: using wrong path on the VM (`/home/zagur/...`), fixed by pointing to the actual key on the VM (`/home/liza/...`). +- Service Account key issues: `key.json` must be valid JSON and placed in the terraform directory (and must not be committed). + +**Terminal output (key commands):** + +- `terraform init` + + ```bash + liza@liza-VirtualBox:/media/sf_shared_with_VB/DevOps/DevOps-Core-Course/terraform$ terraform init + Initializing the backend... + Initializing provider plugins... + - Reusing previous version of yandex-cloud/yandex from the dependency lock file + - Using previously-installed yandex-cloud/yandex v0.186.0 + + Terraform has been successfully initialized! + + You may now begin working with Terraform. Try running "terraform plan" to see + any changes that are required for your infrastructure. All Terraform commands + should now work. + + If you ever set or change modules or backend configuration for Terraform, + rerun this command to reinitialize your working directory. If you forget, other + commands will detect it and remind you to do so if necessary. + ``` +- `terraform plan` + + ```bash + liza@liza-VirtualBox:/media/sf_shared_with_VB/DevOps/DevOps-Core-Course/terraform$ terraform plan + data.yandex_compute_image.ubuntu: Reading... + data.yandex_compute_image.ubuntu: Read complete after 1s [id=fd8p685sjqdraf7mpkuc] + + Terraform used the selected providers to generate the following execution plan. 
+ Resource actions are indicated with the following symbols: + + create + + Terraform will perform the following actions: + + # yandex_compute_instance.vm will be created + + resource "yandex_compute_instance" "vm" { + + created_at = (known after apply) + + folder_id = (known after apply) + + fqdn = (known after apply) + + gpu_cluster_id = (known after apply) + + hardware_generation = (known after apply) + + hostname = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + maintenance_grace_period = (known after apply) + + maintenance_policy = (known after apply) + + metadata = { + + "ssh-keys" = <<-EOT + ubuntu:ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL/Nz/lUWA58Ungm2qW9p4o+IHT5W+R0aQ3wWUIkr34j liza@liza-VirtualBox + EOT + } + + name = "lab-vm" + + network_acceleration_type = "standard" + + platform_id = "standard-v1" + + status = (known after apply) + + zone = (known after apply) + + + boot_disk { + + auto_delete = true + + device_name = (known after apply) + + disk_id = (known after apply) + + mode = (known after apply) + + + initialize_params { + + block_size = (known after apply) + + description = (known after apply) + + image_id = "fd8p685sjqdraf7mpkuc" + + name = (known after apply) + + size = 20 + + snapshot_id = (known after apply) + + type = "network-hdd" + } + } + + + metadata_options (known after apply) + + + network_interface { + + index = (known after apply) + + ip_address = (known after apply) + + ipv4 = true + + ipv6 = (known after apply) + + ipv6_address = (known after apply) + + mac_address = (known after apply) + + nat = true + + nat_ip_address = (known after apply) + + nat_ip_version = (known after apply) + + security_group_ids = (known after apply) + + subnet_id = (known after apply) + } + + + placement_policy (known after apply) + + + resources { + + core_fraction = 20 + + cores = 2 + + memory = 2 + } + + + scheduling_policy (known after apply) + } + + # yandex_vpc_address.public_ip will be created + + 
resource "yandex_vpc_address" "public_ip" { + + created_at = (known after apply) + + deletion_protection = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = (known after apply) + + name = "lab-vm-ip" + + reserved = (known after apply) + + used = (known after apply) + + + external_ipv4_address { + + address = (known after apply) + + ddos_protection_provider = (known after apply) + + outgoing_smtp_capability = (known after apply) + + zone_id = "ru-central1-a" + } + } + + # yandex_vpc_network.net will be created + + resource "yandex_vpc_network" "net" { + + created_at = (known after apply) + + default_security_group_id = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + name = "lab-vm-net" + + subnet_ids = (known after apply) + } + + # yandex_vpc_security_group.sg will be created + + resource "yandex_vpc_security_group" "sg" { + + created_at = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + name = "lab-vm-sg" + + network_id = (known after apply) + + status = (known after apply) + + + egress { + + description = "Allow all egress" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = -1 + + protocol = "ANY" + + to_port = -1 + + v4_cidr_blocks = [ + + "0.0.0.0/0", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + + + ingress { + + description = "App port 5000" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = 5000 + + protocol = "TCP" + + to_port = -1 + + v4_cidr_blocks = [ + + "0.0.0.0/0", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + + ingress { + + description = "HTTP" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = 80 + + protocol = "TCP" + + to_port = -1 + + v4_cidr_blocks = [ + + 
"0.0.0.0/0", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + + ingress { + + description = "SSH from my IP" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = 22 + + protocol = "TCP" + + to_port = -1 + + v4_cidr_blocks = [ + + "188.130.155.177/32", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + } + + # yandex_vpc_subnet.subnet will be created + + resource "yandex_vpc_subnet" "subnet" { + + created_at = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + name = "lab-vm-subnet" + + network_id = (known after apply) + + v4_cidr_blocks = [ + + "10.10.0.0/24", + ] + + v6_cidr_blocks = (known after apply) + + zone = "ru-central1-a" + } + + Plan: 5 to add, 0 to change, 0 to destroy. + + Changes to Outputs: + + public_ip = (known after apply) + + ssh_command = (known after apply) + + ────────────────────────────────────────────────────────────────────────────────────── + + Note: You didn't use the -out option to save this plan, so Terraform can't guarantee + to take exactly these actions if you run "terraform apply" now. + ``` +- `terraform apply` + + ```bash + liza@liza-VirtualBox:/media/sf_shared_with_VB/DevOps/DevOps-Core-Course/terraform$ terraform apply + data.yandex_compute_image.ubuntu: Reading... + data.yandex_compute_image.ubuntu: Read complete after 1s [id=fd8p685sjqdraf7mpkuc] + + Terraform used the selected providers to generate the following execution plan. 
+ Resource actions are indicated with the following symbols: + + create + + Terraform will perform the following actions: + + # yandex_compute_instance.vm will be created + + resource "yandex_compute_instance" "vm" { + + created_at = (known after apply) + + folder_id = (known after apply) + + fqdn = (known after apply) + + gpu_cluster_id = (known after apply) + + hardware_generation = (known after apply) + + hostname = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + maintenance_grace_period = (known after apply) + + maintenance_policy = (known after apply) + + metadata = { + + "ssh-keys" = <<-EOT + ubuntu:ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL/Nz/lUWA58Ungm2qW9p4o+IHT5W+R0aQ3wWUIkr34j liza@liza-VirtualBox + EOT + } + + name = "lab-vm" + + network_acceleration_type = "standard" + + platform_id = "standard-v1" + + status = (known after apply) + + zone = (known after apply) + + + boot_disk { + + auto_delete = true + + device_name = (known after apply) + + disk_id = (known after apply) + + mode = (known after apply) + + + initialize_params { + + block_size = (known after apply) + + description = (known after apply) + + image_id = "fd8p685sjqdraf7mpkuc" + + name = (known after apply) + + size = 20 + + snapshot_id = (known after apply) + + type = "network-hdd" + } + } + + + metadata_options (known after apply) + + + network_interface { + + index = (known after apply) + + ip_address = (known after apply) + + ipv4 = true + + ipv6 = (known after apply) + + ipv6_address = (known after apply) + + mac_address = (known after apply) + + nat = true + + nat_ip_address = (known after apply) + + nat_ip_version = (known after apply) + + security_group_ids = (known after apply) + + subnet_id = (known after apply) + } + + + placement_policy (known after apply) + + + resources { + + core_fraction = 20 + + cores = 2 + + memory = 2 + } + + + scheduling_policy (known after apply) + } + + # yandex_vpc_address.public_ip will be created + + 
resource "yandex_vpc_address" "public_ip" { + + created_at = (known after apply) + + deletion_protection = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = (known after apply) + + name = "lab-vm-ip" + + reserved = (known after apply) + + used = (known after apply) + + + external_ipv4_address { + + address = (known after apply) + + ddos_protection_provider = (known after apply) + + outgoing_smtp_capability = (known after apply) + + zone_id = "ru-central1-a" + } + } + + # yandex_vpc_network.net will be created + + resource "yandex_vpc_network" "net" { + + created_at = (known after apply) + + default_security_group_id = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + name = "lab-vm-net" + + subnet_ids = (known after apply) + } + + # yandex_vpc_security_group.sg will be created + + resource "yandex_vpc_security_group" "sg" { + + created_at = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + name = "lab-vm-sg" + + network_id = (known after apply) + + status = (known after apply) + + + egress { + + description = "Allow all egress" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = -1 + + protocol = "ANY" + + to_port = -1 + + v4_cidr_blocks = [ + + "0.0.0.0/0", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + + + ingress { + + description = "App port 5000" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = 5000 + + protocol = "TCP" + + to_port = -1 + + v4_cidr_blocks = [ + + "0.0.0.0/0", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + + ingress { + + description = "HTTP" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = 80 + + protocol = "TCP" + + to_port = -1 + + v4_cidr_blocks = [ + + 
"0.0.0.0/0", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + + ingress { + + description = "SSH from my IP" + + from_port = -1 + + id = (known after apply) + + labels = (known after apply) + + port = 22 + + protocol = "TCP" + + to_port = -1 + + v4_cidr_blocks = [ + + "188.130.155.177/32", + ] + + v6_cidr_blocks = [] + # (2 unchanged attributes hidden) + } + } + + # yandex_vpc_subnet.subnet will be created + + resource "yandex_vpc_subnet" "subnet" { + + created_at = (known after apply) + + folder_id = (known after apply) + + id = (known after apply) + + labels = { + + "project" = "lab04" + } + + name = "lab-vm-subnet" + + network_id = (known after apply) + + v4_cidr_blocks = [ + + "10.10.0.0/24", + ] + + v6_cidr_blocks = (known after apply) + + zone = "ru-central1-a" + } + + Plan: 5 to add, 0 to change, 0 to destroy. + + Changes to Outputs: + + public_ip = (known after apply) + + ssh_command = (known after apply) + + Do you want to perform these actions? + Terraform will perform the actions described above. + Only 'yes' will be accepted to approve. + + Enter a value: yes + + yandex_vpc_address.public_ip: Creating... + yandex_vpc_network.net: Creating... + yandex_vpc_address.public_ip: Creation complete after 2s [id=e9bpa7ofbfbj4i4jg067] + yandex_vpc_network.net: Creation complete after 3s [id=enpqagvtk5g4ne5lnlv5] + yandex_vpc_subnet.subnet: Creating... + yandex_vpc_security_group.sg: Creating... + yandex_vpc_subnet.subnet: Creation complete after 1s [id=e9bvvrbqalt0rvkrombc] + yandex_vpc_security_group.sg: Creation complete after 3s [id=enpb3nevkhedg2m9mess] + yandex_compute_instance.vm: Creating... + yandex_compute_instance.vm: Still creating... [00m10s elapsed] + yandex_compute_instance.vm: Still creating... [00m20s elapsed] + yandex_compute_instance.vm: Still creating... [00m30s elapsed] + yandex_compute_instance.vm: Still creating... 
[00m40s elapsed] + yandex_compute_instance.vm: Creation complete after 43s [id=fhmp143am01jk4mp8l5t] + + Apply complete! Resources: 5 added, 0 changed, 0 destroyed. + + Outputs: + + public_ip = "93.77.187.114" + ssh_command = "ssh -i ~/.ssh/id_ed25519 ubuntu@93.77.187.114" + ``` +- `SSH connection to VM` + + ```bash + liza@liza-VirtualBox:/media/sf_shared_with_VB/DevOps/DevOps-Core-Course/terraform$ ssh -i ~/.ssh/id_ed25519 ubuntu@93.77.187.114 + The authenticity of host '93.77.187.114 (93.77.187.114)' can't be established. + ED25519 key fingerprint is SHA256:O4bQHhkR0EvL+sATJS3LhfXhGzPdlfoKHfg6ItBFccA. + This key is not known by any other names. + Are you sure you want to continue connecting (yes/no/[fingerprint])? yes + Warning: Permanently added '93.77.187.114' (ED25519) to the list of known hosts. + Welcome to Ubuntu 24.04.3 LTS (GNU/Linux 6.8.0-100-generic x86_64) + + * Documentation: https://help.ubuntu.com + * Management: https://landscape.canonical.com + * Support: https://ubuntu.com/pro + + System information as of Sun Feb 15 18:34:08 UTC 2026 + + System load: 0.16 Processes: 100 + Usage of /: 11.2% of 18.72GB Users logged in: 0 + Memory usage: 9% IPv4 address for eth0: 10.10.0.9 + Swap usage: 0% + + + Expanded Security Maintenance for Applications is not enabled. + + 0 updates can be applied immediately. + + Enable ESM Apps to receive additional future security updates. + See https://ubuntu.com/esm or run: sudo pro status + + + The list of available updates is more than a week old. + To check for new updates run: sudo apt update + + + The programs included with the Ubuntu system are free software; + the exact distribution terms for each program are described in the + individual files in /usr/share/doc/*/copyright. + + Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by + applicable law. + + To run a command as administrator (user "root"), use "sudo <command>". + See "man sudo_root" for details. 
+ + ubuntu@fhmp143am01jk4mp8l5t:~$ sudo ss -tulpn | grep -E '(:22|:80|:5000)\s' + tcp LISTEN 0 4096 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=1007,fd=3),("systemd",pid=1,fd=60)) + tcp LISTEN 0 4096 [::]:22 [::]:* users:(("sshd",pid=1007,fd=4),("systemd",pid=1,fd=61)) + ubuntu@fhmp143am01jk4mp8l5t:~$ + ``` + +## Pulumi Implementation + +**Pulumi version and language:** Pulumi CLI (Python project), language = Python. +**Provider/library:** `pulumi-yandex` + +**How code differs from Terraform:** +- Terraform: declarative HCL resources. +- Pulumi: imperative Python program (variables, read SSH key via Python, export outputs). + +**Challenges encountered (Pulumi):** +- VirtualBox shared folder restrictions prevented creating Python venv in `/media/sf_*` (symlink permissions). Solution: run Pulumi project from `~/pulumi-yc` (home dir). +- `pulumi_yandex` imports `pkg_resources`; setuptools 82 removed it → pinned setuptools to a version that still provides `pkg_resources`. +- Wrong config key initially (`serviceAccountKeyFilePath`), fixed to `yandex:serviceAccountKeyFile`. +- `~` was not expanded inside Python `open()` for SSH public key path → fixed to absolute path `/home/liza/.ssh/id_ed25519.pub`. +- Pulumi secrets passphrase required for `pulumi stack output ...` → solved via `PULUMI_CONFIG_PASSPHRASE_FILE`. 
+ +**Terminal output from:** + +- pulumi preview + + ```bash + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi preview + Enter your passphrase to unlock config/secrets + (set PULUMI_CONFIG_PASSPHRASE or PULUMI_CONFIG_PASSPHRASE_FILE to remember): + Enter your passphrase to unlock config/secrets + Previewing update (dev): + Type Name Plan Info + + pulumi:pulumi:Stack pulumi-yc-dev create 1 error; 2 messages + + ├─ yandex:index:VpcAddress lab-vm-ip create + + ├─ yandex:index:VpcNetwork lab-vm-net create + + ├─ yandex:index:VpcSubnet lab-vm-subnet create + + └─ yandex:index:VpcSecurityGroup lab-vm-sg create + + Diagnostics: + pulumi:pulumi:Stack (pulumi-yc-dev): + /home/liza/pulumi-yc/venv/lib/python3.12/site-packages/pulumi_yandex/_utilities.py:10: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. 
+ import pkg_resources + + error: Program failed with an unhandled exception: + Traceback (most recent call last): + File "/home/liza/pulumi-yc/__main__.py", line 87, in <module> + with open(ssh_public_key_path, "r", encoding="utf-8") as f: + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + FileNotFoundError: [Errno 2] No such file or directory: '~/.ssh/id_ed25519.pub' + + Resources: + + 5 to create + 1 errored + + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi config set sshPublicKeyPath "home/liza/.ssh/id_ed25519.pub" + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi preview + Enter your passphrase to unlock config/secrets + (set PULUMI_CONFIG_PASSPHRASE or PULUMI_CONFIG_PASSPHRASE_FILE to remember): + Enter your passphrase to unlock config/secrets + Previewing update (dev): + Type Name Plan Info + + pulumi:pulumi:Stack pulumi-yc-dev create 1 error; 2 messages + + ├─ yandex:index:VpcNetwork lab-vm-net create + + ├─ yandex:index:VpcAddress lab-vm-ip create + + ├─ 
yandex:index:VpcSecurityGroup lab-vm-sg create + + └─ yandex:index:VpcSubnet lab-vm-subnet create + + Diagnostics: + pulumi:pulumi:Stack (pulumi-yc-dev): + /home/liza/pulumi-yc/venv/lib/python3.12/site-packages/pulumi_yandex/_utilities.py:10: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. + import pkg_resources + + error: Program failed with an unhandled exception: + Traceback (most recent call last): + File "/home/liza/pulumi-yc/__main__.py", line 87, in + with open(ssh_public_key_path, "r", encoding="utf-8") as f: + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + FileNotFoundError: [Errno 2] No such file or directory: 'home/liza/.ssh/id_ed25519.pub' + + Resources: + + 5 to create + 1 errored + + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi config set sshPublicKeyPath "/home/liza/.ssh/id_ed25519.pub" + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi preview + Enter your passphrase to unlock config/secrets + (set PULUMI_CONFIG_PASSPHRASE or PULUMI_CONFIG_PASSPHRASE_FILE to remember): + Enter your passphrase to unlock config/secrets + Previewing update (dev): + Type Name Plan Info + + pulumi:pulumi:Stack pulumi-yc-dev create 2 messages + + ├─ yandex:index:VpcNetwork lab-vm-net create + + ├─ yandex:index:VpcAddress lab-vm-ip create + + ├─ yandex:index:VpcSecurityGroup lab-vm-sg create + + ├─ yandex:index:VpcSubnet lab-vm-subnet create + + └─ yandex:index:ComputeInstance lab-vm create + + Diagnostics: + pulumi:pulumi:Stack (pulumi-yc-dev): + /home/liza/pulumi-yc/venv/lib/python3.12/site-packages/pulumi_yandex/_utilities.py:10: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. 
+ import pkg_resources + + Outputs: + public_ip : [unknown] + ssh_command: [unknown] + + Resources: + + 6 to create + ``` +- pulumi up + + ```bash + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi up + Enter your passphrase to unlock config/secrets + (set PULUMI_CONFIG_PASSPHRASE or PULUMI_CONFIG_PASSPHRASE_FILE to remember): + Enter your passphrase to unlock config/secrets + Previewing update (dev): + Type Name Plan Info + + pulumi:pulumi:Stack pulumi-yc-dev create 2 messages + + ├─ yandex:index:VpcAddress lab-vm-ip create + + ├─ yandex:index:VpcNetwork lab-vm-net create + + ├─ yandex:index:VpcSubnet lab-vm-subnet create + + ├─ yandex:index:VpcSecurityGroup lab-vm-sg create + + └─ yandex:index:ComputeInstance lab-vm create + + Diagnostics: + pulumi:pulumi:Stack (pulumi-yc-dev): + /home/liza/pulumi-yc/venv/lib/python3.12/site-packages/pulumi_yandex/_utilities.py:10: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. + import pkg_resources + + Outputs: + public_ip : [unknown] + ssh_command: [unknown] + + Resources: + + 6 to create + + Do you want to perform this update? yes + Updating (dev): + Type Name Status Inf + + pulumi:pulumi:Stack pulumi-yc-dev **creating failed (6s)** 1 e + + ├─ yandex:index:VpcAddress lab-vm-ip created (1s) + + ├─ yandex:index:VpcNetwork lab-vm-net created (3s) + + ├─ yandex:index:VpcSecurityGroup lab-vm-sg **creating failed** 1 e + + └─ yandex:index:VpcSubnet lab-vm-subnet created (0.69s) + + Diagnostics: + pulumi:pulumi:Stack (pulumi-yc-dev): + /home/liza/pulumi-yc/venv/lib/python3.12/site-packages/pulumi_yandex/_utilities.py:10: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. 
Refrain from using this package or pin to Setuptools<81. + import pkg_resources + + error: update failed + + yandex:index:VpcSecurityGroup (lab-vm-sg): + error: 1 error occurred: + * error while requesting API to create security group: client-request-id = a8cd88af-ef76-426a-b251-256186350269 client-trace-id = 90223eb4-77e7-4081-b256-ab7658e6b66f rpc error: code = InvalidArgument desc = Illegal argument Cannot parse CIDR: 103.112.171.163 + /32 + + Outputs: + public_ip : "89.169.128.133" + ssh_command: "ssh -i ~/.ssh/id_ed25519 ubuntu@89.169.128.133" + + Resources: + + 4 created + 2 errored + + Duration: 8s + + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi config get sshAllowCidr + 103.112.171.163 + /32 + + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi config set sshAllowCidr "103.112.171.163/32" + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi up + Enter your passphrase to unlock config/secrets + (set PULUMI_CONFIG_PASSPHRASE or PULUMI_CONFIG_PASSPHRASE_FILE to remember): + Enter your passphrase to unlock config/secrets + Previewing update (dev): + Type Name Plan Info + pulumi:pulumi:Stack pulumi-yc-dev 2 messages + + ├─ yandex:index:VpcSecurityGroup lab-vm-sg create + + └─ yandex:index:ComputeInstance lab-vm create + + Diagnostics: + pulumi:pulumi:Stack (pulumi-yc-dev): + /home/liza/pulumi-yc/venv/lib/python3.12/site-packages/pulumi_yandex/_utilities.py:10: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. + import pkg_resources + + Resources: + + 2 to create + 4 unchanged + + Do you want to perform this update? 
yes + Updating (dev): + Type Name Status Info + pulumi:pulumi:Stack pulumi-yc-dev 2 messages + + ├─ yandex:index:VpcSecurityGroup lab-vm-sg created (2s) + + └─ yandex:index:ComputeInstance lab-vm created (46s) + + Diagnostics: + pulumi:pulumi:Stack (pulumi-yc-dev): + /home/liza/pulumi-yc/venv/lib/python3.12/site-packages/pulumi_yandex/_utilities.py:10: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. + import pkg_resources + + Outputs: + public_ip : "89.169.128.133" + ssh_command: "ssh -i ~/.ssh/id_ed25519 ubuntu@89.169.128.133" + + Resources: + + 2 created + 4 unchanged + + Duration: 54s + ``` +- SSH connection to VM + + ```bash + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi stack output public_ip + 89.169.128.133 + (venv) liza@liza-VirtualBox:~/pulumi-yc$ ssh -i ~/.ssh/id_ed25519 ubuntu@$(pulumi stack output public_ip) "uname -a" + The authenticity of host '89.169.128.133 (89.169.128.133)' can't be established. + ED25519 key fingerprint is SHA256:sAzx95etFkjGRS3naJOJ6JT47tTfPFZoe7ZLtrtuwd0. + This key is not known by any other names. + Are you sure you want to continue connecting (yes/no/[fingerprint])? yes + Warning: Permanently added '89.169.128.133' (ED25519) to the list of known hosts. + Linux fhm6qjabrghet3gtdam5 6.8.0-100-generic #100-Ubuntu SMP PREEMPT_DYNAMIC Tue Jan 13 16:40:06 UTC 2026 x86_64 x86_64 x86_64 GNU/Linux + (venv) liza@liza-VirtualBox:~/pulumi-yc$ pulumi stack output ssh_command + ssh -i ~/.ssh/id_ed25519 ubuntu@89.169.128.133 + ``` + +## Terraform vs Pulumi Comparison + +**Ease of Learning:** +Terraform was easier to start: HCL is simple for typical infra and plan/apply is very clear. Pulumi required extra Python environment setup (venv, deps) and config secrets management. 
+ +**Code Readability:** +Terraform is very readable for standard infra blocks. Pulumi becomes more readable when logic/abstractions are needed (reusing code, loops, conditionals), but adds Python complexity. + +**Debugging:** +Terraform debugging is straightforward via terraform plan diffs and provider errors. Pulumi debugging was harder due to Python runtime errors (file paths, missing modules) and provider configuration keys. + +**Documentation:** +Terraform + provider examples were easier to follow for the exact resources. Pulumi docs are good, but provider configuration key naming is less obvious and errors can be less direct. + +**Use Case:** +Use Terraform for standard infrastructure provisioning and predictable declarative changes. Use Pulumi when infrastructure needs real programming constructs, reusable components, or complex logic in code. + +## Lab 5 Preparation & Cleanup + +Are you keeping your VM for Lab 5? (Yes/No): No +If no: What will you use for Lab 5? (Local VM/Will recreate cloud VM): local VM + +Cleanup Status: + +If destroying everything: Terminal output showing both tools' resources destroyed +Cloud console screenshot showing resource status (optional but recommended): not available — all resources have already been deleted, along with the billing account. + +```bash +liza@liza-VirtualBox:/media/sf_shared_with_VB/DevOps/DevOps-Core-Course/terraform$ terraform destroy +data.yandex_compute_image.ubuntu: Reading... +yandex_vpc_address.public_ip: Refreshing state... [id=e9bpa7ofbfbj4i4jg067] +yandex_vpc_network.net: Refreshing state... [id=enpqagvtk5g4ne5lnlv5] +data.yandex_compute_image.ubuntu: Read complete after 1s [id=fd8p685sjqdraf7mpkuc] +yandex_vpc_subnet.subnet: Refreshing state... [id=e9bvvrbqalt0rvkrombc] +yandex_vpc_security_group.sg: Refreshing state... [id=enpb3nevkhedg2m9mess] +yandex_compute_instance.vm: Refreshing state... [id=fhmp143am01jk4mp8l5t] + +Terraform used the selected providers to generate the following execution plan. 
+Resource actions are indicated with the following symbols: + - destroy + +Terraform will perform the following actions: + + # yandex_compute_instance.vm will be destroyed + - resource "yandex_compute_instance" "vm" { + - created_at = "2026-02-15T18:32:06Z" -> null + - folder_id = "b1grll246n43md1tgbl4" -> null + - fqdn = "fhmp143am01jk4mp8l5t.auto.internal" -> null + - hardware_generation = [ + - { + - generation2_features = [] + - legacy_features = [ + - { + - pci_topology = "PCI_TOPOLOGY_V2" + }, + ] + }, + ] -> null + - id = "fhmp143am01jk4mp8l5t" -> null + - labels = { + - "project" = "lab04" + } -> null + - metadata = { + - "ssh-keys" = <<-EOT + ubuntu:ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL/Nz/lUWA58Ungm2qW9p4o+IHT5W+R0aQ3wWUIkr34j liza@liza-VirtualBox + EOT + } -> null + - name = "lab-vm" -> null + - network_acceleration_type = "standard" -> null + - platform_id = "standard-v1" -> null + - status = "running" -> null + - zone = "ru-central1-a" -> null + # (5 unchanged attributes hidden) + + - boot_disk { + - auto_delete = true -> null + - device_name = "fhma95uovclrvhk04rr5" -> null + - disk_id = "fhma95uovclrvhk04rr5" -> null + - mode = "READ_WRITE" -> null + + - initialize_params { + - block_size = 4096 -> null + - image_id = "fd8p685sjqdraf7mpkuc" -> null + name = null + - size = 20 -> null + - type = "network-hdd" -> null + # (3 unchanged attributes hidden) + } + } + + - metadata_options { + - aws_v1_http_endpoint = 1 -> null + - aws_v1_http_token = 2 -> null + - gce_http_endpoint = 1 -> null + - gce_http_token = 1 -> null + } + + - network_interface { + - index = 0 -> null + - ip_address = "10.10.0.9" -> null + - ipv4 = true -> null + - ipv6 = false -> null + - mac_address = "d0:0d:19:09:06:ab" -> null + - nat = true -> null + - nat_ip_address = "93.77.187.114" -> null + - nat_ip_version = "IPV4" -> null + - security_group_ids = [ + - "enpb3nevkhedg2m9mess", + ] -> null + - subnet_id = "e9bvvrbqalt0rvkrombc" -> null + # (1 unchanged attribute hidden) + } 
+ + - placement_policy { + - host_affinity_rules = [] -> null + - placement_group_partition = 0 -> null + # (1 unchanged attribute hidden) + } + + - resources { + - core_fraction = 20 -> null + - cores = 2 -> null + - gpus = 0 -> null + - memory = 2 -> null + } + + - scheduling_policy { + - preemptible = false -> null + } + } + + # yandex_vpc_address.public_ip will be destroyed + - resource "yandex_vpc_address" "public_ip" { + - created_at = "2026-02-15T18:32:00Z" -> null + - deletion_protection = false -> null + - folder_id = "b1grll246n43md1tgbl4" -> null + - id = "e9bpa7ofbfbj4i4jg067" -> null + - labels = {} -> null + - name = "lab-vm-ip" -> null + - reserved = true -> null + - used = true -> null + # (1 unchanged attribute hidden) + + - external_ipv4_address { + - address = "93.77.187.114" -> null + - zone_id = "ru-central1-a" -> null + # (2 unchanged attributes hidden) + } + } + + # yandex_vpc_network.net will be destroyed + - resource "yandex_vpc_network" "net" { + - created_at = "2026-02-15T18:31:59Z" -> null + - default_security_group_id = "enpue623uci0thhopdls" -> null + - folder_id = "b1grll246n43md1tgbl4" -> null + - id = "enpqagvtk5g4ne5lnlv5" -> null + - labels = { + - "project" = "lab04" + } -> null + - name = "lab-vm-net" -> null + - subnet_ids = [ + - "e9bvvrbqalt0rvkrombc", + ] -> null + # (1 unchanged attribute hidden) + } + + # yandex_vpc_security_group.sg will be destroyed + - resource "yandex_vpc_security_group" "sg" { + - created_at = "2026-02-15T18:32:04Z" -> null + - folder_id = "b1grll246n43md1tgbl4" -> null + - id = "enpb3nevkhedg2m9mess" -> null + - labels = { + - "project" = "lab04" + } -> null + - name = "lab-vm-sg" -> null + - network_id = "enpqagvtk5g4ne5lnlv5" -> null + - status = "ACTIVE" -> null + # (1 unchanged attribute hidden) + + - egress { + - description = "Allow all egress" -> null + - from_port = -1 -> null + - id = "enp6av08k6d1ca6fnkeg" -> null + - labels = {} -> null + - port = -1 -> null + - protocol = "ANY" -> null + 
- to_port = -1 -> null + - v4_cidr_blocks = [ + - "0.0.0.0/0", + ] -> null + - v6_cidr_blocks = [] -> null + # (2 unchanged attributes hidden) + } + + - ingress { + - description = "App port 5000" -> null + - from_port = -1 -> null + - id = "enpg2i04845lsbdjtp0c" -> null + - labels = {} -> null + - port = 5000 -> null + - protocol = "TCP" -> null + - to_port = -1 -> null + - v4_cidr_blocks = [ + - "0.0.0.0/0", + ] -> null + - v6_cidr_blocks = [] -> null + # (2 unchanged attributes hidden) + } + - ingress { + - description = "HTTP" -> null + - from_port = -1 -> null + - id = "enprf7fiofoobj3u2in8" -> null + - labels = {} -> null + - port = 80 -> null + - protocol = "TCP" -> null + - to_port = -1 -> null + - v4_cidr_blocks = [ + - "0.0.0.0/0", + ] -> null + - v6_cidr_blocks = [] -> null + # (2 unchanged attributes hidden) + } + - ingress { + - description = "SSH from my IP" -> null + - from_port = -1 -> null + - id = "enplvdni38239u0hd9c4" -> null + - labels = {} -> null + - port = 22 -> null + - protocol = "TCP" -> null + - to_port = -1 -> null + - v4_cidr_blocks = [ + - "188.130.155.177/32", + ] -> null + - v6_cidr_blocks = [] -> null + # (2 unchanged attributes hidden) + } + } + + # yandex_vpc_subnet.subnet will be destroyed + - resource "yandex_vpc_subnet" "subnet" { + - created_at = "2026-02-15T18:32:02Z" -> null + - folder_id = "b1grll246n43md1tgbl4" -> null + - id = "e9bvvrbqalt0rvkrombc" -> null + - labels = { + - "project" = "lab04" + } -> null + - name = "lab-vm-subnet" -> null + - network_id = "enpqagvtk5g4ne5lnlv5" -> null + - v4_cidr_blocks = [ + - "10.10.0.0/24", + ] -> null + - v6_cidr_blocks = [] -> null + - zone = "ru-central1-a" -> null + # (2 unchanged attributes hidden) + } + +Plan: 0 to add, 0 to change, 5 to destroy. + +Changes to Outputs: + - public_ip = "93.77.187.114" -> null + - ssh_command = "ssh -i ~/.ssh/id_ed25519 ubuntu@93.77.187.114" -> null + +Do you really want to destroy all resources? 
+ Terraform will destroy all your managed infrastructure, as shown above. + There is no undo. Only 'yes' will be accepted to confirm. + + Enter a value: yes + +yandex_compute_instance.vm: Destroying... [id=fhmp143am01jk4mp8l5t] +yandex_compute_instance.vm: Still destroying... [id=fhmp143am01jk4mp8l5t, 00m10s elapsed] +yandex_compute_instance.vm: Still destroying... [id=fhmp143am01jk4mp8l5t, 00m20s elapsed] +yandex_compute_instance.vm: Still destroying... [id=fhmp143am01jk4mp8l5t, 00m30s elapsed] +yandex_compute_instance.vm: Destruction complete after 33s +yandex_vpc_security_group.sg: Destroying... [id=enpb3nevkhedg2m9mess] +yandex_vpc_subnet.subnet: Destroying... [id=e9bvvrbqalt0rvkrombc] +yandex_vpc_address.public_ip: Destroying... [id=e9bpa7ofbfbj4i4jg067] +yandex_vpc_security_group.sg: Destruction complete after 1s +yandex_vpc_address.public_ip: Destruction complete after 1s +yandex_vpc_subnet.subnet: Destruction complete after 5s +yandex_vpc_network.net: Destroying... [id=enpqagvtk5g4ne5lnlv5] +yandex_vpc_network.net: Destruction complete after 1s + +Destroy complete! Resources: 5 destroyed. 
+``` + +![pulumi destroy](/docs/screenshots/pulumi_destroy.png) diff --git a/docs/screenshots/pulumi_destroy.png b/docs/screenshots/pulumi_destroy.png new file mode 100644 index 0000000000..f1cae7b765 Binary files /dev/null and b/docs/screenshots/pulumi_destroy.png differ diff --git a/labs/lab18.md b/labs/lab18.md index 3491394659..864df70baa 100644 --- a/labs/lab18.md +++ b/labs/lab18.md @@ -1,430 +1,1306 @@ -# Lab 18 — Decentralized Hosting with 4EVERLAND & IPFS +# Lab 18 — Reproducible Builds with Nix ![difficulty](https://img.shields.io/badge/difficulty-intermediate-yellow) -![topic](https://img.shields.io/badge/topic-Web3%20Infrastructure-blue) -![points](https://img.shields.io/badge/points-20-orange) -![type](https://img.shields.io/badge/type-Exam%20Alternative-purple) +![topic](https://img.shields.io/badge/topic-Nix%20%26%20Reproducibility-blue) +![points](https://img.shields.io/badge/points-12-orange) -> Deploy content to the decentralized web using IPFS and 4EVERLAND for permanent, censorship-resistant hosting. +> **Goal:** Learn to create truly reproducible builds using Nix, eliminating "works on my machine" problems and achieving bit-for-bit reproducibility. +> **Deliverable:** A PR/MR from `feature/lab18` to the course repo with `labs/submission18.md` containing build artifacts, hash comparisons, Nix expressions, and analysis. Submit the PR/MR link via Moodle. -## Overview - -The decentralized web (Web3) offers an alternative to traditional hosting where content is stored across a distributed network rather than centralized servers. IPFS (InterPlanetary File System) is the foundation, and 4EVERLAND provides a user-friendly gateway to this ecosystem. +--- -**This is an Exam Alternative Lab** — Complete both Lab 17 and Lab 18 to replace the final exam. 
+## Overview -**What You'll Learn:** -- IPFS fundamentals and content addressing -- Decentralized storage concepts -- Pinning services and persistence -- 4EVERLAND hosting platform -- Centralized vs decentralized trade-offs +In this lab you will practice: +- Installing Nix and understanding the Nix philosophy +- Writing Nix derivations to build software reproducibly +- Creating reproducible Docker images using Nix +- Using Nix Flakes for modern, declarative dependency management +- **Comparing Nix with your previous work from Labs 1-2** -**Prerequisites:** Basic understanding of web hosting, completed Docker lab +**Why Nix?** Traditional build tools (Docker, npm, pip, etc.) claim to be reproducible, but they're not: +- `Dockerfile` with `apt-get install nodejs` gets different versions over time +- `pip install -r requirements.txt` without hash pinning can vary +- Docker builds include timestamps and vary across machines -**Tech Stack:** IPFS | 4EVERLAND | Docker | Content Addressing +**Nix solves this:** Every build is isolated in a sandbox with exact dependencies. The same Nix expression produces **identical binaries** on any machine, forever. 
-**Provided Files:** -- `labs/lab18/index.html` — A beautiful course landing page ready to deploy +**Building on Your Work:** Throughout this lab, you'll revisit your DevOps Info Service from Lab 1 and compare: +- **Lab 1**: `requirements.txt` vs Nix derivations for dependency management +- **Lab 2**: Traditional `Dockerfile` vs Nix `dockerTools` for containerization +- **Lab 10** *(bonus task)*: Helm `values.yaml` version pinning vs Nix Flakes locking --- -## Exam Alternative Requirements +## Prerequisites -| Requirement | Details | -|-------------|---------| -| **Deadline** | 1 week before exam date | -| **Minimum Score** | 16/20 points | -| **Must Complete** | Both Lab 17 AND Lab 18 | -| **Total Points** | 40 pts (replaces 40 pt exam) | +- **Required:** Completed Labs 1-16 (all required course labs) +- **Key Labs Referenced:** + - Lab 1: Python DevOps Info Service (you'll rebuild with Nix) + - Lab 2: Docker containerization (you'll compare with Nix dockerTools) + - Lab 10: Helm charts (you'll compare version pinning with Nix Flakes) +- Linux, macOS, or WSL2 +- Basic understanding of package managers +- Your `app_python/` directory from Lab 1-2 available --- ## Tasks -### Task 1 — IPFS Fundamentals (3 pts) +### Task 1 — Build Reproducible Python App (Revisiting Lab 1) (6 pts) + +**Objective:** Use Nix to build your DevOps Info Service from Lab 1 and compare Nix's reproducibility guarantees with traditional `pip install -r requirements.txt`. + +**Why This Matters:** You've already built this app in Lab 1 using `requirements.txt`. Now you'll see how Nix provides **true reproducibility** that `pip` cannot guarantee - the same derivation produces bit-for-bit identical results across different machines and times. 
+ +#### 1.1: Install Nix Package Manager + +> ⚠️ **Important Installation Requirements:** +> - Requires sudo/admin access on your machine +> - Creates `/nix` directory at system root (Linux/macOS; under WSL2 this is `/nix` inside the WSL distribution's Linux filesystem, not on the Windows drive) +> - Modifies shell configuration files (`~/.bashrc`, `~/.zshrc`, etc.) +> - Installation size: ~500MB-1GB for base system +> - **Cannot be installed in home directory only** +> - Uninstallation requires manual cleanup (see [official guide](https://nixos.org/manual/nix/stable/installation/uninstall.html)) + +1. **Install Nix using the Determinate Systems installer (recommended):** + + ```bash + curl --proto '=https' --tlsv1.2 -sSf -L https://install.determinate.systems/nix | sh -s -- install + ``` + + > **Why Determinate Nix?** It enables flakes by default and provides better defaults for modern Nix usage. + +
+ 🐧 Alternative: Official Nix installer + + ```bash + sh <(curl -L https://nixos.org/nix/install) --daemon + ``` + + Then enable flakes by adding to `~/.config/nix/nix.conf`: + ``` + experimental-features = nix-command flakes + ``` + +
+ +2. **Verify Installation:** + + ```bash + nix --version + ``` + + You should see Nix 2.x or higher. + + **Restart your terminal** after installation to load Nix into your PATH. + +3. **Test Basic Nix Usage:** + + ```bash + # Try running a program without installing it + nix run nixpkgs#hello + ``` + + This downloads and runs `hello` without installing it permanently. + +#### 1.2: Prepare Your Python Application + +1. **Copy your Lab 1 app to the lab18 directory:** + + ```bash + mkdir -p labs/lab18/app_python + cp -r app_python/* labs/lab18/app_python/ + cd labs/lab18/app_python + ``` + + You should have: + - `app.py` - Your DevOps Info Service + - `requirements.txt` - Your Python dependencies (Flask/FastAPI) + +2. **Review your traditional workflow (Lab 1):** + + Recall how you built this in Lab 1: + ```bash + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt + python app.py + ``` + + **Problems with this approach:** + - Different Python versions on different machines + - `pip install` without hashes can pull different package versions + - Virtual environment is not portable + - No guarantee of reproducibility over time + +#### 1.3: Write a Nix Derivation for Your Python App + +1. **Create a Nix derivation:** + + Create `default.nix` in `labs/lab18/app_python/`: + +
+ 📚 Where to learn Nix Python derivation syntax + + - [nix.dev - Python](https://nix.dev/tutorials/nixos/building-and-running-python-apps) + - [nixpkgs Python documentation](https://nixos.org/manual/nixpkgs/stable/#python) + - [Nix Pills - Chapter 6: Our First Derivation](https://nixos.org/guides/nix-pills/our-first-derivation.html) + + **Key concepts you need:** + - `python3Packages.buildPythonApplication` - Function to build Python apps + - `propagatedBuildInputs` - Python dependencies (Flask/FastAPI) + - `makeWrapper` - Wraps Python script with interpreter + - `pname` - Package name + - `version` - Package version + - `src` - Source code location (use `./.` for current directory) + - `format = "other"` - For apps without setup.py + + **Translating requirements.txt to Nix:** + Your Lab 1 `requirements.txt` might have: + ``` + Flask==3.1.0 + Werkzeug>=2.0 + click + ``` + + In Nix, you reference packages from nixpkgs (not exact PyPI versions): + - `Flask==3.1.0` → `pkgs.python3Packages.flask` + - `fastapi==0.115.0` → `pkgs.python3Packages.fastapi` + - `uvicorn[standard]` → `pkgs.python3Packages.uvicorn` + + **Note:** Nix uses versions from the pinned nixpkgs, not PyPI directly. This is intentional for reproducibility. + + **Example structure (Flask):** + ```nix + { pkgs ? 
import <nixpkgs> {} }: + + pkgs.python3Packages.buildPythonApplication { + pname = "devops-info-service"; + version = "1.0.0"; + src = ./.; + + format = "other"; + + propagatedBuildInputs = with pkgs.python3Packages; [ + flask + ]; + + nativeBuildInputs = [ pkgs.makeWrapper ]; + + installPhase = '' + mkdir -p $out/bin + cp app.py $out/bin/devops-info-service + + # Wrap with Python interpreter so it can execute + wrapProgram $out/bin/devops-info-service \ + --prefix PYTHONPATH : "$PYTHONPATH" + ''; + } + ``` + + **Example for FastAPI:** + ```nix + propagatedBuildInputs = with pkgs.python3Packages; [ + fastapi + uvicorn + ]; + ``` + + **Hint:** If you get "command not found" errors, make sure you're using `makeWrapper` in the installPhase. + +
+ +2. **Build your application with Nix:** + + ```bash + nix-build + ``` + + This creates a `result` symlink pointing to the Nix store path. + +3. **Run the Nix-built application:** + + ```bash + ./result/bin/devops-info-service + ``` + + Visit `http://localhost:5000` (or your configured port) - it should work identically to your Lab 1 version! + +#### 1.4: Prove Reproducibility (Compare with Lab 1 approach) + +1. **Record the Nix store path:** + + ```bash + readlink result + ``` + + Note the store path (e.g., `/nix/store/abc123-devops-info-service-1.0.0/`) + +2. **Build again and compare:** + + ```bash + rm result + nix-build + readlink result + ``` + + **Observation:** The store path is **identical**! But wait - did Nix rebuild it or reuse it? + + **Answer: Nix reused the cached build!** Same inputs = same hash = reuse existing store path. + +3. **Force an actual rebuild to prove reproducibility:** + + ```bash + # First, find your build's store path + STORE_PATH=$(readlink result) + echo "Original store path: $STORE_PATH" + + # Delete it from the Nix store + nix-store --delete $STORE_PATH + + # Now rebuild (this forces actual compilation) + rm result + nix-build + readlink result + ``` + + **Observation:** Same store path returns! Nix rebuilt it from scratch and got the exact same hash. -**Objective:** Understand IPFS concepts and run a local node. +3. **Compare with traditional pip approach:** -**Requirements:** + **Demonstrate pip's limitations:** -1. **Study IPFS Concepts** - - Content addressing vs location addressing - - CIDs (Content Identifiers) - - Pinning and garbage collection - - IPFS gateways + ```bash + # Test 1: Install without version pins (shows immediate non-reproducibility) + echo "flask" > requirements-unpinned.txt # No version specified -2. 
**Run Local IPFS Node** - - Use Docker to run IPFS node - - Access the Web UI - - Understand node configuration + python -m venv venv1 + source venv1/bin/activate + pip install -r requirements-unpinned.txt + pip freeze | grep -i flask > freeze1.txt + deactivate -3. **Add Content Locally** - - Add a file to your local IPFS node - - Retrieve the CID - - Access via local gateway + # Simulate time passing: clear pip cache + pip cache purge 2>/dev/null || rm -rf ~/.cache/pip + + python -m venv venv2 + source venv2/bin/activate + pip install -r requirements-unpinned.txt + pip freeze | grep -i flask > freeze2.txt + deactivate + + # Compare Flask versions + diff freeze1.txt freeze2.txt + ``` + + **Observation:** + - Without version pins, you get whatever's latest + - **Even with pinned versions** in requirements.txt, you only pin direct dependencies + - Transitive dependencies (dependencies of your dependencies) can still drift + - Over weeks/months, `pip install -r requirements.txt` can produce different environments + + **The fundamental problem:** + ``` + Lab 1 approach: requirements.txt pins what YOU install + Problem: Doesn't pin what FLASK installs (Werkzeug, Click, etc.) + Result: Different machines = different transitive dependency versions + + Nix approach: Pins EVERYTHING in the entire dependency tree + Result: Bit-for-bit identical on all machines, forever + ``` + +4. **Understand Nix's caching behavior:** + + **Key insight:** Nix uses content-addressable storage: + ``` + Store path format: /nix/store/<hash>-<name>-<version> + Example: /nix/store/abc123xyz-devops-info-service-1.0.0 + + The <hash> is computed from: + - All source code + - All dependencies (transitively!) + - Build instructions + - Compiler flags + - Everything needed to reproduce the build + + Same inputs → Same hash → Reuse existing build (cache hit) + Different inputs → Different hash → New build required + ``` + +5. 
**Nix's guarantee:** + + ```bash + # Hash the entire Nix output + nix-hash --type sha256 result + ``` + + This hash will be **identical** on any machine, any time, forever - if the inputs don't change. + + This is why Nix can safely share binary caches (cache.nixos.org) - the hash proves the content! + +**📊 Comparison Table - Lab 1 vs Lab 18:** + +| Aspect | Lab 1 (pip + venv) | Lab 18 (Nix) | +|--------|-------------------|--------------| +| Python version | System-dependent | Pinned in derivation | +| Dependency resolution | Runtime (`pip install`) | Build-time (pure) | +| Reproducibility | Approximate (with lockfiles) | Bit-for-bit identical | +| Portability | Requires same OS + Python | Works anywhere Nix runs | +| Binary cache | No | Yes (cache.nixos.org) | +| Isolation | Virtual environment | Sandboxed build | +| Store path | N/A | Content-addressable hash | + +#### 1.5: Optional - Go Application (If you completed Lab 1 Bonus)
-💡 Hints +🎁 For students who built the Go version in Lab 1 Bonus -**IPFS Concepts:** -- **Content Addressing:** Files identified by hash of content, not location -- **CID:** Unique identifier derived from content hash (e.g., `QmXxx...` or `bafyxxx...`) -- **Pinning:** Marking content to keep it (prevent garbage collection) -- **Gateway:** HTTP interface to IPFS network +If you implemented the compiled language bonus in Lab 1, you can also build it with Nix: -**Run IPFS with Docker:** -```bash -docker run -d --name ipfs \ - -p 4001:4001 \ - -p 8080:8080 \ - -p 5001:5001 \ - ipfs/kubo:latest - -# Web UI at http://localhost:5001/webui -# Gateway at http://localhost:8080 -``` +1. **Copy your Go app:** + ```bash + mkdir -p labs/lab18/app_go + cp -r app_go/* labs/lab18/app_go/ + cd labs/lab18/app_go + ``` -**Add Content:** -```bash -# Create test file -echo "Hello IPFS from DevOps course!" > hello.txt +2. **Create `default.nix` for Go:** + ```nix + { pkgs ? import <nixpkgs> {} }: -# Add to IPFS -docker exec ipfs ipfs add /hello.txt -# Returns: added QmXxx... hello.txt + pkgs.buildGoModule { + pname = "devops-info-service-go"; + version = "1.0.0"; + src = ./.; -# Access via gateway -curl http://localhost:8080/ipfs/QmXxx... -``` + vendorHash = null; # or use pkgs.lib.fakeHash if you have dependencies + } + ``` -**Resources:** -- [IPFS Docs](https://docs.ipfs.tech/) -- [IPFS Concepts](https://docs.ipfs.tech/concepts/) +3. **Build and compare binary size:** + ```bash + nix-build + ls -lh result/bin/ + ``` + + Compare this with your multi-stage Docker build from Lab 2 Bonus!
+In `labs/submission18.md`, document: +- Installation steps and verification output +- Your `default.nix` file with explanations of each field +- Store path from multiple builds (prove they're identical) +- Comparison table: `pip install` vs Nix derivation +- Why does `requirements.txt` provide weaker guarantees than Nix? +- Screenshots showing your Lab 1 app running from Nix-built version +- Explanation of the Nix store path format and what each part means +- **Reflection:** How would Nix have helped in Lab 1 if you had used it from the start? + --- -### Task 2 — 4EVERLAND Setup (3 pts) +### Task 2 — Reproducible Docker Images (Revisiting Lab 2) (4 pts) + +**Objective:** Use Nix's `dockerTools` to containerize your DevOps Info Service and compare with your traditional Dockerfile from Lab 2. + +**Why This Matters:** In Lab 2, you created a `Dockerfile` that built your Python app. While Docker provides isolation, it's **not reproducible**: +- Build timestamps differ between builds +- Base image tags like `python:3.13-slim` can point to different versions over time +- `apt-get` installs latest packages, which change +- Two builds of the same Dockerfile can produce different image hashes + +Nix's `dockerTools` creates **truly reproducible** container images with content-addressable layers. + +#### 2.1: Review Your Lab 2 Dockerfile + +1. **Find your Dockerfile from Lab 2:** + + ```bash + # From repository root directory + cat app_python/Dockerfile + ``` + + You likely have something like: + ```dockerfile + FROM python:3.13-slim + RUN useradd -m appuser + WORKDIR /app + COPY requirements.txt . + RUN pip install -r requirements.txt + COPY app.py . + USER appuser + EXPOSE 5000 + CMD ["python", "app.py"] + ``` + +
+ 💡 Don't have your Lab 2 Dockerfile? + + If you lost your Lab 2 work, create a minimal Dockerfile now: + + ```dockerfile + FROM python:3.13-slim + WORKDIR /app + COPY requirements.txt app.py ./ + RUN pip install -r requirements.txt + EXPOSE 5000 + CMD ["python", "app.py"] + ``` + + Save as `app_python/Dockerfile`. + +
+ +2. **Test Lab 2 Dockerfile reproducibility:** + + ```bash + # Make sure you're in repository root + cd ~/path/to/DevOps-Core-Course # Adjust to your path + + # Build from app_python directory + docker build -t lab2-app:v1 ./app_python + docker inspect lab2-app:v1 | grep Created + + # Wait a few seconds, then rebuild + sleep 5 + docker build -t lab2-app:v2 ./app_python + docker inspect lab2-app:v2 | grep Created + ``` + + **Observation:** Different creation timestamps! The image hashes are different even though the content is identical. + +#### 2.2: Build Docker Image with Nix + +1. **Create a Nix Docker image using `dockerTools`:** + + Create `labs/lab18/app_python/docker.nix`: + +
+ 📚 Where to learn about dockerTools + + - [nix.dev - Building Docker images](https://nix.dev/tutorials/nixos/building-and-running-docker-images.html) + - [nixpkgs dockerTools documentation](https://ryantm.github.io/nixpkgs/builders/images/dockertools/) + + **Key concepts:** + - `pkgs.dockerTools.buildLayeredImage` - Builds efficient layered images + - `name` - Image name + - `tag` - Image tag (optional; if omitted, the hash of the image's Nix store path is used as the tag) + - `contents` - Packages/derivations to include in the image + - `config.Cmd` - Default command to run + - `config.ExposedPorts` - Ports to expose + + **Critical for reproducibility:** + - **DO NOT** use `created = "now"` - this breaks reproducibility! + - **DO** use `created = "1970-01-01T00:00:01Z"` for reproducible builds + - **DO** use exact derivations (from Task 1) instead of arbitrary packages + + **Example structure:** + ```nix + { pkgs ? import <nixpkgs> {} }: + + let + app = import ./default.nix { inherit pkgs; }; + in + pkgs.dockerTools.buildLayeredImage { + name = "devops-info-service-nix"; + tag = "1.0.0"; + + contents = [ app ]; + + config = { + Cmd = [ "${app}/bin/devops-info-service" ]; + ExposedPorts = { + "5000/tcp" = {}; + }; + }; + + created = "1970-01-01T00:00:01Z"; # Reproducible timestamp + } + ``` + +
+ +2. **Build the Nix Docker image:** + + ```bash + cd labs/lab18/app_python + nix-build docker.nix + ``` + + This creates a tarball in `result`. + +3. **Load into Docker:** + + ```bash + docker load < result + ``` + + Output shows the image was loaded with a specific tag. + +4. **Run both containers side-by-side:** + + ```bash + # First, clean up any existing containers to avoid port conflicts + docker stop lab2-container nix-container 2>/dev/null || true + docker rm lab2-container nix-container 2>/dev/null || true + + # Run Lab 2 traditional Docker image on port 5000 + docker run -d -p 5000:5000 --name lab2-container lab2-app:v1 + + # Run Nix-built image on port 5001 (mapped to container's 5000) + docker run -d -p 5001:5000 --name nix-container devops-info-service-nix:1.0.0 + ``` + + Test both: + ```bash + curl http://localhost:5000/health # Lab 2 version + curl http://localhost:5001/health # Nix version + ``` + + Both should work identically! + + **Troubleshooting:** + - If port 5000 is in use: `lsof -i :5000` to find the process + - Container won't start: Check logs with `docker logs lab2-container` + - Permission denied: Make sure Docker daemon is running + +#### 2.3: Compare Reproducibility - Lab 2 vs Lab 18 + +**Test 1: Rebuild Reproducibility** -**Objective:** Set up 4EVERLAND account and explore the platform. +1. **Rebuild Nix image multiple times:** -**Requirements:** + ```bash + rm result + nix-build docker.nix + sha256sum result -1. **Create Account** - - Sign up at [4everland.org](https://www.4everland.org/) - - Connect with GitHub or wallet - - Explore dashboard + rm result + nix-build docker.nix + sha256sum result + ``` -2. **Understand Services** - - Hosting: Deploy websites/apps - - Storage: IPFS pinning - - Gateway: Access IPFS content + **Observation:** Identical SHA256 hashes! The tarball is bit-for-bit identical. -3. **Explore Free Tier** - - Understand limits and capabilities - - Review pricing for reference +2. 
**Compare with Lab 2 Dockerfile:** + + ```bash + # Make sure you're in repository root + # Build Lab 2 Dockerfile twice and compare saved image hashes + + docker build -t lab2-app:test1 ./app_python/ + docker save lab2-app:test1 | sha256sum + + sleep 2 # Wait a moment + + docker build -t lab2-app:test2 ./app_python/ + docker save lab2-app:test2 | sha256sum + ``` + + **Observation:** Different hashes! Even though the Dockerfile and source are identical, Lab 2's approach is not reproducible. + +**Test 2: Image Size Comparison** + +```bash +docker images | grep -E "lab2-app|devops-info-service-nix" +``` + +Create a comparison table: + +| Metric | Lab 2 Dockerfile | Lab 18 Nix dockerTools | +|--------|------------------|------------------------| +| Image size | ~150MB (with python:3.13-slim) | ~50-80MB (minimal closure) | +| Reproducibility | ❌ Different hashes each build | ✅ Identical hashes | +| Build caching | Layer-based (timestamp-dependent) | Content-addressable | +| Base image dependency | Yes (python:3.13-slim) | No base image needed | + +**Test 3: Layer Analysis** + +1. **Examine Lab 2 image layers:** + + ```bash + docker history lab2-app:v1 + ``` + + Note the timestamps in the "CREATED" column - they vary between builds! + +2. **Examine Nix image layers:** + + ```bash + docker history devops-info-service-nix:1.0.0 + ``` + + Nix uses content-addressable layers - same content = same layer hash. + +#### 2.4: Advanced Comparison - Multi-Stage Builds
-💡 Hints +🎁 Optional: Compare with Lab 2 Bonus Multi-Stage Build -**4EVERLAND Services:** -- **Hosting:** Deploy from Git repos, automatic builds -- **Bucket (Storage):** Upload files, get IPFS CIDs -- **Gateway:** Access content via 4everland.link +If you completed the Lab 2 bonus with Go and multi-stage builds, you can compare: -**Dashboard:** -- Projects: Your deployed sites -- Bucket: File storage -- Domains: Custom domain setup +**Your Lab 2 multi-stage Dockerfile:** +```dockerfile +FROM golang:1.22 AS builder +COPY . . +RUN go build -o app main.go -**Free Tier Includes:** -- 100 deployments/month -- 5GB storage -- 100GB bandwidth +FROM alpine:latest +COPY --from=builder /app/app /app +ENTRYPOINT ["/app"] +``` + +**Problems:** +- `golang:1.22` and `alpine:latest` change over time +- Build includes timestamps +- Not reproducible across machines + +**Nix equivalent (fully reproducible):** +```nix +pkgs.dockerTools.buildLayeredImage { + name = "go-app-nix"; + contents = [ goApp ]; # Built in Task 1.5 + config.Cmd = [ "${goApp}/bin/go-app" ]; + created = "1970-01-01T00:00:01Z"; +} +``` -**Resources:** -- [4EVERLAND Docs](https://docs.4everland.org/) +Same result size, but **fully reproducible**!
+**📊 Comprehensive Comparison - Lab 2 vs Lab 18:** + +| Aspect | Lab 2 Traditional Dockerfile | Lab 18 Nix dockerTools | +|--------|------------------------------|------------------------| +| **Base images** | `python:3.13-slim` (changes over time) | No base image (pure derivations) | +| **Timestamps** | Different on each build | Fixed or deterministic | +| **Package installation** | `pip install` at build time | Nix store paths (immutable) | +| **Reproducibility** | ❌ Same Dockerfile → Different images | ✅ Same docker.nix → Identical images | +| **Caching** | Layer-based (breaks on timestamp) | Content-addressable (perfect caching) | +| **Image size** | ~150MB+ with full base image | ~50-80MB with minimal closure | +| **Portability** | Requires Docker | Requires Nix (then loads to Docker) | +| **Security** | Base image vulnerabilities | Minimal dependencies, easier auditing | +| **Lab 2 Learning** | Best practices, non-root user | Build on Lab 2 knowledge | + +In `labs/submission18.md`, document: +- Your `docker.nix` file with explanations of each field +- Side-by-side comparison: Lab 2 Dockerfile vs Nix docker.nix +- SHA256 hash comparison proving Nix reproducibility +- Image size comparison table with analysis +- `docker history` output for both approaches +- Screenshots showing both containers running simultaneously +- **Analysis:** Why can't traditional Dockerfiles achieve bit-for-bit reproducibility? +- **Reflection:** If you could redo Lab 2 with Nix, what would you do differently? +- Practical scenarios where Nix's reproducibility matters (CI/CD, security audits, rollbacks) + --- -### Task 3 — Deploy Static Content (4 pts) +### Bonus Task — Modern Nix with Flakes (Includes Lab 10 Comparison) (2 pts) + +**Objective:** Modernize your Nix expressions using Flakes for better dependency locking and reproducibility. Compare Nix Flakes with Helm's version pinning approach from Lab 10. 
+ +**Why This Matters:** Nix Flakes are the modern standard (2026) for Nix projects. They provide: +- Automatic dependency locking via `flake.lock` +- Standardized project structure +- Better reproducibility across time +- Easier sharing and collaboration + +**Comparison with Lab 10:** In Lab 10 (Helm), you used `values.yaml` to pin image versions. Flakes take this concept further by locking **all** dependencies, not just container images. + +#### Bonus.1: Convert to Flake + +1. **Create a `flake.nix`:** + + Create `labs/lab18/app_python/flake.nix`: + +
+ 📚 Where to learn about Flakes + + - [Zero to Nix - Flakes](https://zero-to-nix.com/concepts/flakes) + - [NixOS Wiki - Flakes](https://wiki.nixos.org/wiki/Flakes) + - [Nix Flakes explained](https://nix.dev/concepts/flakes) + + **Key structure:** + ```nix + { + description = "DevOps Info Service - Reproducible Build"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; # Pin exact nixpkgs version + }; + + outputs = { self, nixpkgs }: + let + # ⚠️ Architecture note: This example uses x86_64-linux + # - Works on: Linux (x86_64), WSL2 + # - Mac Intel: Change to "x86_64-darwin" + # - Mac M1/M2/M3: Change to "aarch64-darwin" + # - For multi-system support, see: https://github.com/numtide/flake-utils + system = "x86_64-linux"; + pkgs = nixpkgs.legacyPackages.${system}; + in + { + packages.${system} = { + default = import ./default.nix { inherit pkgs; }; + dockerImage = import ./docker.nix { inherit pkgs; }; + }; + + # Development shell with all dependencies + devShells.${system}.default = pkgs.mkShell { + buildInputs = with pkgs; [ + python313 + python313Packages.flask # or fastapi + ]; + }; + }; + } + ``` + + **Platform-specific adjustments:** + - **Linux/WSL2**: Use `system = "x86_64-linux";` (shown above) + - **Mac Intel**: Use `system = "x86_64-darwin";` + - **Mac ARM (M1/M2/M3)**: Use `system = "aarch64-darwin";` + + **Hint:** Use `nix flake init` to generate a template, then modify it. + +
+ +2. **Generate lock file:** + + ```bash + cd labs/lab18/app_python + nix flake update + ``` + + This creates `flake.lock` with pinned dependencies. + +3. **Build using flake:** + + ```bash + nix build # Builds default package + nix build .#dockerImage # Builds Docker image + ./result/bin/devops-info-service # Run the app + ``` + +#### Bonus.2: Compare with Lab 10 Helm Values + +**Lab 10 Helm approach to version pinning:** + +In `k8s/mychart/values.yaml`: +```yaml +image: + repository: yourusername/devops-info-service + tag: "1.0.0" # Pin specific version + pullPolicy: IfNotPresent + +# Environment-specific overrides +# values-prod.yaml: +image: + tag: "1.0.0" # Explicit version for prod +``` + +**Limitations:** +- Only pins the container image tag +- Doesn't lock Python dependencies inside the image +- Doesn't lock Helm chart dependencies +- Image tag `1.0.0` could point to different content if rebuilt + +**Nix Flakes approach:** + +`flake.lock` locks **everything**: +```json +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1704321342, + "narHash": "sha256-abc123...", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "52e3e80afff4b16ccb7c52e9f0f5220552f03d04", + "type": "github" + } + } + } +} +``` + +This locks: +- ✅ Exact nixpkgs revision (all 80,000+ packages) +- ✅ Python version and all dependencies +- ✅ Build tools and compilers +- ✅ Everything in the closure + +**Combined Approach:** + +You can use both together! +1. Build reproducible image with Nix: `nix build .#dockerImage` +2. Load to Docker and tag: `docker load < result` +3. Reference in Helm with content hash: `image.tag: "sha256-abc123..."` + +This gives you: +- Helm's declarative Kubernetes deployment +- Nix's perfect reproducibility for the image + +Create a comparison table in your submission. + +#### Bonus.3: Test Cross-Machine Reproducibility + +1. 
**Commit your flake to git:** + + ```bash + git add flake.nix flake.lock default.nix docker.nix + git commit -m "feat: add Nix flake for reproducible builds" + git push + ``` + +2. **Test on another machine or ask a classmate:** + + ```bash + # Build directly from GitHub + nix build github:yourusername/DevOps-Core-Course?dir=labs/lab18/app_python#default + ``` + +3. **Compare store paths:** + + ```bash + readlink result + ``` + + Both machines should get **identical store paths** - same hash, same content! + +#### Bonus.4: Add Development Shell + +1. **Enter the dev shell:** + + ```bash + nix develop + ``` + + This gives you an isolated environment with exact Python version and dependencies. -**Objective:** Deploy a static site to 4EVERLAND. +2. **Compare with Lab 1 virtual environment:** -**Requirements:** + **Lab 1 approach:** + ```bash + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt + ``` -1. **Use the Provided Static Site** - - A course landing page is provided at `labs/lab18/index.html` - - Review the HTML/CSS to understand the structure - - You may customize it or create your own + **Lab 18 Nix approach:** + ```bash + nix develop + # Python and all dependencies instantly available + # Same environment on every machine + ``` -2. **Deploy via 4EVERLAND** - - Connect your GitHub repository - - Configure build settings - - Deploy to IPFS via 4EVERLAND +3. **Try it:** -3. **Verify Deployment** - - Access via 4EVERLAND URL - - Access via IPFS gateway - - Note the CID + ```bash + nix develop + python --version # Exact pinned version + python -c "import flask; print(flask.__version__)" + ``` -4. **Test Permanence** - - Understand that content with same hash = same CID - - Make a change, redeploy, observe new CID + Exit and enter again - same versions, always! 
+ +**📊 Dependency Management Comparison:** + +| Aspect | Lab 1 (venv + requirements.txt) | Lab 10 (Helm values.yaml) | Lab 18 (Nix Flakes) | +|--------|--------------------------------|---------------------------|---------------------| +| **Locks Python version** | ❌ Uses system Python | ❌ Uses image Python | ✅ Pinned in flake | +| **Locks dependencies** | ⚠️ Approximate (versions drift) | ❌ Only image tag | ✅ Exact hashes | +| **Locks build tools** | ❌ No | ❌ No | ✅ Yes | +| **Reproducibility** | ⚠️ Probabilistic | ⚠️ Tag-based | ✅ Cryptographic | +| **Cross-machine** | ❌ Varies | ⚠️ Depends on image | ✅ Identical | +| **Dev environment** | ✅ Yes (venv) | ❌ No | ✅ Yes (nix develop) | +| **Time-stable** | ❌ Packages update | ⚠️ Tags can change | ✅ Locked forever | + +In `labs/submission18.md`, document: +- Your complete `flake.nix` with explanations +- `flake.lock` snippet showing locked dependencies (especially nixpkgs revision) +- Build outputs from `nix build` +- Proof that builds are identical across machines/time +- Dev shell experience: Compare `nix develop` vs Lab 1's `venv` +- Comparison with Lab 10 Helm values.yaml approach (Bonus.2) +- **Reflection:** How do Flakes improve upon traditional dependency management? +- Practical scenarios where flake.lock prevented a "works on my machine" problem + +--- + +## Troubleshooting Common Issues
-💡 Hints - -**Provided Static Site:** -The course provides a beautiful landing page at `labs/lab18/index.html` that you can deploy. It includes: -- Modern responsive design -- Course curriculum overview -- Learning roadmap -- "Deployed on IPFS" badge - -**Deployment Steps:** -1. Go to 4EVERLAND Dashboard → Hosting -2. Click "New Project" -3. Import from GitHub -4. Select your repository and branch -5. Configure: - - Framework: None (static) - - Build command: (leave empty for static) - - Output directory: `labs/lab18` (or root if you moved the file) -6. Deploy - -**Alternative: Create Your Own** -You can also create your own static site. Keep it simple: -```html - - - - My DevOps Portfolio - - -

Welcome to My DevOps Journey

-

Deployed on IPFS via 4EVERLAND

- - +🔧 Python app doesn't run: "command not found" or "No such file or directory" + +**Problem:** Your `app.py` doesn't have a shebang line and isn't being wrapped with Python interpreter. + +**Solution:** Ensure you're using `makeWrapper` in your `default.nix`: + +```nix +nativeBuildInputs = [ pkgs.makeWrapper ]; + +installPhase = '' + mkdir -p $out/bin + cp app.py $out/bin/devops-info-service + + wrapProgram $out/bin/devops-info-service \ + --prefix PYTHONPATH : "$PYTHONPATH" +''; ``` -**Access URLs:** -- 4EVERLAND: `https://your-project.4everland.app` -- IPFS Gateway: `https://ipfs.4everland.link/ipfs/CID` +Alternatively, add a shebang to your `app.py`: +```python +#!/usr/bin/env python3 +```
---- +
+🔧 "error: hash mismatch in fixed-output derivation" + +**Problem:** The hash you specified doesn't match the actual content. + +**Solution:** +1. Use `pkgs.lib.fakeHash` initially to get the correct hash +2. Nix will fail and tell you the expected hash +3. Replace `fakeHash` with the correct hash from the error message + +Example: +```nix +vendorHash = pkgs.lib.fakeHash; # Start with this +# Error will say: "got: sha256-abc123..." +# Then use: vendorHash = "sha256-abc123..."; +``` -### Task 4 — IPFS Pinning (4 pts) +
-**Objective:** Use 4EVERLAND's storage (Bucket) for IPFS pinning. +
+🔧 Docker image doesn't load or fails to run -**Requirements:** +**Common causes:** -1. **Upload Files to Bucket** - - Upload multiple files (images, documents, etc.) - - Get CIDs for each file +1. **Image tarball not built:** Check `result` is a `.tar.gz` file + ```bash + file result + # Should show: gzip compressed data + ``` -2. **Create a Directory Structure** - - Upload a folder with multiple files - - Understand directory CIDs +2. **Wrong Cmd path:** Verify the app path in docker.nix + ```nix + config.Cmd = [ "${app}/bin/devops-info-service" ]; + # Make sure this matches your installPhase output + ``` -3. **Access via Multiple Gateways** - - Access your content via: - - 4EVERLAND gateway - - Public IPFS gateways (ipfs.io, dweb.link) - - Understand gateway differences +3. **Missing dependencies in image:** Add required packages to `contents` + ```nix + contents = [ app pkgs.coreutils ]; # Add tools if needed + ``` -4. **Verify Pinning** - - Confirm content is pinned - - Understand pinning vs local storage +
-💡 Hints +🔧 Port conflicts when running containers -**Bucket Upload:** -1. Dashboard → Bucket -2. Create new bucket -3. Upload files or folders -4. Get CID from file details +**Problem:** Port 5000 or 5001 already in use. -**Multiple Gateways:** +**Solution:** ```bash -# 4EVERLAND -https://ipfs.4everland.link/ipfs/QmXxx... - -# IPFS.io -https://ipfs.io/ipfs/QmXxx... +# Find what's using the port +lsof -i :5000 -# Cloudflare -https://cloudflare-ipfs.com/ipfs/QmXxx... +# Stop old containers +docker stop $(docker ps -aq) 2>/dev/null -# DWeb.link -https://dweb.link/ipfs/QmXxx... +# Or use different ports +docker run -d -p 5002:5000 --name my-container my-image ``` -**Directory Upload:** -- Upload entire folder -- Get directory CID -- Access files: `gateway/ipfs/DirCID/filename` +
+ +
+🔧 Flakes don't work: "experimental features" error -**Pinning Importance:** -- Unpinned content may be garbage collected -- Pinning services keep content available -- Multiple pins = more redundancy +**Problem:** Flakes not enabled in your Nix configuration. + +**Solution:** +```bash +# Check if flakes are enabled +nix flake --help + +# If error, enable flakes: +mkdir -p ~/.config/nix +echo "experimental-features = nix-command flakes" >> ~/.config/nix/nix.conf + +# Restart terminal +```
---- +
+🔧 Build fails on macOS: "unsupported system" -### Task 5 — IPNS & Updates (3 pts) +**Problem:** Flake hardcodes `x86_64-linux` but you're on macOS. -**Objective:** Understand mutable content with IPNS. +**Solution:** Change the system in `flake.nix`: +```nix +# For Mac Intel: +system = "x86_64-darwin"; -**Requirements:** +# For Mac M1/M2/M3: +system = "aarch64-darwin"; +``` -1. **Understand IPNS** - - IPFS = immutable (content changes = new CID) - - IPNS = mutable pointer to IPFS content - - IPNS name stays same, content can change +
-2. **Explore 4EVERLAND Domains** - - Custom domains for your deployment - - How 4EVERLAND handles updates +
+🔧 "cannot build derivation: no builder for this system" + +**Problem:** Trying to build Linux binaries on macOS or vice versa. -3. **Update Deployment** - - Make changes to your static site - - Redeploy - - Observe: same URL, new CID +**Solution:** Either: +1. Match your system architecture in the flake +2. Use Docker builds which work cross-platform +3. Use Nix's cross-compilation features (advanced) + +
-💡 Hints +🔧 Don't have Lab 1/2 artifacts to use + +**No problem!** Create a minimal example: -**IPFS vs IPNS:** -- **IPFS CID:** `QmXxx...` - changes when content changes -- **IPNS Name:** `/ipns/k51xxx...` - stays same, points to current CID +1. **Create simple Flask app:** + ```python + # app.py + from flask import Flask, jsonify + app = Flask(__name__) -**4EVERLAND Handles This:** -- Your project URL stays constant -- Behind scenes, updates the IPNS pointer -- Users always get latest version + @app.route('/health') + def health(): + return jsonify({"status": "healthy"}) -**Domain Configuration:** -1. Dashboard → Hosting → Your Project -2. Settings → Domains -3. Add custom domain or use provided subdomain + if __name__ == '__main__': + app.run(host='0.0.0.0', port=5000) + ``` + +2. **Create requirements.txt:** + ``` + flask + ``` + +3. **Create basic Dockerfile:** + ```dockerfile + FROM python:3.13-slim + WORKDIR /app + COPY requirements.txt app.py ./ + RUN pip install -r requirements.txt + EXPOSE 5000 + CMD ["python", "app.py"] + ``` + +Now you can proceed with the lab using these minimal examples!
--- -### Task 6 — Documentation & Analysis (3 pts) +## How to Submit -**Objective:** Document your work and analyze decentralized hosting. +1. Create a branch for this lab and push it: -**Create `4EVERLAND.md` with:** + ```bash + git switch -c feature/lab18 + # create labs/submission18.md with your findings + git add labs/submission18.md labs/lab18/ + git commit -m "docs: add lab18 submission - Nix reproducible builds" + git push -u origin feature/lab18 + ``` -1. **Deployment Summary** - - What you deployed - - URLs (4EVERLAND and IPFS gateways) - - CIDs obtained +2. **Open a PR (GitHub) or MR (GitLab)** from your fork's `feature/lab18` branch → **course repository's main branch**. -2. **Screenshots** - - 4EVERLAND dashboard - - Deployed site - - Bucket storage - - Multiple gateway access +3. In the PR/MR description, include: -3. **Centralized vs Decentralized Comparison** + ```text + Platform: [GitHub / GitLab] -| Aspect | Traditional Hosting | IPFS/4EVERLAND | -|--------|---------------------|----------------| -| Content addressing | | | -| Single point of failure | | | -| Censorship resistance | | | -| Update mechanism | | | -| Cost model | | | -| Speed/latency | | | -| Best use cases | | | + - [x] Task 1 — Build Reproducible Artifacts from Scratch (6 pts) + - [x] Task 2 — Reproducible Docker Images with Nix (4 pts) + - [ ] Bonus Task — Modern Nix with Flakes (2 pts) [if completed] + ``` -4. **Use Case Analysis** - - When decentralized hosting makes sense - - When traditional hosting is better - - Your recommendations +4. **Copy the PR/MR URL** and submit it via **Moodle before the deadline**. 
--- -## Checklist +## Acceptance Criteria -- [ ] IPFS concepts understood -- [ ] Local IPFS node running -- [ ] Content added to local IPFS -- [ ] 4EVERLAND account created -- [ ] Static site deployed via 4EVERLAND -- [ ] Files uploaded to Bucket -- [ ] Content accessed via multiple gateways -- [ ] IPNS/updates understood -- [ ] `4EVERLAND.md` documentation complete -- [ ] Comparison analysis complete +- ✅ Branch `feature/lab18` exists with commits for each task +- ✅ File `labs/submission18.md` contains required outputs and analysis for all completed tasks +- ✅ Directory `labs/lab18/` contains your application code and Nix expressions +- ✅ Nix derivations successfully build reproducible artifacts +- ✅ Docker image built with Nix and compared to traditional Dockerfile +- ✅ Hash comparisons prove reproducibility +- ✅ **Bonus (if attempted):** `flake.nix` and `flake.lock` present and working +- ✅ PR/MR from `feature/lab18` → **course repo main branch** is open +- ✅ PR/MR link submitted via Moodle before the deadline --- -## Rubric - -| Criteria | Points | -|----------|--------| -| **IPFS Fundamentals** | 3 pts | -| **4EVERLAND Setup** | 3 pts | -| **Static Deployment** | 4 pts | -| **IPFS Pinning** | 4 pts | -| **IPNS & Updates** | 3 pts | -| **Documentation** | 3 pts | -| **Total** | **20 pts** | +## Rubric (12 pts max) -**Grading:** -- **18-20:** Excellent understanding, thorough deployment, insightful analysis -- **16-17:** Working deployment, good documentation -- **14-15:** Basic deployment, incomplete analysis -- **<14:** Incomplete deployment +| Criterion | Points | +| --------------------------------------------------- | -----: | +| Task 1 — Build Reproducible Artifacts from Scratch | **6** | +| Task 2 — Reproducible Docker Images with Nix | **4** | +| Bonus Task — Modern Nix with Flakes | **2** | +| **Total** | **12** | --- -## Resources +## Guidelines + +- Use clear Markdown headers to organize sections in `submission18.md` +- Include command outputs and 
written analysis for each task +- Explain WHY Nix provides better reproducibility than traditional tools +- Compare before/after results when proving reproducibility +- Document challenges encountered and how you solved them +- Include code snippets with explanations; don't just paste code without commentary
-📚 IPFS Documentation +📚 Helpful Resources + +**Official Documentation:** +- [nix.dev - Official tutorials](https://nix.dev/) +- [Zero to Nix - Beginner-friendly guide](https://zero-to-nix.com/) +- [Nix Pills - Deep dive](https://nixos.org/guides/nix-pills/) +- [NixOS Package Search](https://search.nixos.org/) + +**Docker with Nix:** +- [Building Docker images - nix.dev](https://nix.dev/tutorials/nixos/building-and-running-docker-images.html) +- [dockerTools reference](https://ryantm.github.io/nixpkgs/builders/images/dockertools/) + +**Flakes:** +- [Nix Flakes - NixOS Wiki](https://wiki.nixos.org/wiki/Flakes) +- [Flakes - Zero to Nix](https://zero-to-nix.com/concepts/flakes) +- [Practical Nix Flakes](https://serokell.io/blog/practical-nix-flakes) -- [IPFS Docs](https://docs.ipfs.tech/) -- [IPFS Concepts](https://docs.ipfs.tech/concepts/) -- [Content Addressing](https://docs.ipfs.tech/concepts/content-addressing/) -- [IPNS](https://docs.ipfs.tech/concepts/ipns/) +**Community:** +- [awesome-nix - Curated resources](https://github.com/nix-community/awesome-nix) +- [NixOS Discourse](https://discourse.nixos.org/)
-🌐 4EVERLAND +💡 Nix Tips -- [4EVERLAND Docs](https://docs.4everland.org/) -- [Hosting Guide](https://docs.4everland.org/hosting/overview) -- [Bucket (Storage)](https://docs.4everland.org/storage/bucket) +1. **Store paths are addressed by a hash of their build inputs:** Same inputs = same store path +2. **Use `nix-shell -p pkg` for quick testing** before adding to derivations +3. **Garbage collect unused builds:** `nix-collect-garbage -d` (note: `-d` also deletes old profile generations, so you can no longer roll back to them) +4. **Search for packages:** `nix search nixpkgs golang` +5. **Read error messages carefully:** Nix errors are verbose but informative +6. **Use `lib.fakeHash` initially** when you don't know the hash yet +7. **Avoid network access in builds:** Nix sandboxes block network by default +8. **Pin the nixpkgs version** for maximum reproducibility
-🔗 Public Gateways +🔧 Troubleshooting + +**If Nix installation fails:** +- Ensure you have multi-user support (daemon mode recommended) +- Check `/nix` directory permissions +- Try the Determinate Systems installer instead of the official one + +**If builds fail with "hash mismatch":** +- Update the hash in your derivation to match the error message +- Use `lib.fakeHash` to discover the correct hash + +**If Docker load fails:** +- Verify result is a valid tarball: `file result` +- Check Docker daemon is running: `docker info` +- Try `docker load -i result` instead of `docker load < result` + +**If flakes don't work:** +- Ensure experimental features are enabled in `~/.config/nix/nix.conf` +- Run `nix flake check` to validate flake syntax +- Make sure your flake is in a git repository and its files are tracked (`git add flake.nix`); untracked files are invisible to the flake -- [IPFS Gateway Checker](https://ipfs.github.io/public-gateway-checker/) -- [Gateway List](https://docs.ipfs.tech/concepts/ipfs-gateway/#gateway-providers) +**If cross-machine builds differ:** +- Check nixpkgs input is locked in `flake.lock` +- Verify both machines use the same Nix version +- Ensure no `created = "now"` or timestamps in image builds
---- +
+🎯 Understanding Reproducibility + +**What makes a build reproducible?** +- ✅ Deterministic inputs (exact versions, hashes) +- ✅ Isolated environment (no system dependencies) +- ✅ No timestamps or random values +- ✅ Same compiler, same flags, same libraries +- ✅ Content-addressable storage + +**Why traditional tools fail:** +```bash +# Docker - timestamps in layers +docker build . # Different timestamp = different image hash + +# npm - lockfiles help but aren't perfect +npm install # Still uses local cache, system libraries + +# apt/yum - version drift +apt-get install nodejs # Gets different version next week +``` -**Good luck!** 🌐 +**How Nix succeeds:** +```bash +# Nix - pure, sandboxed, content-addressed +nix-build # Same inputs = bit-for-bit identical output + # Today, tomorrow, on any machine +``` + +**Real-world impact:** +- **CI/CD:** No more "works on my machine" +- **Security:** Audit exact dependency tree +- **Rollback:** Atomic updates with perfect rollbacks +- **Collaboration:** Everyone gets identical environment + +
+ +
+🌟 Advanced Concepts (Optional Reading) + +**Content-Addressable Store:** +- Every package has a unique hash based on its inputs +- `/nix/store/abc123...` where `abc123` = hash of inputs +- Same inputs = same hash = reuse existing build + +**Sandboxing:** +- Builds run in isolated namespaces +- No network access (except for fixed-output derivations) +- No access to `/home`, the host's `/tmp`, or system paths +- Only declared dependencies are available + +**Lazy Evaluation:** +- Nix expressions are lazily evaluated +- Only evaluates (and then builds) what's actually needed +- Makes it practical to work with a massive codebase (all of nixpkgs) without performance issues + +**Binary Cache:** +- cache.nixos.org provides pre-built binaries +- If your build matches a cached hash, download instead of rebuild +- Set up private caches for your team + +**Cross-Compilation:** +- Nix makes cross-compilation trivial +- `pkgs.pkgsCross.aarch64-multiplatform.hello` +- Same reproducibility guarantees across architectures -> **Remember:** Decentralized hosting trades some convenience for resilience and censorship resistance. Content-addressed storage ensures integrity - the same content always has the same identifier. +
diff --git a/labs/lab18/index.html b/labs/lab18/index.html deleted file mode 100644 index b3de65bc8b..0000000000 --- a/labs/lab18/index.html +++ /dev/null @@ -1,927 +0,0 @@ - - - - - - DevOps Core Course | Production-Grade Practices - - - - - - - -
- -
- -
-
-
-
-
- 2026 Edition — 7th Year — Evolved every semester -
-

Master Production-Grade DevOps Practices

-

16 lectures and hands-on labs covering Kubernetes, GitOps, CI/CD, Monitoring, and beyond. 18 weeks of learning to build real-world skills.

- -
-
-
- -
-
-
-
7
-
Years Running
-
-
-
1000+
-
Students Trained
-
-
-
16
-
Lectures & Labs
-
-
-
18
-
Weeks of Learning
-
-
-
- -
-
-

Why This Course?

-

Build production-ready skills through hands-on practice with tools used by top tech companies worldwide.

-
-
-
-
-

Cloud-Native Architecture

-

Master Kubernetes, Helm, StatefulSets, and container orchestration for scalable deployments.

-
-
-
-

GitOps & Automation

-

Implement ArgoCD, Argo Rollouts, and progressive delivery for safe, automated deployments.

-
-
-
🔒
-

Security & Secrets

-

Learn HashiCorp Vault, Kubernetes Secrets, and secure configuration management practices.

-
-
-
📊
-

Observability

-

Build monitoring stacks with Prometheus, Grafana, Loki, and implement effective alerting.

-
-
-
-

Infrastructure as Code

-

Automate infrastructure with Terraform and Ansible for reproducible environments.

-
-
-
🌐
-

Beyond Kubernetes

-

Explore edge computing with Fly.io and decentralized hosting with IPFS and 4EVERLAND.

-
-
-
- -
-
-

Lectures & Labs

-

16 lectures with corresponding hands-on labs, plus 2 bonus labs as exam alternatives.

-
-
-
-
01
-
-

Web Application Development

-

Python/Go, Best Practices

-
-
-
-
02
-
-

Containerization

-

Docker, Multi-stage Builds

-
-
-
-
03
-
-

Continuous Integration

-

GitHub Actions, Snyk

-
-
-
-
04
-
-

Infrastructure as Code

-

Terraform, Cloud Providers

-
-
-
-
05
-
-

Configuration Management

-

Ansible Basics

-
-
-
-
06
-
-

Continuous Deployment

-

Ansible Advanced

-
-
-
-
07
-
-

Logging

-

Promtail, Loki, Grafana

-
-
-
-
08
-
-

Monitoring

-

Prometheus, Grafana

-
-
-
-
09
-
-

Kubernetes Basics

-

Minikube, Deployments, Services

-
-
-
-
10
-
-

Helm Charts

-

Templating, Hooks

-
-
-
-
11
-
-

Secrets Management

-

K8s Secrets, HashiCorp Vault

-
-
-
-
12
-
-

Configuration & Storage

-

ConfigMaps, PVCs

-
-
-
-
13
-
-

GitOps

-

ArgoCD

-
-
-
-
14
-
-

Progressive Delivery

-

Argo Rollouts

-
-
-
-
15
-
-

StatefulSets

-

Persistent Storage, Headless Services

-
-
-
-
16
-
-

Cluster Monitoring

-

Kube-Prometheus, Init Containers

-
-
-
-
17
-
-

Fly.io Edge Deployment

-

Global Distribution, PaaS

- Exam Alternative -
-
-
-
18
-
-

4EVERLAND & IPFS

-

Decentralized Hosting

- Exam Alternative -
-
-
-
- -
-
-

Learning Roadmap

-

A structured 16-week journey from foundations to advanced production patterns, plus 2 weeks for bonus labs or exam preparation.

-
-
-
-
- Phase - 1 -
-
-

Foundations (Weeks 1-6)

-

Build core skills in containerization, CI/CD, and infrastructure automation.

-
- Docker - GitHub Actions - Terraform - Ansible -
-
-
-
-
- Phase - 2 -
-
-

Observability (Weeks 7-8)

-

Master logging and monitoring for production visibility.

-
- Prometheus - Grafana - Loki - Alerting -
-
-
-
-
- Phase - 3 -
-
-

Kubernetes Core (Weeks 9-12)

-

Deep dive into Kubernetes orchestration and package management.

-
- Kubernetes - Helm - Secrets - ConfigMaps -
-
-
-
-
- Phase - 4 -
-
-

Advanced Patterns (Weeks 13-16)

-

Implement GitOps, progressive delivery, stateful workloads, and production monitoring.

-
- ArgoCD - Argo Rollouts - StatefulSets - Vault -
-
-
-
-
- Bonus - +2 -
-
-

Bonus Labs / Exam Prep (Weeks 17-18)

-

Complete exam alternative labs or prepare for the final exam.

-
- Fly.io - IPFS - 4EVERLAND - Edge Computing -
-
-
-
-
- -
-
-

Ready to Start Your DevOps Journey?

-

Join 1000+ students who have built production-ready skills through this battle-tested curriculum.

- - Get Started Free → - -
-
-
- -
-
-

© 2020–2026 DevOps Core Course. 7 years of continuous improvement. Open source educational content.

- -
-
- -
-
🌐
-
- Deployed on
- IPFS via 4EVERLAND -
-
- - diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 index 0000000000..477836502e --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,92 @@ +services: + loki: + image: grafana/loki:3.0.0 + command: ["-config.file=/etc/loki/config.yml"] + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: [logging] + deploy: + resources: + limits: + cpus: "1.00" + memory: 1024M + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 3s + retries: 10 + + promtail: + image: grafana/promtail:3.0.0 + command: ["-config.file=/etc/promtail/config.yml"] + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - promtail-positions:/tmp + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: [logging] + depends_on: [loki] + deploy: + resources: + limits: + cpus: "0.25" + memory: 256M + + grafana: + image: grafana/grafana:12.3.1 + ports: + - "3002:3002" + volumes: + - grafana-data:/var/lib/grafana + environment: + - GF_SERVER_HTTP_PORT=3002 + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER} + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD} + - GF_SECURITY_ALLOW_EMBEDDING=true + networks: [logging] + depends_on: [loki] + deploy: + resources: + limits: + cpus: "0.50" + memory: 512M + healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:3002/api/health | grep -q ok"] + interval: 10s + timeout: 5s + retries: 10 + + devops-info-service: + image: wkwtfigo/devops-info-service:latest + ports: + - "8000:5000" + environment: + - HOST=0.0.0.0 + - PORT=5000 + - DEBUG=FALSE + - SERVICE_NAME=devops-info-service + - SERVICE_VERSION=1.0.0 + labels: + logging: "promtail" + app: "devops-python" + networks: [logging] + depends_on: [loki] + deploy: + resources: + limits: + cpus: "0.50" + 
memory: 256M + +volumes: + loki-data: + grafana-data: + promtail-positions: + +networks: + logging: \ No newline at end of file diff --git a/monitoring/docs/LAB07.md b/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..9c36bb45b3 --- /dev/null +++ b/monitoring/docs/LAB07.md @@ -0,0 +1,464 @@ +# LAB07 — Observability & Logging with Loki Stack + +## Architecture + +### Components +- **devops-info-service (FastAPI)** — application that writes structured JSON logs to stdout +- **Promtail** — collects container logs from Docker and ships them to Loki +- **Loki** — stores logs and provides LogQL query API +- **Grafana** — UI for log exploration and dashboards (Loki datasource) + +### Diagram (data flow) + +```text ++--------------------------+ +| devops-info-service | +| (Docker container) | +| JSON logs -> stdout | ++------------+-------------+ + | + | Docker logs (/var/lib/docker/containers/*/*.log) + v ++------------+-------------+ +| Promtail (Docker) | +| - docker_sd_configs | +| - filters: logging=... 
| +| - relabel: app, container| ++------------+-------------+ + | + | push HTTP + v ++------------+-------------+ +| Loki (Docker) | +| - TSDB storage | +| - retention 7d | ++------------+-------------+ + | + | LogQL queries + v ++--------------------------+ +| Grafana (Docker) | +| - Loki datasource | +| - Dashboard panels | ++--------------------------+ +``` + +## Setup Guide + +> Repository structure: +> +- `monitoring/docker-compose.yml` +- `monitoring/loki/config.yml` +- `monitoring/promtail/config.yml` +- `monitoring/docs/LAB07.md` + +### 1) Start stack + +From `monitoring/` directory: + +```bash +docker compose up -d +docker compose ps +``` + +### 2) Verify Loki/Promtail are reachable + +```bash +curl -s http://localhost:3100/ready +curl -s http://localhost:9080/targets +``` + +### 3) Open Grafana and add Loki datasource + +- Grafana: `http://localhost:3002` (port configured via `GF_SERVER_HTTP_PORT`) +- Add datasource: + - **Connections → Data sources → Loki** + - URL: `http://loki:3100` + - Save & Test + +### 4) Generate logs + +```bash +for i in {1..20}; do curl -s http://localhost:8000/ >/dev/null; done +for i in {1..20}; do curl -s http://localhost:8000/health >/dev/null; done +``` + +### 5) Confirm logs in Explore + +In Grafana → Explore → Loki: + +``` +{app="devops-python"} +``` + +**Screenshots:** + +![](/monitoring/docs/screenshots/grafana_logs.png) +![](/monitoring/docs/screenshots/grafana_containers.png) + +## Configuration + +**Goal:** local single-node Loki with TSDB + filesystem storage and retention. 
+ +Snippet (`monitoring/loki/config.yml`): + +```yaml +auth_enabled: false + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + retention_period: 168h +``` + +**Why:** + +- `auth_enabled: false` — simplifies local lab setup +- `store: tsdb` + `object_store: filesystem` — single-node storage without external dependencies +- `retention_period: 168h` — required 7 days retention policy + +--- + +## Promtail config (highlights) + +**Goal:** scrape Docker container logs and attach meaningful labels (`app`, `container`). + +Snippet (`monitoring/promtail/config.yml`): + +```yaml +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + target_label: container + regex: '/(.*)' + replacement: '$1' + + - source_labels: ['__meta_docker_container_label_app'] + target_label: app +``` + +**Why:** + +- `docker_sd_configs` + `docker.sock` — dynamic discovery of containers +- `filters logging=promtail` — avoid collecting logs from every container (only targeted services) +- relabel rules: + - `container` label helps identify source container + - `app` label enables app-level queries (`{app="devops-python"}`) + +## Application Logging + +### JSON logging requirement + +The app logs are structured as JSON for easy parsing and querying in Loki/Grafana. + +**Implemented fields:** + +- `asctime`, `levelname`, `name`, `message` +- `service`, `version`, `hostname` +- request context (for HTTP requests): + - `method`, `path`, `status_code`, `client_ip`, `duration_ms` + +### Implementation approach + +- Use `python-json-logger` to format logs as JSON. 
+- Add `DefaultFieldsFilter` to prevent missing-field errors (ensures default values exist for log fields). +- Add FastAPI middleware to log each request with timing and status code. + +Snippet (`app.py`): + +```python +class DefaultFieldsFilter(logging.Filter): + def filter(self, record): + if not hasattr(record, "service"): + record.service = SERVICE_NAME + if not hasattr(record, "version"): + record.version = SERVICE_VERSION + if not hasattr(record, "hostname"): + record.hostname = socket.gethostname() + for k in ("method", "path", "status_code", "client_ip", "duration_ms"): + if not hasattr(record, k): + setattr(record, k, None) + return True +``` + +Middleware snippet: + +```python +@app.middleware("http") +async def access_log_middleware(request: Request, call_next): + start = time.perf_counter() + client_ip = request.client.host if request.client else "unknown" + response = await call_next(request) + duration_ms = int((time.perf_counter() - start) * 1000) + + logger.info( + "http_request", + extra={ + "service": SERVICE_NAME, + "version": SERVICE_VERSION, + "hostname": socket.gethostname(), + "method": request.method, + "path": request.url.path, + "status_code": response.status_code, + "client_ip": client_ip, + "duration_ms": duration_ms, + }, + ) + return response +``` + +**Screenshots:** + +![](/monitoring/docs/screenshots/log_line.png) + +## Dashboard + +Dashboard contains 4 panels required by the lab. + +### Panel 1 — Logs Table (all apps) + +Shows recent logs from all matching apps. + +Query: + +```graphql +{app=~"devops-.*"} +``` + +Explanation: + +- Uses regex to include multiple apps (python + bonus, if present). + +--- + +### Panel 2 — Request Rate (logs per second) + +Time series showing how many logs per second are produced per app. + +Query: + +```graphql +sum by (app) (rate({app=~"devops-.*"}[1m])) +``` + +Explanation: + +- `rate([...])` estimates log lines per second over the last minute. +- `sum by(app)` groups the series by application label. 
+ +--- + +### Panel 3 — Error logs + +Shows only error logs. + +Query: + +```graphql +{app=~"devops-.*"} | json | level="error" +``` + +Alternative (if you store uppercase): + +```graphql +{app=~"devops-.*"} | json | level=~"error|ERROR" +``` + +Explanation: + +- Parse JSON (`| json`) and filter by severity field. + +--- + +### Panel 4 — Log level distribution + +Counts logs by severity level. + +Query: + +``` +sum by (level) (count_over_time({app=~"devops-.*"} | json [5m])) +``` + +Explanation: + +- `count_over_time` counts how many log lines over a time window. +- Group by the `level` field. + +**Screenshot:** + +![](/monitoring/docs/screenshots/dashboard.png) + +## Production Config + +### Security (Grafana authentication) + +Anonymous auth is disabled, admin user/password are set via `.env` (not committed). + +Snippet (`docker-compose.yml`): + +```yaml +environment: + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER} + - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD} +``` + +`.env` example (NOT committed): + +``` +GRAFANA_ADMIN_USER=admin +GRAFANA_ADMIN_PASSWORD=******** +``` + +![](/monitoring/docs/screenshots/auth.png) + +### Resources + +Resource limits configured for each service. + +Snippet (`docker-compose.yml`): + +```yaml +deploy: + resources: + limits: + cpus: "0.50" + memory: 512M +``` + +### Retention + +Loki retention is configured to 7 days (`168h`). + +Snippet: + +```yaml +limits_config: + retention_period: 168h +``` + +### Healthchecks + +Loki and Grafana healthchecks ensure container health status is visible. 
+ +Snippet: + +```yaml +healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 3s + retries: 10 +``` + +Grafana: + +```yaml +healthcheck: + test: ["CMD-SHELL", "wget -qO- http://localhost:3002/api/health | grep -q ok"] + interval: 10s + timeout: 5s + retries: 10 +``` + +**Screenshot:** + +![](/monitoring/docs/screenshots/docker_compose_ps.png) + +## Testing + +### 1) Check containers are up + +```bash +docker compose ps +``` + +### 2) Health endpoints + +```bash +curl -s http://localhost:3100/ready +curl -s http://localhost:3002/api/health +``` + +### 3) Promtail targets + +```bash +curl -s http://localhost:9080/targets +``` + +### 4) Generate traffic and confirm logs + +```bash +curl -s http://localhost:8000/ >/dev/null +curl -s http://localhost:8000/health >/dev/null +``` + +### 5) Example LogQL queries + +1. All logs of python app: + +```graphql +{app="devops-python"} +``` + +![](/monitoring/docs/screenshots/query2.png) + +2. Only errors: + +```graphql +{app="devops-python"} | json | level="error" +``` + +![](/monitoring/docs/screenshots/query3.png) + +3. Filter by request method: + +```graphql +{app="devops-python"} | json | method="GET" +``` + +![](/monitoring/docs/screenshots/query1.png) + +## Challenges + +### 1) JSON logger crash due to missing fields + +**Problem:** `python-json-logger` formatter may crash if referenced fields are not present on every record. + +**Solution:** Added `DefaultFieldsFilter` that sets defaults for required fields (`method`, `path`, etc.) so that non-request logs (e.g., startup logs) do not break formatting. + +### 2) Error panel returned “No data” + +**Problem:** LogQL filtered by the wrong field name/value (e.g., using `levelname="ERROR"` while logs had `level="error"`). 
+ +**Solution:** Verified field names in Grafana “Log line” view and updated LogQL to match actual JSON keys/values: + +```graphql +{app=~"devops-.*"} | json | level="error" +``` + +### 3) Collecting too many container logs + +**Problem:** Promtail scraped logs from all containers by default. + +**Solution:** Added docker discovery filter to only scrape containers labeled `logging=promtail`. \ No newline at end of file diff --git a/monitoring/docs/screenshots/auth.png b/monitoring/docs/screenshots/auth.png new file mode 100644 index 0000000000..ae70b6a696 Binary files /dev/null and b/monitoring/docs/screenshots/auth.png differ diff --git a/monitoring/docs/screenshots/dashboard.png b/monitoring/docs/screenshots/dashboard.png new file mode 100644 index 0000000000..35f97a73e0 Binary files /dev/null and b/monitoring/docs/screenshots/dashboard.png differ diff --git a/monitoring/docs/screenshots/docker_compose_ps.png b/monitoring/docs/screenshots/docker_compose_ps.png new file mode 100644 index 0000000000..897d93ebf2 Binary files /dev/null and b/monitoring/docs/screenshots/docker_compose_ps.png differ diff --git a/monitoring/docs/screenshots/grafana_containers.png b/monitoring/docs/screenshots/grafana_containers.png new file mode 100644 index 0000000000..b8d9c2cdc7 Binary files /dev/null and b/monitoring/docs/screenshots/grafana_containers.png differ diff --git a/monitoring/docs/screenshots/grafana_logs.png b/monitoring/docs/screenshots/grafana_logs.png new file mode 100644 index 0000000000..22c9523acb Binary files /dev/null and b/monitoring/docs/screenshots/grafana_logs.png differ diff --git a/monitoring/docs/screenshots/log_generation_1.png b/monitoring/docs/screenshots/log_generation_1.png new file mode 100644 index 0000000000..cb2922d4c3 Binary files /dev/null and b/monitoring/docs/screenshots/log_generation_1.png differ diff --git a/monitoring/docs/screenshots/log_line.png b/monitoring/docs/screenshots/log_line.png new file mode 100644 index 0000000000..c3c486302f 
Binary files /dev/null and b/monitoring/docs/screenshots/log_line.png differ diff --git a/monitoring/docs/screenshots/query1.png b/monitoring/docs/screenshots/query1.png new file mode 100644 index 0000000000..fd25745e13 Binary files /dev/null and b/monitoring/docs/screenshots/query1.png differ diff --git a/monitoring/docs/screenshots/query2.png b/monitoring/docs/screenshots/query2.png new file mode 100644 index 0000000000..cee3a97510 Binary files /dev/null and b/monitoring/docs/screenshots/query2.png differ diff --git a/monitoring/docs/screenshots/query3.png b/monitoring/docs/screenshots/query3.png new file mode 100644 index 0000000000..3e90a929d0 Binary files /dev/null and b/monitoring/docs/screenshots/query3.png differ diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..bb2f18056a --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,40 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + replication_factor: 1 + ring: + kvstore: + store: inmemory + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + filesystem: + directory: /loki/chunks + tsdb_shipper: + active_index_directory: /loki/tsdb-index + cache_location: /loki/tsdb-cache + +compactor: + working_directory: /loki/compactor + retention_enabled: true + delete_request_store: filesystem + +limits_config: + retention_period: 168h \ No newline at end of file diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml new file mode 100644 index 0000000000..337b76f8e2 --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,30 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + 
+scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + target_label: container + regex: '/(.*)' + replacement: '$1' + + - source_labels: ['__meta_docker_container_label_app'] + target_label: app + + - target_label: job + replacement: docker \ No newline at end of file diff --git a/pulumi/.gitignore b/pulumi/.gitignore new file mode 100644 index 0000000000..a3807e5bdb --- /dev/null +++ b/pulumi/.gitignore @@ -0,0 +1,2 @@ +*.pyc +venv/ diff --git a/pulumi/Pulumi.yaml b/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..a7676ee10d --- /dev/null +++ b/pulumi/Pulumi.yaml @@ -0,0 +1,11 @@ +name: pulumi-yc +description: A minimal Python Pulumi program +runtime: + name: python + options: + toolchain: pip + virtualenv: venv +config: + pulumi:tags: + value: + pulumi:template: python diff --git a/pulumi/__main__.py b/pulumi/__main__.py new file mode 100644 index 0000000000..b8261ed35c --- /dev/null +++ b/pulumi/__main__.py @@ -0,0 +1,117 @@ +"""A Python Pulumi program""" + +import pulumi +import pulumi_yandex as yandex + +cfg = pulumi.Config() + +vm_name = cfg.get("vmName") or "lab-vm" +vm_user = cfg.get("vmUser") or "ubuntu" + +ssh_allow_cidr = cfg.require("sshAllowCidr") +ssh_public_key_path = cfg.require("sshPublicKeyPath") + +subnet_cidr = cfg.get("subnetCidr") or "10.10.0.0/24" + +cores = int(cfg.get("cores") or 2) +memory_gb = int(cfg.get("memoryGb") or 2) +core_fraction = int(cfg.get("coreFraction") or 20) + +labels = { + "project": "lab04", + "tool": "pulumi", +} + +# Ubuntu 24.04 image (family) +image = yandex.get_compute_image(family="ubuntu-2404-lts") + +net = yandex.VpcNetwork(f"{vm_name}-net", labels=labels) + +subnet = yandex.VpcSubnet( + f"{vm_name}-subnet", + network_id=net.id, + zone=cfg.get("yandex:zone") or "ru-central1-a", + v4_cidr_blocks=[subnet_cidr], 
+ labels=labels, +) + +sg = yandex.VpcSecurityGroup( + f"{vm_name}-sg", + network_id=net.id, + labels=labels, + ingresses=[ + yandex.VpcSecurityGroupIngressArgs( + protocol="TCP", + port=22, + v4_cidr_blocks=[ssh_allow_cidr], + description="SSH from my IP", + ), + yandex.VpcSecurityGroupIngressArgs( + protocol="TCP", + port=80, + v4_cidr_blocks=["0.0.0.0/0"], + description="HTTP", + ), + yandex.VpcSecurityGroupIngressArgs( + protocol="TCP", + port=5000, + v4_cidr_blocks=["0.0.0.0/0"], + description="App port 5000", + ), + ], + egresses=[ + yandex.VpcSecurityGroupEgressArgs( + protocol="ANY", + v4_cidr_blocks=["0.0.0.0/0"], + description="Allow all egress", + ) + ], +) + +ip = yandex.VpcAddress( + f"{vm_name}-ip", + labels=labels, + external_ipv4_address=yandex.VpcAddressExternalIpv4AddressArgs( + zone_id=cfg.get("yandex:zone") or "ru-central1-a" + ), +) + +def ip_addr(ext): + if isinstance(ext, dict): + return ext.get("address") + return getattr(ext, "address", None) + +public_ip = ip.external_ipv4_address.apply(ip_addr) + +with open(ssh_public_key_path, "r", encoding="utf-8") as f: + ssh_pub = f.read().strip() + +vm = yandex.ComputeInstance( + vm_name, + labels=labels, + resources=yandex.ComputeInstanceResourcesArgs( + cores=cores, + memory=memory_gb, + core_fraction=core_fraction, + ), + boot_disk=yandex.ComputeInstanceBootDiskArgs( + initialize_params=yandex.ComputeInstanceBootDiskInitializeParamsArgs( + image_id=image.id, + size=20, + ) + ), + network_interfaces=[ + yandex.ComputeInstanceNetworkInterfaceArgs( + subnet_id=subnet.id, + nat=True, + nat_ip_address=public_ip, + security_group_ids=[sg.id], + ) + ], + metadata={ + "ssh-keys": f"{vm_user}:{ssh_pub}", + }, +) + +pulumi.export("public_ip", public_ip) +pulumi.export("ssh_command", pulumi.Output.concat("ssh -i ~/.ssh/id_ed25519 ", vm_user, "@", public_ip)) diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt new file mode 100644 index 0000000000..bc4e43087b --- /dev/null +++ 
b/pulumi/requirements.txt @@ -0,0 +1 @@ +pulumi>=3.0.0,<4.0.0 diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 0000000000..e62b514aea --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1,5 @@ +.terraform/ +terraform.tfstate +terraform.tfstate.* +*.tfvars +key.json diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl new file mode 100644 index 0000000000..912d5475ba --- /dev/null +++ b/terraform/.terraform.lock.hcl @@ -0,0 +1,23 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/yandex-cloud/yandex" { + version = "0.186.0" + constraints = "~> 0.120" + hashes = [ + "h1:sR0wAL+16ZL2MK1+VcHb52hZ6J1W/sqiLx13+SoNFO4=", + "zh:000942a71dbe6a86a4b3ec38d34ed008ff515b9884da36c85b864ecc65ef49fc", + "zh:0a2cfab26d3ee2bda9d9e92a3a9661f2f58b68062b4f144901479586736b526b", + "zh:19a0fc96dd999e9c39039d87958b361cd146fa06772b050fa62fc74c193c853c", + "zh:399ed2db949a4f804694f83d67b5b47fdf3d21614a5498a6c19201be6d994c0c", + "zh:4cb8d0941b91c31869e4ad77e627ccf45858a1d0bf29443aadb1d50d156d7db6", + "zh:62d01b753c0da4bfb81fc2dbd53f8abd1110c1fad3c76ac4ce2653a8451bd81f", + "zh:697eda2f4fce12803c48f83bf3768ff8c3dd363989bd042e860bae7118101510", + "zh:8cbbfd83c195d242e56f4858b1e7d0bba2bf4322aacea7ae6f568061d00763c7", + "zh:a5f24b7667e5c21a6e48cced044ce0d748ff0023f51394d7ef09fa97ccddd5f4", + "zh:c2db88c26696a3e68e427234afa772b1212dda749a4f526d73a2d4b45eb1821b", + "zh:c70219f67973bf1708aa384b6e643adbd4a28494b150597831a4c60911c62bd1", + "zh:e2682659d66c8f0957641494415c69fd2f85b3aee4045278a58df9acf3a15b83", + "zh:e74cf2cf5b2b108c18ed8c2fe4e8ae2d686faeb0a91f4a7de96416c5e5afea97", + ] +} diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000000..0327bec63f --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,93 @@ +provider 
"yandex" {
+  cloud_id                 = var.cloud_id
+  folder_id                = var.folder_id
+  zone                     = var.zone
+  service_account_key_file = var.sa_key_path # authenticate with a service-account key file
+}
+
+data "yandex_compute_image" "ubuntu" {
+  family = "ubuntu-2404-lts" # image is looked up by family rather than a pinned image ID
+}
+
+resource "yandex_vpc_network" "net" {
+  labels = var.labels
+  name   = "${var.vm_name}-net"
+}
+
+resource "yandex_vpc_subnet" "subnet" {
+  name           = "${var.vm_name}-subnet"
+  labels         = var.labels
+  network_id     = yandex_vpc_network.net.id
+  zone           = var.zone
+  v4_cidr_blocks = [var.subnet_cidr]
+}
+
+resource "yandex_vpc_security_group" "sg" {
+  name       = "${var.vm_name}-sg"
+  labels     = var.labels
+  network_id = yandex_vpc_network.net.id
+
+  ingress {
+    description    = "SSH from my IP"
+    protocol       = "TCP"
+    port           = 22
+    v4_cidr_blocks = [var.ssh_allow_cidr] # only the caller-supplied CIDR may reach port 22
+  }
+
+  ingress {
+    description    = "HTTP"
+    protocol       = "TCP"
+    port           = 80
+    v4_cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  ingress {
+    description    = "App port 5000"
+    protocol       = "TCP"
+    port           = 5000
+    v4_cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  egress {
+    description    = "Allow all egress"
+    protocol       = "ANY"
+    v4_cidr_blocks = ["0.0.0.0/0"]
+  }
+}
+
+resource "yandex_vpc_address" "public_ip" {
+  name = "${var.vm_name}-ip"
+
+  external_ipv4_address {
+    zone_id = var.zone
+  }
+}
+
+resource "yandex_compute_instance" "vm" {
+  labels = var.labels
+  name   = var.vm_name
+
+  resources {
+    cores         = var.cores
+    core_fraction = var.core_fraction
+    memory        = var.memory_gb
+  }
+
+  boot_disk {
+    initialize_params {
+      size     = 20
+      image_id = data.yandex_compute_image.ubuntu.id
+    }
+  }
+
+  network_interface {
+    subnet_id          = yandex_vpc_subnet.subnet.id
+    security_group_ids = [yandex_vpc_security_group.sg.id]
+    nat                = true
+    nat_ip_address     = yandex_vpc_address.public_ip.external_ipv4_address[0].address # address allocated by yandex_vpc_address.public_ip
+  }
+
+  metadata = {
+    ssh-keys = "${var.vm_user}:${file(var.ssh_public_key_path)}"
+  }
+}
diff --git a/terraform/outputs.tf b/terraform/outputs.tf
new file mode 100644
index 0000000000..2ef914e1bd
--- /dev/null
+++ b/terraform/outputs.tf
@@ -0,0 +1,7 @@
+output
"public_ip" {
+  value = yandex_vpc_address.public_ip.external_ipv4_address[0].address
+}
+
+output "ssh_command" {
+  value = "ssh -i ${trimsuffix(var.ssh_public_key_path, ".pub")} ${var.vm_user}@${yandex_vpc_address.public_ip.external_ipv4_address[0].address}"
+}
diff --git a/terraform/variables.tf b/terraform/variables.tf
new file mode 100644
index 0000000000..af87c75254
--- /dev/null
+++ b/terraform/variables.tf
@@ -0,0 +1,80 @@
+variable "cloud_id" {
+  description = "Yandex Cloud ID passed to the provider."
+  type        = string
+}
+
+variable "folder_id" {
+  description = "Yandex Cloud folder ID passed to the provider."
+  type        = string
+}
+
+variable "zone" {
+  description = "Availability zone used for the provider default, the subnet and the reserved public address."
+  type        = string
+  default     = "ru-central1-a"
+}
+
+variable "vm_name" {
+  description = "Name of the VM; also used as the prefix for the network, subnet, security group and address names."
+  type        = string
+  default     = "lab-vm"
+}
+
+variable "vm_user" {
+  description = "Login name the SSH public key is registered for in the VM metadata."
+  type        = string
+  default     = "ubuntu"
+}
+
+variable "ssh_public_key_path" {
+  description = "Path to the SSH public key file that is installed on the VM."
+  type        = string
+}
+
+variable "sa_key_path" {
+  description = "Path to the service-account key file the provider authenticates with."
+  type        = string
+}
+
+variable "ssh_allow_cidr" {
+  description = "CIDR block allowed to reach SSH (TCP 22) on the VM, e.g. 203.0.113.10/32."
+  type        = string
+
+  # Validate early so a malformed CIDR is rejected by Terraform instead of by the cloud API.
+  validation {
+    condition     = can(cidrhost(var.ssh_allow_cidr, 0))
+    error_message = "ssh_allow_cidr must be a valid CIDR block, e.g. 203.0.113.10/32."
+  }
+}
+
+variable "subnet_cidr" {
+  description = "IPv4 CIDR block assigned to the subnet."
+  type        = string
+  default     = "10.10.0.0/24"
+}
+
+variable "cores" {
+  description = "Number of CPU cores for the VM."
+  type        = number
+  default     = 2
+}
+
+variable "memory_gb" {
+  description = "Memory size for the VM, in GB."
+  type        = number
+  default     = 2
+}
+
+variable "core_fraction" {
+  description = "Baseline performance of each core, as a percent."
+  type        = number
+  default     = 20
+}
+
+variable "labels" {
+  description = "Labels applied to the network, subnet, security group and VM."
+  type        = map(string)
+  default = {
+    project = "lab04"
+  }
+}
diff --git a/terraform/version.tf b/terraform/version.tf
new file mode 100644
index 0000000000..e9239daeb1
--- /dev/null
+++ b/terraform/version.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.5.0"
+
+  required_providers {
+    yandex = {
+      source  = "yandex-cloud/yandex"
+      version = "~> 0.120"
+    }
+  }
+}