diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7a0a02d..929ee5a 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -6,9 +6,7 @@ on: - master env: - # Use docker.io for Docker Hub if empty REGISTRY: ghcr.io - # github.repository as / IMAGE_NAME: ${{ github.repository }} jobs: @@ -18,49 +16,36 @@ jobs: permissions: contents: read packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. id-token: write steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v6 - # Install the cosign tool except on PR - # https://github.com/sigstore/cosign-installer - name: Install cosign if: github.event_name != 'pull_request' - uses: sigstore/cosign-installer@v3.3.0 - with: - cosign-release: 'v2.2.2' # optional + uses: sigstore/cosign-installer@v4 - # Workaround: https://github.com/docker/build-push-action/issues/461 - name: Setup Docker buildx - uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf + uses: docker/setup-buildx-action@v3 - # Login against a Docker registry except on PR - # https://github.com/docker/login-action - name: Log into registry ${{ env.REGISTRY }} if: github.event_name != 'pull_request' - uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - # Extract metadata (tags, labels) for Docker - # https://github.com/docker/metadata-action - name: Extract Docker metadata id: meta - uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # Build and push Docker image with Buildx (don't push on PR) - # https://github.com/docker/build-push-action - name: Build and push Docker image id: build-and-push - uses: 
docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a + uses: docker/build-push-action@v6 with: context: . platforms: linux/amd64,linux/arm64 @@ -70,16 +55,6 @@ jobs: cache-from: type=gha cache-to: type=gha,mode=max - - # Sign the resulting Docker image digest except on PRs. - # This will only write to the public Rekor transparency log when the Docker - # repository is public to avoid leaking data. If you would like to publish - # transparency data even for private images, pass --force to cosign below. - # https://github.com/sigstore/cosign - name: Sign the published Docker image if: ${{ github.event_name != 'pull_request' }} - env: - COSIGN_EXPERIMENTAL: "true" - # This step uses the identity token to provision an ephemeral certificate - # against the sigstore community Fulcio instance. run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }} diff --git a/.github/workflows/release_slim.yaml b/.github/workflows/release_slim.yaml deleted file mode 100644 index 9dcf124..0000000 --- a/.github/workflows/release_slim.yaml +++ /dev/null @@ -1,90 +0,0 @@ -name: release_slim - -on: - push: - branches: - - master - paths: - - 'Slim/**' - workflow_dispatch: -env: - # Use docker.io for Docker Hub if empty - REGISTRY: ghcr.io - # github.repository as / - IMAGE_NAME: ${{ github.repository }}-slim - -jobs: - build: - - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - defaults: - run: - working-directory: ./Slim - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - # Install the cosign tool except on PR - # https://github.com/sigstore/cosign-installer - - name: Install cosign - if: github.event_name != 'pull_request' - uses: sigstore/cosign-installer@v3.3.0 - with: - cosign-release: 'v2.2.2' # optional - - # Workaround: https://github.com/docker/build-push-action/issues/461 - - name: Setup Docker buildx - uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf - - # Login against a Docker registry except on PR - # https://github.com/docker/login-action - - name: Log into registry ${{ env.REGISTRY }} - if: github.event_name != 'pull_request' - uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - # Extract metadata (tags, labels) for Docker - # https://github.com/docker/metadata-action - - name: Extract Docker metadata - id: meta - uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - - # Build and push Docker image with Buildx (don't push on PR) - # https://github.com/docker/build-push-action - - name: Build and push Docker image - id: build-and-push - uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a - with: - context: ./Slim - file: ./Slim/Dockerfile - platforms: linux/amd64,linux/arm64 - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - - # Sign the resulting Docker image digest except on PRs. - # This will only write to the public Rekor transparency log when the Docker - # repository is public to avoid leaking data. 
If you would like to publish - # transparency data even for private images, pass --force to cosign below. - # https://github.com/sigstore/cosign - - name: Sign the published Docker image - if: ${{ github.event_name != 'pull_request' }} - env: - COSIGN_EXPERIMENTAL: "true" - # This step uses the identity token to provision an ephemeral certificate - # against the sigstore community Fulcio instance. - run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }} diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e986919..abc33f4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,9 +1,9 @@ name: test on: - pull_request: - paths-ignore: - - 'Slim/**' + pull_request: + paths-ignore: + - 'Slim/**' jobs: test: @@ -11,12 +11,13 @@ jobs: steps: - name: checkout - uses: actions/checkout@ec3a7ce113134d7a93b817d10a8272cb61118579 # v2.4.0 - with: - fetch-depth: 1 + uses: actions/checkout@v6 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 - name: build image - run: docker build -t doks-debug . + run: docker buildx build --platform linux/amd64 --load -t debug-pod . - name: smoke test - run: docker run --rm doks-debug sleep 1 + run: docker run --rm debug-pod curl --version diff --git a/Dockerfile b/Dockerfile index 718e374..76d6277 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,46 +1,54 @@ -FROM debian:12 AS builder +FROM debian:13 AS builder -# this builder part is the work of Yury Muski, from https://github.com/yurymuski/curl-http3 -LABEL maintainer="Yury Muski " +# Build curl with HTTP/3 support using ngtcp2 (non-experimental) backend. +# Debian 13 ships OpenSSL 3.5 which has native QUIC API support for ngtcp2.
+# https://github.com/curl/curl/blob/master/docs/HTTP3.md#ngtcp2-version WORKDIR /opt -ARG CURL_VERSION=curl-8_2_1 -# https://github.com/curl/curl/blob/master/docs/HTTP3.md#quiche-version -ARG QUICHE_VERSION=0.18.0 +ARG CURL_VERSION=curl-8_18_0 +ARG NGTCP2_VERSION=v1.20.0 +ARG NGHTTP3_VERSION=v1.15.0 RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ apt-get full-upgrade --auto-remove --purge -y && \ - apt-get install -y build-essential git autoconf libtool cmake golang-go curl libnghttp2-dev zlib1g-dev; + apt-get install -y build-essential git autoconf libtool pkg-config \ + libssl-dev libnghttp2-dev zlib1g-dev libpsl-dev; +# Build nghttp3 +RUN git clone -b $NGHTTP3_VERSION https://github.com/ngtcp2/nghttp3 && \ + cd nghttp3 && \ + git submodule update --init && \ + autoreconf -fi && \ + ./configure --prefix=/usr/local --enable-lib-only && \ + make --jobs=$(nproc) && \ + make install -# install rust & cargo -RUN curl https://sh.rustup.rs -sSf | sh -s -- -y -q; - -RUN git clone --recursive https://github.com/cloudflare/quiche - -# build quiche -RUN export PATH="$HOME/.cargo/bin:$PATH" && \ - cd quiche && \ - git checkout $QUICHE_VERSION && \ - cargo build --package quiche --release --features ffi,pkg-config-meta,qlog && \ - mkdir quiche/deps/boringssl/src/lib && \ - ln -vnf $(find target/release -name libcrypto.a -o -name libssl.a) quiche/deps/boringssl/src/lib/ - -# add curl -RUN git clone https://github.com/curl/curl -RUN cd curl && \ +# Build ngtcp2 (with system OpenSSL 3.5+) +RUN git clone -b $NGTCP2_VERSION https://github.com/ngtcp2/ngtcp2 && \ + cd ngtcp2 && \ + autoreconf -fi && \ + ./configure PKG_CONFIG_PATH=/usr/local/lib/pkgconfig \ + --prefix=/usr/local --enable-lib-only --with-openssl && \ + make --jobs=$(nproc) && \ + make install + +# Build curl with HTTP/3 (ngtcp2 + nghttp3) + HTTP/2 (nghttp2) + TLS (OpenSSL) +RUN git clone https://github.com/curl/curl && \ + cd curl && \ git checkout $CURL_VERSION && \ autoreconf -fi && \ - 
./configure LDFLAGS="-Wl,-rpath,/opt/quiche/target/release" --with-openssl=/opt/quiche/quiche/deps/boringssl/src --with-quiche=/opt/quiche/target/release --with-nghttp2 --with-zlib && \ - make && \ - make DESTDIR="/debian/" install + ./configure PKG_CONFIG_PATH=/usr/local/lib/pkgconfig \ + --with-openssl --with-nghttp3 --with-ngtcp2 --with-nghttp2 --with-zlib && \ + make --jobs=$(nproc) && \ + make install + +FROM debian:13-slim -# match doks-debug version with DOKS worker node image version for kernel -# tooling compatibility reasons -FROM debian:stable-slim +# Specify the version of crictl to install +ARG CRICTL_VERSION="v1.33.0" LABEL org.opencontainers.image.source=https://github.com/nosportugal/debug-pod LABEL org.opencontainers.image.description="A debian image with some debugging tools installed." @@ -50,20 +58,17 @@ WORKDIR /root # use same dpkg path-exclude settings that come by default with ubuntu:focal # image that we previously used -RUN echo 'path-exclude=/usr/share/locale/*/LC_MESSAGES/*.mo' > /etc/dpkg/dpkg.cfg.d/excludes -RUN echo 'path-exclude=/usr/share/doc/*' > /etc/dpkg/dpkg.cfg.d/excludes -RUN echo 'path-include=/usr/share/doc/*/copyright' > /etc/dpkg/dpkg.cfg.d/excludes -RUN echo 'path-include=/usr/share/doc/*/changelog.Debian.*' > /etc/dpkg/dpkg.cfg.d/excludes - -RUN echo 'deb http://deb.debian.org/debian bullseye-backports main' > /etc/apt/sources.list.d/backports.list +RUN echo 'path-exclude=/usr/share/locale/*/LC_MESSAGES/*.mo' >> /etc/dpkg/dpkg.cfg.d/excludes +RUN echo 'path-exclude=/usr/share/doc/*' >> /etc/dpkg/dpkg.cfg.d/excludes +RUN echo 'path-include=/usr/share/doc/*/copyright' >> /etc/dpkg/dpkg.cfg.d/excludes +RUN echo 'path-include=/usr/share/doc/*/changelog.Debian.*' >> /etc/dpkg/dpkg.cfg.d/excludes RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ apt-get full-upgrade --auto-remove --purge -y && \ apt-get install -y \ - apt-transport-https \ ca-certificates \ - software-properties-common \ + curl \ httping \ man 
\ man-db \ @@ -72,7 +77,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ gnupg \ atop \ htop \ - dstat \ + sysstat \ jq \ dnsutils \ tcpdump \ @@ -91,26 +96,28 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ bpftool \ nmap \ redis-tools \ - kafkacat \ + kcat \ nghttp2 \ - zlib1g && \ + libpsl5t64 \ + zlib1g \ + wget && \ rm -rf /var/lib/apt/lists/* -COPY --from=builder /debian/usr/local/ /usr/local/ -COPY --from=builder /opt/quiche/target/release /opt/quiche/target/release +COPY --from=builder /usr/local/ /usr/local/ # Resolve any issues of C-level lib # location caches ("shared library cache") RUN ldconfig -RUN install -m 0755 -d /etc/apt/keyrings && \ - . /etc/os-release && \ - curl -fsSL "https://download.docker.com/linux/$ID/gpg" | gpg --dearmor -o "/etc/apt/keyrings/$ID.gpg" && \ - chmod a+r "/etc/apt/keyrings/$ID.gpg" && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/$ID.gpg] https://download.docker.com/linux/$ID $VERSION_CODENAME stable" | \ - tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - apt-get update -qq && \ - apt-get install -y docker-ce +# Install crictl for the image's own architecture (the release workflow builds both linux/amd64 and linux/arm64) +RUN wget https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-$(dpkg --print-architecture).tar.gz && \ + tar zxvf crictl-${CRICTL_VERSION}-linux-$(dpkg --print-architecture).tar.gz -C /usr/local/bin && \ + rm -f crictl-${CRICTL_VERSION}-linux-$(dpkg --print-architecture).tar.gz + +# Specify the default image endpoint for crictl +RUN echo 'runtime-endpoint: unix:///run/containerd/containerd.sock' >> /etc/crictl.yaml +RUN echo 'image-endpoint: unix:///run/containerd/containerd.sock' >> /etc/crictl.yaml +RUN echo 'timeout: 2' >> /etc/crictl.yaml # for httpie RUN curl -SsL https://packages.httpie.io/deb/KEY.gpg | gpg --dearmor -o /usr/share/keyrings/httpie.gpg && \ diff --git a/LICENSE b/LICENSE index 9432ffc..2ce255c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ MIT License Copyright (c) 2021 DigitalOcean +Copyright (c) 2025-2026 NOS Portugal Permission is
hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index f28338a..d1c5139 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,261 @@ -# The ultimate debug pod +# debug-pod -A Docker image with Kubernetes toolink for investigation and troubleshooting your cluster. +> The ultimate Kubernetes debugging toolkit — a single container image packed with everything you need to investigate, diagnose, and troubleshoot your clusters. -![main build](https://github.com/digitalocean/doks-debug/actions/workflows/test.yaml/badge.svg) ![main release](https://github.com/digitalocean/doks-debug/actions/workflows/release.yaml/badge.svg) +Built on **Debian 13 (Trixie)** with a custom **curl 8.18** compiled with **HTTP/3 (QUIC)** support. -## Purpose +## What is this? -This is an image based on the DOKS team's pod, full of tooling to make diagnostics and tests inside a container/kubernetes pod. +When something goes wrong inside a Kubernetes cluster, you often need tools that aren't available in your application containers. Instead of installing dozens of packages into your workloads, just drop a debug pod into any namespace and start investigating immediately. -This way you won't have to install a bunch of tooling on your pods. +This image is maintained by [NOS Portugal](https://github.com/nosportugal) and originally inspired by DigitalOcean's [doks-debug](https://github.com/digitalocean/doks-debug) project. -## Usage +--- -The easiest way to start a pod in the current context and namespace is: +## Quick Start + +### One-liner + +Spin up an interactive debug shell in your current namespace: + +```bash +kubectl run --rm -it debug-pod \ + --pod-running-timeout=300s \ + --image=ghcr.io/nosportugal/debug-pod:master +``` + +### Shell alias (recommended) + +Add this to your shell profile (`~/.bashrc`, `~/.zshrc`, etc.) 
for one-word access: + +```bash +alias debug-pod='kubectl run --rm -it debug-pod --pod-running-timeout=300s --image=ghcr.io/nosportugal/debug-pod:master' +``` + +Then you can jump into any cluster and namespace instantly: ```bash -kubectl run --rm -it debug-pod --pod-running-timeout 300 --image=ghcr.io/nosportugal/debug-pod:master +debug-pod --context production -n my-app ``` -You can also have at hand this nice alias: +### Using `kubectl debug` (Kubernetes 1.25+) + +Attach a debug container to a running pod without restarting it: ```bash -alias debug-pod='kubectl run --rm -it debug-pod --pod-running-timeout 300 --image=ghcr.io/nosportugal/debug-pod:master' +kubectl debug -it <pod-name> \ + --image=ghcr.io/nosportugal/debug-pod:master \ + --target=<container-name> ``` -Then you can do stuff from anywhere. The most useful example that I can think of is: +### Advanced: DaemonSet / Deployment + +For persistent debugging across nodes, use the provided manifests in the [`k8s/`](k8s/) directory: ```bash -debug-pod --context some-cluster -n some-namespace +# Deploy to every node as a DaemonSet (privileged, host-networked) +kubectl apply -f k8s/daemonset.yaml + +# Or deploy a single replica +kubectl apply -f k8s/deployment.yaml ``` +These manifests run in `kube-system` with `hostPID`, `hostNetwork`, `hostIPC`, and a host filesystem mount at `/host` — ideal for deep node-level debugging. They also mount the containerd socket for `crictl` access. + +--- + ## Tooling -Once you're in, you have access to the set of tools listed in the `Dockerfile`. This includes: - - - [`vim`](https://github.com/vim/vim) - is a greatly improved version of the good old UNIX editor Vi. - - [`screen`](https://www.gnu.org/software/screen/) - is a full-screen window manager that multiplexes a physical terminal between several processes, typically interactive shells. - - [`curl`](https://github.com/curl/curl) - is a command-line tool for transferring data specified with URL syntax.
- - [`jq`](https://github.com/stedolan/jq) - is a lightweight and flexible command-line JSON processor. - - [`dnsutils`](https://packages.debian.org/stretch/dnsutils) - includes various client programs related to DNS that are derived from the BIND source tree, specifically [`dig`](https://linux.die.net/man/1/dig), [`nslookup`](https://linux.die.net/man/1/nslookup), and [`nsupdate`](https://linux.die.net/man/8/nsupdate). - - [`iputils-ping`](https://packages.debian.org/stretch/iputils-ping) - includes the [`ping`](https://linux.die.net/man/8/ping) tool that sends ICMP `ECHO_REQUEST` packets to a host in order to test if the host is reachable via the network. - - [`tcpdump`](https://www.tcpdump.org/) - a powerful command-line packet analyzer; and libpcap, a portable C/C++ library for network traffic capture. - - [`traceroute`](https://linux.die.net/man/8/traceroute) - tracks the route packets taken from an IP network on their way to a given host. - - [`net-tools`](https://packages.debian.org/stretch/net-tools) - includes the important tools for controlling the network subsystem of the Linux kernel, specifically [`arp`](http://man7.org/linux/man-pages/man8/arp.8.html), [`ifconfig`](https://linux.die.net/man/8/ifconfig), and [`netstat`](https://linux.die.net/man/8/netstat). - - [`netcat`](https://linux.die.net/man/1/nc) - is a multi-tool for interacting with TCP and UDP; it can open TCP connections, send UDP packets, listen on arbitrary TCP and UDP ports, do port scanning, and deal with both IPv4 and IPv6. - - [`iproute2`](https://wiki.linuxfoundation.org/networking/iproute2) - is a collection of utilities for controlling TCP / IP networking and traffic control in Linux. - - [`strace`](https://github.com/strace/strace) - is a diagnostic, debugging and instructional userspace utility with a traditional command-line interface for Linux. 
It is used to monitor and tamper with interactions between processes and the Linux kernel, which include system calls, signal deliveries, and changes of process state. - - [`docker`](https://docs.docker.com/engine/reference/commandline/cli/) - is the CLI tool used for interacting with Docker containers on the system. - - [`dstat`](http://dag.wiee.rs/home-made/dstat/) - is a versatile replacement for vmstat, iostat, netstat and ifstat. Dstat overcomes some of their limitations and adds some extra features, more counters and flexibility. Dstat is handy for monitoring systems during performance tuning tests, benchmarks or troubleshooting. - - [`htop`](https://hisham.hm/htop/) - is interactive process viewer for Unix systems. - - [`atop`](https://www.atoptool.nl/) - is an advanced interactive monitor for Linux-systems to view the load on system-level and process-level. +The image ships with **40+ tools** organized by category. Everything is ready to use out of the box. + +### HTTP & API + +| Tool | Description | +|------|-------------| +| [`curl`](https://github.com/curl/curl) | HTTP client built from source with **HTTP/3 (QUIC)** support via ngtcp2/nghttp3 and OpenSSL 3.5 | +| [`httpie`](https://httpie.io/) | User-friendly HTTP client with JSON support, syntax highlighting, and intuitive CLI | +| [`wget`](https://www.gnu.org/software/wget/) | File retrieval via HTTP, HTTPS, FTP, and FTPS | +| [`httpstat`](https://github.com/b4b4r07/httpstat) | curl statistics visualizer — shows DNS, TCP, TLS, and transfer timings at a glance | +| [`httping`](https://github.com/flok99/httping) | Measures HTTP(S) latency and throughput to a web server | + +### DNS & Network Diagnostics + +| Tool | Description | +|------|-------------| +| [`dnsutils`](https://packages.debian.org/trixie/dnsutils) | DNS client tools: `dig`, `nslookup`, `nsupdate` | +| [`iputils-ping`](https://packages.debian.org/trixie/iputils-ping) | `ping` — ICMP reachability testing | +| 
[`traceroute`](https://linux.die.net/man/8/traceroute) | Trace the route packets take to a host | +| [`mtr`](https://github.com/traviscross/mtr) | Combines `traceroute` and `ping` in a single real-time diagnostic | +| [`nmap`](https://nmap.org/) | Network exploration and port scanning | +| [`ncat`](https://nmap.org/ncat/) | Nmap's netcat — TCP/UDP connections, port scanning, proxying | +| [`telnet`](https://linux.die.net/man/1/telnet) | Quick TCP connectivity checks | +| [`tcpdump`](https://www.tcpdump.org/) | Packet capture and analysis | +| [`dsniff`](https://www.monkey.org/~dugsong/dsniff/) | Network auditing and penetration testing tools | + +### Network Configuration + +| Tool | Description | +|------|-------------| +| [`iproute2`](https://wiki.linuxfoundation.org/networking/iproute2) | Modern Linux networking: `ip`, `ss`, `tc`, `bridge` | +| [`net-tools`](https://packages.debian.org/trixie/net-tools) | Classic networking: `ifconfig`, `netstat`, `arp`, `route` | +| [`conntrack`](https://conntrack-tools.netfilter.org/) | Inspect and manage Netfilter connection tracking entries | + +### TLS & Security + +| Tool | Description | +|------|-------------| +| [`openssl`](https://www.openssl.org/) | TLS/SSL toolkit — inspect certificates, test connections, generate keys | +| [`gnupg`](https://gnupg.org/) | GPG encryption and signing | + +### Performance & Monitoring + +| Tool | Description | +|------|-------------| +| [`htop`](https://htop.dev/) | Interactive process viewer | +| [`atop`](https://www.atoptool.nl/) | Advanced system and process monitor with historical data | +| [`sysstat`](https://github.com/sysstat/sysstat) | `sar`, `iostat`, `mpstat`, `pidstat` — CPU, memory, I/O, and network stats | +| [`hey`](https://github.com/rakyll/hey) | HTTP load generator and benchmarking tool | +| [`speedtest`](https://www.speedtest.net/apps/cli) | Ookla's Speedtest CLI for internet bandwidth testing | + +### System & Process Debugging + +| Tool | Description | 
+|------|-------------| +| [`strace`](https://github.com/strace/strace) | Trace system calls and signals between processes and the kernel | +| [`bpftool`](https://github.com/libbpf/bpftool) | Inspect and manipulate eBPF programs and maps | +| [`psmisc`](https://gitlab.com/psmisc/psmisc) | Process utilities: `pstree`, `killall`, `fuser` | + +### Data Services + +| Tool | Description | +|------|-------------| +| [`kcat`](https://github.com/edenhill/kcat) | Apache Kafka producer and consumer (formerly kafkacat) | +| [`redis-tools`](https://redis.io/docs/getting-started/) | `redis-cli` — connect and interact with Redis instances | + +### Kubernetes & Container Runtime + +| Tool | Description | +|------|-------------| +| [`crictl`](https://github.com/kubernetes-sigs/cri-tools/blob/master/docs/crictl.md) | CRI-compatible container runtime CLI, pre-configured for containerd | + +### Cloud CLI + +| Tool | Description | +|------|-------------| +| [`az`](https://learn.microsoft.com/en-us/cli/azure/) | Azure CLI for managing Azure resources directly from the pod | + +### General Utilities + +| Tool | Description | +|------|-------------| +| [`vim`](https://github.com/vim/vim) | Powerful text editor | +| [`screen`](https://www.gnu.org/software/screen/) | Terminal multiplexer for managing multiple shell sessions | +| [`jq`](https://github.com/stedolan/jq) | Lightweight command-line JSON processor | +| [`man`](https://linux.die.net/man/) | Manual pages for installed tools | + +--- + +## Tips and Tricks + +### Access the host filesystem + +When running with the provided DaemonSet / Deployment manifests, the host root filesystem is mounted at `/host`. 
You can chroot into it to interact with the node directly: + +```bash +chroot /host /bin/bash +``` + +### Inspect kubelet and system services + +After chrooting into the host: + +```bash +systemctl status kubelet +journalctl -u kubelet --since "5 minutes ago" +journalctl -u containerd -f +``` + +### Test HTTP/3 (QUIC) connectivity + +```bash +curl --http3 -I https://cloudflare.com +curl --http3 -I https://google.com +``` + +### Inspect TLS certificates + +```bash +# Check a remote certificate +openssl s_client -connect example.com:443 -servername example.com </dev/null 2>/dev/null | openssl x509 -noout -text + +# Quick expiry check +openssl s_client -connect example.com:443 -servername example.com </dev/null 2>/dev/null | openssl x509 -noout -dates +``` + +### DNS troubleshooting + +```bash +dig example.com +dig @8.8.8.8 example.com +short +nslookup my-service.my-namespace.svc.cluster.local +``` + +### Capture and analyze traffic + +```bash +# Capture DNS traffic +tcpdump -i any port 53 -nn + +# Capture HTTP traffic to a specific host +tcpdump -i any host 10.0.0.1 and port 80 -A +``` + +### Kafka operations + +```bash +# List topics +kcat -b kafka-broker:9092 -L + +# Consume messages +kcat -b kafka-broker:9092 -t my-topic -C -o beginning -c 10 +``` + +### Container runtime inspection + +```bash +# List running containers +crictl ps + +# Get container logs +crictl logs <container-id> + +# Inspect a pod sandbox +crictl inspectp <pod-id> +``` + +### HTTP load testing + +```bash +# Send 1000 requests with 50 concurrent workers +hey -n 1000 -c 50 https://my-service:8080/health +``` + +### Visualize HTTP timing + +```bash +httpstat https://my-service:8080/api/health +``` + +--- + +## Building locally + +```bash +docker buildx build --platform "linux/amd64" --output type=docker --tag debug-pod -f Dockerfile . +``` + +--- + +## Credits + +Originally forked from [digitalocean/doks-debug](https://github.com/digitalocean/doks-debug). Maintained by [NOS Portugal](https://github.com/nosportugal).
diff --git a/Slim/Dockerfile b/Slim/Dockerfile deleted file mode 100644 index cf578a1..0000000 --- a/Slim/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -FROM debian:12-slim - -LABEL maintainer="Slim Debug Pod" - -# Install essential network debugging tools -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - # Basic network tools - curl \ - wget \ - netcat-openbsd \ - telnet \ - iputils-ping \ - traceroute \ - dnsutils \ - iperf3 \ - socat \ - jq \ - openssl \ - # Analysis tools - tcpdump \ - net-tools \ - iproute2 \ - lsof \ - # Basic utilities - vim \ - nano \ - bash \ - procps && \ - # Cleanup to reduce size - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -# Definir bash como shell padrão -SHELL ["/bin/bash", "-c"] - -# Keep container running for debugging -CMD ["tail", "-f", "/dev/null"] \ No newline at end of file diff --git a/k8s/daemonset.yaml b/k8s/daemonset.yaml index 6bd1527..201b794 100644 --- a/k8s/daemonset.yaml +++ b/k8s/daemonset.yaml @@ -1,31 +1,32 @@ apiVersion: apps/v1 kind: DaemonSet metadata: - name: doks-debug + name: debug-pod namespace: kube-system labels: - app: doks-debug + app: debug-pod spec: selector: matchLabels: - name: doks-debug + name: debug-pod template: metadata: labels: - name: doks-debug + name: debug-pod annotations: - clusterlint.digitalocean.com/disabled-checks: "hostpath-volume" + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" spec: + dnsPolicy: ClusterFirstWithHostNet hostPID: true hostIPC: true hostNetwork: true tolerations: - operator: Exists containers: - - name: doks-debug + - name: debug-pod securityContext: privileged: true - image: digitalocean/doks-debug:latest + image: ghcr.io/nosportugal/debug-pod:master command: [ "sleep", "infinity" ] resources: requests: @@ -37,15 +38,16 @@ spec: volumeMounts: - name: host mountPath: /host - - name: docker - mountPath: /var/run/docker.sock + - name: containerd + mountPath: 
/run/containerd/containerd.sock + terminationGracePeriodSeconds: 0 volumes: - name: host hostPath: path: / - - name: docker + - name: containerd hostPath: - path: /var/run/docker.sock + path: /run/containerd/containerd.sock type: Socket updateStrategy: rollingUpdate: diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml index 50816d0..a8b5a00 100644 --- a/k8s/deployment.yaml +++ b/k8s/deployment.yaml @@ -1,32 +1,33 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: doks-debug + name: debug-pod namespace: kube-system labels: - app: doks-debug + app: debug-pod spec: replicas: 1 selector: matchLabels: - name: doks-debug + name: debug-pod template: metadata: labels: - name: doks-debug + name: debug-pod annotations: - clusterlint.digitalocean.com/disabled-checks: "hostpath-volume" + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" spec: + dnsPolicy: ClusterFirstWithHostNet hostPID: true hostIPC: true hostNetwork: true tolerations: - operator: Exists containers: - - name: doks-debug + - name: debug-pod securityContext: privileged: true - image: digitalocean/doks-debug:latest + image: ghcr.io/nosportugal/debug-pod:master command: [ "sleep", "infinity" ] resources: requests: @@ -38,15 +39,16 @@ spec: volumeMounts: - name: host mountPath: /host - - name: docker - mountPath: /var/run/docker.sock + - name: containerd + mountPath: /run/containerd/containerd.sock + terminationGracePeriodSeconds: 0 volumes: - name: host hostPath: path: / - - name: docker + - name: containerd hostPath: - path: /var/run/docker.sock + path: /run/containerd/containerd.sock type: Socket strategy: rollingUpdate: diff --git a/script/env b/script/env index 2f30365..4492b08 100755 --- a/script/env +++ b/script/env @@ -1,3 +1,3 @@ #!/usr/bin/env bash set -euo pipefail -export DOCKER_REPO="digitalocean/doks-debug" \ No newline at end of file +export DOCKER_REPO="nosportugal/debug-pod" \ No newline at end of file