diff --git a/talks/20170301_Container-Monitoring/README.md b/talks/20170301_Container-Monitoring/README.md
new file mode 100644
index 0000000..c87c6ee
--- /dev/null
+++ b/talks/20170301_Container-Monitoring/README.md
@@ -0,0 +1,53 @@
+# Overview
+
+## Prometheus
+
+Prometheus, a [Cloud Native Computing Foundation](https://cncf.io/) project, is a systems and service monitoring system. It collects metrics
+from configured targets at given intervals, evaluates rule expressions,
+displays the results, and can trigger alerts if some condition is observed
+to be true.
+
+### Highlights
+
+- Consumes up to 800,000 metrics per second on a single server
+- Static Go binary
+- Supports both pull and push methods
+- Large number of clients (cAdvisor, node-exporter, experimental Docker support in 1.13)
+- Supports advanced rule evaluation such as linear predictions and quantile analysis
+
+### Caveats
+
+- No down-sampling support
+- No official long-term storage support (two-week default)
+- No clustering (must run multiple instances for redundancy)
+- Must run a separate alert manager (or depend on Grafana)
+
+### Pull versus Push
+
+- Both are scalable
+- Adopt the model that makes sense in your environment
+- Personally prefer pull: no reconfiguring of clients to deploy a new instance
+
+## Grafana
+
+Creates beautiful charts from time series databases, including Prometheus and others.
+
+### Highlights
+
+- Preferred Prometheus visualization tool
+- Built-in alerting as of 4.0
+- Query syntax is identical to Prometheus
+- Canned dashboards for cAdvisor, node-exporter, Redis, etc. available on their website
+
+## cAdvisor
+
+- Container exporter from Google
+- Native Prometheus support at /metrics
+- Provides network, storage, CPU and memory metrics per container for Prometheus
+- Canned dashboard available for Grafana
+
+## Custom Endpoints
+
+- Build /metrics endpoints directly into your application
+- Expose metrics on latency, number of calls and other metrics directly to Prometheus
+
diff --git a/talks/20170301_Container-Monitoring/docker-compose.yml b/talks/20170301_Container-Monitoring/docker-compose.yml
new file mode 100644
index 0000000..5e2dd04
--- /dev/null
+++ b/talks/20170301_Container-Monitoring/docker-compose.yml
@@ -0,0 +1,44 @@
+# Demo stack for the container-monitoring talk: Prometheus scrapes itself,
+# cAdvisor and node-exporter (see prometheus.yml); Grafana charts the results.
+version: '2'
+services:
+
+  prometheus:
+    image: prom/prometheus
+    ports:
+      # HOST:CONTAINER mappings are quoted so YAML always reads them as strings.
+      - '19090:9090'
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+      # #- ./prometheus-data:/prometheus-data
+      # - ./prometheus-data:/prometheus/data
+
+
+  cadvisor:
+    image: google/cadvisor:latest
+    ports:
+      - '9080:8080'
+    volumes:
+      # Host paths cAdvisor reads to discover and measure the other containers.
+      - /:/rootfs:ro
+      - /var/run:/var/run:ro
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+
+  node-exporter:
+    image: prom/node-exporter
+    ports:
+      - '9100:9100'
+
+  grafana:
+    image: grafana/grafana
+    ports:
+      - '3000:3000'
+    environment:
+      # NOTE(review): demo-only admin password and anonymous access; change both before any real use.
+      - GF_SECURITY_ADMIN_PASSWORD=secret
+      - GF_AUTH_ANONYMOUS_ENABLED=true
+    volumes:
+      - ./grafana:/var/lib/grafana
+    links:
+      - prometheus
diff --git a/talks/20170301_Container-Monitoring/grafana/grafana.db b/talks/20170301_Container-Monitoring/grafana/grafana.db
new file mode 100644
index 0000000..76d671c
Binary files /dev/null and b/talks/20170301_Container-Monitoring/grafana/grafana.db differ
diff --git a/talks/20170301_Container-Monitoring/grafana/sessions/1/f/1fcab8fe71c1164f b/talks/20170301_Container-Monitoring/grafana/sessions/1/f/1fcab8fe71c1164f
new file mode 100644
index 0000000..d89e3ff
Binary files /dev/null and
b/talks/20170301_Container-Monitoring/grafana/sessions/1/f/1fcab8fe71c1164f differ
diff --git a/talks/20170301_Container-Monitoring/prometheus.yml b/talks/20170301_Container-Monitoring/prometheus.yml
new file mode 100644
index 0000000..c500100
--- /dev/null
+++ b/talks/20170301_Container-Monitoring/prometheus.yml
@@ -0,0 +1,49 @@
+# Global settings for the container-monitoring demo; individual jobs below may override them.
+global:
+  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
+  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'dcmap-monitor'
+
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+# No rule files are loaded yet; replace the empty list with entries such as:
+#   - "first.rules"
+#   - "second.rules"
+rule_files: []
+
+# Scrape configurations, one job per exporter: Prometheus itself, cAdvisor and
+# node-exporter. Targets use the docker-compose service names and container-side ports.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+    # Overrides the global 15s scrape_interval for this job.
+    scrape_interval: 60s
+
+    static_configs:
+      - targets: ['prometheus:9090']
+
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'cadvisor'
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+    # Overrides the global 15s scrape_interval for this job.
+    scrape_interval: 60s
+
+    static_configs:
+      - targets: ['cadvisor:8080']
+
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'node-exporter'
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+    # Overrides the global 15s scrape_interval for this job.
+    scrape_interval: 60s
+
+    static_configs:
+      - targets: ['node-exporter:9100']