From 593b5832f22b52694548a2d9d7a2e2d694c9992d Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Fri, 3 Apr 2020 10:53:35 +1100 Subject: [PATCH 1/8] Start work on allowing metric configuration from file --- base/config.go | 88 +++++++++++++++++++++++-- base/controller.go | 161 ++++++++++++++++++++------------------------- helpers/string.go | 14 ++++ main.go | 5 +- rds/controller.go | 4 +- 5 files changed, 174 insertions(+), 98 deletions(-) diff --git a/base/config.go b/base/config.go index d907205..f961e5a 100644 --- a/base/config.go +++ b/base/config.go @@ -1,5 +1,39 @@ package base +import ( + "strings" + "time" + + "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" + "github.com/aws/aws-sdk-go/service/cloudwatch" +) + +/* +Example config + +metrics: + AWS/ELB: + - metric: RequestCount + help: "This is some help about the metric" + dimensions: [AvailabilityZone, LoadBalancerName] + resource_type_selection: "elasticloadbalancing:loadbalancer" + resource_id_dimension: LoadBalancerName + statistics: [Sum] +*/ +type configMetric struct { + Metric string `yaml:"metric"` // The Cloudwatch metric to use + Help string `yaml:"help"` // Custom help text for the generated metric + Dimensions []*string `yaml:"dimensions"` // The resource dimensions to generate individual series for (via labels) + Statistics []*string `yaml:"statistics"` // List of AWS statistics to use. + OutputName string `yaml:"output_name"` // Allows override of the generate metric name + RangeSeconds int `yaml:"range_seconds"` // How far back to request data for in seconds. + PeriodSeconds int `yaml:"period_seconds"` // Granularity of results from cloudwatch API. +} + +type metric struct { + Data map[string][]*configMetric `yaml:",omitempty,inline"` // Map from namespace to list of metrics to scrape. +} + // Config represents the exporter configuration passed which is read at runtime from a YAML file. type Config struct { Listen string `yaml:"listen,omitempty"` // TCP Dial address for Prometheus HTTP API to listen on @@ -10,11 +44,55 @@ type Config struct { Regions []*string `yaml:"regions"` // Which AWS regions to query resources and metrics for PollInterval uint8 `yaml:"poll_interval,omitempty"` // How often to fetch new data from the Cloudwatch API. LogLevel uint8 `yaml:"log_level,omitempty"` // Logging verbosity level - Metrics metric `yaml:"metrics,omitempty"` // Map of per metric configuration overrides + Metrics metric `yaml:"metrics"` // Map of per metric configuration overrides } -type metric struct { - Data map[string]struct { - Period int `yaml:"length,omitempty"` // How far back to request data for in minutes. 
- } `yaml:",omitempty,inline"` +// LoadConfig reads the config file located at path and reads it into the Config struct +func LoadConfig(path string) (*Config, error) { + c := Config{} + helpers.YAMLDecode(&path, &c) + return &c, nil +} + +func (c *Config) ConstructMetrics() map[string]map[string]*MetricDescription { + mds := make(map[string]map[string]*MetricDescription) + for namespace, metrics := range c.Metrics.Data { + mds[namespace] = make(map[string]*MetricDescription) + for _, metric := range metrics { + + name := metric.OutputName + if name == "" { + name = helpers.ToSnakeCase(metric.Metric) + name = strings.ToLower(strings.TrimPrefix(namespace, "AWS/")) + "_" + name + } + + period := metric.PeriodSeconds + if period == 0 { + period = int(c.Period) * int(time.Minute) + } + + rangeSeconds := metric.RangeSeconds + if rangeSeconds == 0 { + rangeSeconds = int(c.Period) * int(time.Minute) + } + + // TODO one for each stat + // TODO read defaults for namespace + // TODO handle dimensions + mds[namespace][metric.Metric] = &MetricDescription{ + Help: &metric.Help, + OutputName: &name, + Dimensions: []*cloudwatch.Dimension{}, + PeriodSeconds: period, + RangeSeconds: rangeSeconds, + Statistics: metric.Statistics, + + namespace: &namespace, + awsMetric: &metric.Metric, + } + + } + + } + return mds } diff --git a/base/controller.go b/base/controller.go index 6d27843..755fe8d 100644 --- a/base/controller.go +++ b/base/controller.go @@ -25,6 +25,7 @@ import ( ) var ( + // TODO move this to the metric results = make(map[string]prometheus.Collector) ) @@ -49,12 +50,16 @@ type DimensionDescription struct { // MetricDescription describes a single Cloudwatch metric with one or more // statistics to be monitored for relevant resources type MetricDescription struct { - Help *string - OutputName *string - Dimensions []*cloudwatch.Dimension - Period int - Statistic []*string - Data map[string][]*string + Help *string + OutputName *string + Dimensions []*cloudwatch.Dimension + PeriodSeconds int + RangeSeconds int + Statistics []*string + + namespace *string + awsMetric *string + timestamps map[prometheus.Collector]*time.Time } // RegionDescription describes an AWS region which will be monitored via cloudwatch @@ -90,8 +95,6 @@ type ResourceDescription struct { Type *string Parent *NamespaceDescription Mutex sync.RWMutex - Query []*cloudwatch.MetricDataQuery - timestamps map[prometheus.Collector]*time.Time } func (md *MetricDescription) metricName(stat string) *string { @@ -113,17 +116,6 @@ func (md *MetricDescription) metricName(stat string) *string { return &name } -func (rd *RegionDescription) setRequestTime() error { - log.Debug("Setting request time ...") - td := TimeDescription{} - t := time.Now().Round(time.Minute * 5) - start := t.Add(time.Minute * -time.Duration(*rd.Period)) - td.StartTime = &start - td.EndTime = &t - rd.Time = &td - return nil -} - // BuildARN returns the AWS ARN of a resource in a region given the input service and resource func (rd *RegionDescription) BuildARN(s *string, r *string) (string, error) { a := arn.ARN{ @@ -200,13 +192,12 @@ func (rd *RegionDescription) CreateNamespaceDescriptions() error { // GatherMetrics queries the Cloudwatch API for metrics related to the resources in this region func (rd *RegionDescription) GatherMetrics(cw *cloudwatch.CloudWatch) { log.Infof("Gathering metrics for region %s...", *rd.Region) - rd.setRequestTime() ndc := make(chan *NamespaceDescription) for _, namespace := range rd.Namespaces { // Initialize metric containers if they don't 
already exist for _, metric := range namespace.Metrics { - for _, stat := range metric.Statistic { + for _, stat := range metric.Statistics { metric.initializeMetric(*stat) } } @@ -216,15 +207,13 @@ func (rd *RegionDescription) GatherMetrics(cw *cloudwatch.CloudWatch) { // GatherMetrics queries the Cloudwatch API for metrics related to this AWS namespace in the parent region func (nd *NamespaceDescription) GatherMetrics(cw *cloudwatch.CloudWatch, ndc chan *NamespaceDescription) { - for _, r := range nd.Resources { - resource := r - go func(rd *ResourceDescription, ndc chan *NamespaceDescription) { - resource.Parent = nd - result, err := resource.getData(cw) + for _, md := range nd.Metrics { + go func(md *MetricDescription, ndc chan *NamespaceDescription) { + result, err := md.getData(cw, nd.Resources) h.LogError(err) - resource.saveData(result) + md.saveData(result) ndc <- nd - }(resource, ndc) + }(md, ndc) } } @@ -275,48 +264,49 @@ func (rd *ResourceDescription) BuildDimensions(dd []*DimensionDescription) error } // BuildQuery constructs and saves the cloudwatch query for all the metrics associated with the resource -func (rd *ResourceDescription) BuildQuery() error { +func (md *MetricDescription) BuildQuery(rds []*ResourceDescription) ([]*cloudwatch.MetricDataQuery, error) { query := []*cloudwatch.MetricDataQuery{} - for key, value := range rd.Parent.Metrics { - dimensions := rd.Dimensions - dimensions = append(dimensions, value.Dimensions...) - for _, stat := range value.Statistic { + for _, resource := range rds { + dimensions := resource.Dimensions + dimensions = append(dimensions, md.Dimensions...) + for _, stat := range md.Statistics { + id := strings.ToLower(*stat + "-" + *resource.ID) cm := &cloudwatch.MetricDataQuery{ - Id: value.metricName(*stat), + Id: aws.String(strings.Replace(id, "-", "_", -1)), MetricStat: &cloudwatch.MetricStat{ Metric: &cloudwatch.Metric{ - MetricName: aws.String(key), - Namespace: rd.Parent.Namespace, + MetricName: md.awsMetric, + Namespace: md.namespace, Dimensions: dimensions, }, Stat: stat, - Period: aws.Int64(int64(value.Period)), + Period: aws.Int64(int64(md.PeriodSeconds) / int64(time.Minute)), }, // We hardcode the label so that we can rely on the ordering in // saveData. 
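+ // awsLabels packs the statistic together with the resource name, ID and
+ // type into a single space-separated label string; awsLabelsFromString
+ // unpacks it in saveData to rebuild the Prometheus label set.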
- Label: aws.String((&awsLabels{key, *stat}).String()), + Label: aws.String((&awsLabels{*stat, *resource.Name, *resource.ID, *resource.Type}).String()), ReturnData: aws.Bool(true), } query = append(query, cm) } } - rd.Query = query - - return nil + return query, nil } type awsLabels struct { - metric string statistic string + name string + id string + rType string } func (l *awsLabels) String() string { - return fmt.Sprintf("%s %s", l.metric, l.statistic) + return fmt.Sprintf("%s %s %s %s", l.statistic, l.name, l.id, l.rType) } func awsLabelsFromString(s string) (*awsLabels, error) { stringLabels := strings.Split(s, " ") - if len(stringLabels) < 2 { + if len(stringLabels) < 4 { return nil, fmt.Errorf("Expected at least two labels, got %s", s) } labels := awsLabels{ @@ -326,20 +316,27 @@ func awsLabelsFromString(s string) (*awsLabels, error) { return &labels, nil } -func (rd *ResourceDescription) saveData(c *cloudwatch.GetMetricDataOutput) { +func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput) { for _, data := range c.MetricDataResults { if len(data.Values) <= 0 { continue } - values, err := rd.filterValues(data) - if len(values) <= 0 || err != nil { + labels, err := awsLabelsFromString(*data.Label) + if err != nil { h.LogError(err) continue } - labels, err := awsLabelsFromString(*data.Label) - if err != nil { + promLabels := prometheus.Labels{ + "name": labels.name, + "id": labels.id, + "type": labels.rType, + "region": "TODO", + } + + values, err := md.filterValues(data, &promLabels) + if len(values) <= 0 || err != nil { h.LogError(err) continue } @@ -364,7 +361,7 @@ func (rd *ResourceDescription) saveData(c *cloudwatch.GetMetricDataOutput) { continue } - err = rd.updateMetric(*data.Id, value) + err = md.updateMetric(*md.OutputName, value, &promLabels) if err != nil { h.LogError(err) continue @@ -372,52 +369,44 @@ func (rd *ResourceDescription) saveData(c *cloudwatch.GetMetricDataOutput) { } } -func (rd *ResourceDescription) filterValues(data *cloudwatch.MetricDataResult) ([]*float64, error) { +func (md *MetricDescription) filterValues(data *cloudwatch.MetricDataResult, labels *prometheus.Labels) ([]*float64, error) { // In the case of a counter we need to remove any datapoints which have // already been added to the counter, otherwise if the poll intervals // overlap we will double count some data. values := data.Values - if counter, ok := results[*data.Id].(*prometheus.CounterVec); ok == true { - counter, err := counter.GetMetricWith(prometheus.Labels{ - "name": *rd.Name, - "id": *rd.ID, - "type": *rd.Type, - "region": *rd.Parent.Parent.Region, - }) + // TODO remove the result object and store on metric instead + if counter, ok := results[*md.OutputName].(*prometheus.CounterVec); ok == true { + counter, err := counter.GetMetricWith(*labels) if err != nil { return nil, err } - rd.Mutex.Lock() - defer rd.Mutex.Unlock() - if rd.timestamps == nil { - rd.timestamps = make(map[prometheus.Collector]*time.Time) + //md.Mutex.Lock() + //defer md.Mutex.Unlock() + // TODO mutex? 
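+ // timestamps records, per collector, the newest datapoint already folded
+ // into a counter; it is created lazily on first use.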
+ if md.timestamps == nil { + md.timestamps = make(map[prometheus.Collector]*time.Time) } - if lastTimestamp, ok := rd.timestamps[counter]; ok == true { + if lastTimestamp, ok := md.timestamps[counter]; ok == true { values = h.NewValues(data.Values, data.Timestamps, *lastTimestamp) } if len(values) > 0 { // AWS returns the data in descending order - rd.timestamps[counter] = data.Timestamps[0] + md.timestamps[counter] = data.Timestamps[0] } } return values, nil } -func (rd *ResourceDescription) updateMetric(name string, value float64) error { - labels := prometheus.Labels{ - "name": *rd.Name, - "id": *rd.ID, - "type": *rd.Type, - "region": *rd.Parent.Parent.Region, - } - rd.Parent.Parent.Mutex.Lock() - defer rd.Parent.Parent.Mutex.Unlock() +func (md *MetricDescription) updateMetric(name string, value float64, labels *prometheus.Labels) error { + // TODO mutex? + //rd.Parent.Parent.Mutex.Lock() + //defer rd.Parent.Parent.Mutex.Unlock() if metric, ok := results[name]; ok == true { switch m := metric.(type) { case *prometheus.GaugeVec: - m.With(labels).Set(value) + m.With(*labels).Set(value) case *prometheus.CounterVec: - m.With(labels).Add(value) + m.With(*labels).Add(value) default: return fmt.Errorf("Could not resolve type of metric %s", name) } @@ -503,23 +492,17 @@ func (rd *RegionDescription) TagsFound(tl interface{}) bool { return false } -func (rd *ResourceDescription) getData(cw *cloudwatch.CloudWatch) (*cloudwatch.GetMetricDataOutput, error) { - rd.BuildQuery() +func (md *MetricDescription) getData(cw *cloudwatch.CloudWatch, rds []*ResourceDescription) (*cloudwatch.GetMetricDataOutput, error) { + query, err := md.BuildQuery(rds) + h.LogError(err) - startTime := rd.Parent.Parent.Time.StartTime - if val, ok := rd.Parent.Parent.Config.Metrics.Data[*rd.Parent.Namespace]; ok { - // Some resources don't have any data for a while (e.g. S3), in these cases the Period parameter - // can be used to override the window used when querying the Cloudwatch API. - if val.Period > 0 { - time := rd.Parent.Parent.Time.EndTime.Add(-time.Duration(val.Period) * time.Minute) - startTime = &time - } - } + t := time.Now().Round(time.Minute * 5) + start := t.Add(-time.Duration(md.RangeSeconds)) input := cloudwatch.GetMetricDataInput{ - StartTime: startTime, - EndTime: rd.Parent.Parent.Time.EndTime, - MetricDataQueries: rd.Query, + StartTime: &start, + EndTime: &t, + MetricDataQueries: query, } result, err := cw.GetMetricData(&input) h.LogError(err) diff --git a/helpers/string.go b/helpers/string.go index adf4b94..05ed0a3 100644 --- a/helpers/string.go +++ b/helpers/string.go @@ -1,5 +1,10 @@ package helpers +import ( + "regexp" + "strings" +) + // StringPointers converts a slice of string values into a slice of string pointers // // This function complements aws.StringSlice but works with variadic arguments so that an array literal is not required. 
@@ -10,3 +15,12 @@ func StringPointers(strings ...string) []*string { } return sp } + +var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)") +var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") + +func ToSnakeCase(str string) string { + snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}") + snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}") + return strings.ToLower(snake) +} diff --git a/main.go b/main.go index c60cfd5..49763d0 100644 --- a/main.go +++ b/main.go @@ -33,7 +33,7 @@ func init() { flag.StringVar(&config, "config", "config.yaml", "Path to config file") } -func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd *base.RegionDescription, pi uint8) { +func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd *base.RegionDescription, pi uint8, cfg map[string]map[string]*base.MetricDescription) { var delay uint8 = 0 for { select { @@ -93,6 +93,7 @@ func processConfig(p *string) *base.Config { func main() { flag.Parse() c := processConfig(&config) + mds := c.ConstructMetrics() for _, region := range c.Regions { r := region @@ -104,7 +105,7 @@ func main() { rdd = append(rdd, &rd) rd.Init(session, c.Tags, r, &c.Period) - go run(rd.Namespaces, cw, &rd, c.PollInterval) + go run(rd.Namespaces, cw, &rd, c.PollInterval, mds) } http.Handle("/metrics", promhttp.Handler()) diff --git a/rds/controller.go b/rds/controller.go index 96f2d90..eb995d7 100644 --- a/rds/controller.go +++ b/rds/controller.go @@ -34,11 +34,11 @@ func CreateResourceDescription(nd *b.NamespaceDescription, dbi *rds.DBInstance) } // CreateResourceList fetches a list of all RDS databases in the region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup, metrics map[string]*b.MetricDescription) error { defer wg.Done() log.Debug("Creating RDS resource list ...") nd.Resources = []*b.ResourceDescription{} - nd.Metrics = GetMetrics() + nd.Metrics = metrics session := rds.New(nd.Parent.Session) input := rds.DescribeDBInstancesInput{} result, err := session.DescribeDBInstances(&input) From cb4001efe5a132050713c29776a40df8c6032bab Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Fri, 3 Apr 2020 13:40:49 +1100 Subject: [PATCH 2/8] One stat per metric description --- base/config.go | 38 ++++++++------- base/controller.go | 114 +++++++++++++++++++-------------------------- rds/controller.go | 2 +- 3 files changed, 69 insertions(+), 85 deletions(-) diff --git a/base/config.go b/base/config.go index f961e5a..691fcea 100644 --- a/base/config.go +++ b/base/config.go @@ -54,10 +54,10 @@ func LoadConfig(path string) (*Config, error) { return &c, nil } -func (c *Config) ConstructMetrics() map[string]map[string]*MetricDescription { - mds := make(map[string]map[string]*MetricDescription) +func (c *Config) ConstructMetrics() map[string][]*MetricDescription { + mds := make(map[string][]*MetricDescription) for namespace, metrics := range c.Metrics.Data { - mds[namespace] = make(map[string]*MetricDescription) + mds[namespace] = []*MetricDescription{} for _, metric := range metrics { name := metric.OutputName @@ -76,23 +76,27 @@ func (c *Config) ConstructMetrics() map[string]map[string]*MetricDescription { rangeSeconds = int(c.Period) * int(time.Minute) } - // TODO one for each stat - // TODO read defaults for namespace - // TODO handle dimensions - mds[namespace][metric.Metric] = &MetricDescription{ - Help: &metric.Help, - OutputName: &name, - Dimensions: 
[]*cloudwatch.Dimension{}, - PeriodSeconds: period, - RangeSeconds: rangeSeconds, - Statistics: metric.Statistics, - - namespace: &namespace, - awsMetric: &metric.Metric, + // TODO check this works + if metric.Statistics == nil || len(metric.Statistics) < 1 { + metric.Statistics = helpers.StringPointers("Average") } - } + for _, stat := range metric.Statistics { + // TODO read defaults for namespace + // TODO handle dimensions + mds[namespace] = append(mds[namespace], &MetricDescription{ + Help: &metric.Help, + OutputName: &name, + Dimensions: []*cloudwatch.Dimension{}, + PeriodSeconds: period, + RangeSeconds: rangeSeconds, + Statistic: stat, + namespace: &namespace, + awsMetric: &metric.Metric, + }) + } + } } return mds } diff --git a/base/controller.go b/base/controller.go index 755fe8d..ad215f0 100644 --- a/base/controller.go +++ b/base/controller.go @@ -24,17 +24,6 @@ import ( log "github.com/sirupsen/logrus" ) -var ( - // TODO move this to the metric - results = make(map[string]prometheus.Collector) -) - -// TimeDescription represents an interval with a specific start and finish time -type TimeDescription struct { - StartTime *time.Time - EndTime *time.Time -} - // TagDescription represents an AWS tag key value pair type TagDescription struct { Key *string `yaml:"name"` @@ -55,11 +44,12 @@ type MetricDescription struct { Dimensions []*cloudwatch.Dimension PeriodSeconds int RangeSeconds int - Statistics []*string + Statistic *string namespace *string awsMetric *string timestamps map[prometheus.Collector]*time.Time + promMetric prometheus.Collector } // RegionDescription describes an AWS region which will be monitored via cloudwatch @@ -71,7 +61,6 @@ type RegionDescription struct { AccountID *string Filters []*ec2.Filter Namespaces map[string]*NamespaceDescription - Time *TimeDescription Mutex sync.RWMutex Period *uint8 } @@ -81,9 +70,9 @@ type RegionDescription struct { type NamespaceDescription struct { Namespace *string Resources []*ResourceDescription - Parent *RegionDescription Mutex sync.RWMutex - Metrics map[string]*MetricDescription + Parent *RegionDescription + Metrics []*MetricDescription } // ResourceDescription describes a single AWS resource which will be monitored via @@ -93,13 +82,12 @@ type ResourceDescription struct { ID *string Dimensions []*cloudwatch.Dimension Type *string - Parent *NamespaceDescription Mutex sync.RWMutex } -func (md *MetricDescription) metricName(stat string) *string { +func (md *MetricDescription) metricName() *string { suffix := "" - switch stat { + switch *md.Statistic { case "Average": // For backwards compatibility we have to omit the _avg suffix = "" @@ -197,9 +185,7 @@ func (rd *RegionDescription) GatherMetrics(cw *cloudwatch.CloudWatch) { for _, namespace := range rd.Namespaces { // Initialize metric containers if they don't already exist for _, metric := range namespace.Metrics { - for _, stat := range metric.Statistics { - metric.initializeMetric(*stat) - } + metric.initializeMetric() } go namespace.GatherMetrics(cw, ndc) } @@ -217,10 +203,10 @@ func (nd *NamespaceDescription) GatherMetrics(cw *cloudwatch.CloudWatch, ndc cha } } -func (md *MetricDescription) initializeMetric(stat string) { - name := *md.metricName(stat) - if _, ok := results[name]; ok == true { - // metric is already initialized +func (md *MetricDescription) initializeMetric() { + stat := *md.Statistic + name := *md.metricName() + if md.promMetric != nil { return } @@ -242,7 +228,7 @@ func (md *MetricDescription) initializeMetric(stat string) { []string{"name", "id", 
"type", "region"}, ) } - results[name] = promMetric + md.promMetric = promMetric if err := prometheus.Register(promMetric); err != nil { log.Fatalf("Error registering metric %s: %s", name, err) } @@ -269,49 +255,48 @@ func (md *MetricDescription) BuildQuery(rds []*ResourceDescription) ([]*cloudwat for _, resource := range rds { dimensions := resource.Dimensions dimensions = append(dimensions, md.Dimensions...) - for _, stat := range md.Statistics { - id := strings.ToLower(*stat + "-" + *resource.ID) - cm := &cloudwatch.MetricDataQuery{ - Id: aws.String(strings.Replace(id, "-", "_", -1)), - MetricStat: &cloudwatch.MetricStat{ - Metric: &cloudwatch.Metric{ - MetricName: md.awsMetric, - Namespace: md.namespace, - Dimensions: dimensions, - }, - Stat: stat, - Period: aws.Int64(int64(md.PeriodSeconds) / int64(time.Minute)), + // TODO clean this up + id := strings.ToLower(*resource.ID) + cm := &cloudwatch.MetricDataQuery{ + Id: aws.String(strings.Replace(id, "-", "_", -1)), + MetricStat: &cloudwatch.MetricStat{ + Metric: &cloudwatch.Metric{ + MetricName: md.awsMetric, + Namespace: md.namespace, + Dimensions: dimensions, }, - // We hardcode the label so that we can rely on the ordering in - // saveData. - Label: aws.String((&awsLabels{*stat, *resource.Name, *resource.ID, *resource.Type}).String()), - ReturnData: aws.Bool(true), - } - query = append(query, cm) + Stat: md.Statistic, + Period: aws.Int64(int64(md.PeriodSeconds) / int64(time.Minute)), + }, + // We hardcode the label so that we can rely on the ordering in + // saveData. + Label: aws.String((&awsLabels{*resource.Name, *resource.ID, *resource.Type}).String()), + ReturnData: aws.Bool(true), } + query = append(query, cm) } return query, nil } type awsLabels struct { - statistic string - name string - id string - rType string + name string + id string + rType string } func (l *awsLabels) String() string { - return fmt.Sprintf("%s %s %s %s", l.statistic, l.name, l.id, l.rType) + return fmt.Sprintf("%s %s %s", l.name, l.id, l.rType) } func awsLabelsFromString(s string) (*awsLabels, error) { stringLabels := strings.Split(s, " ") - if len(stringLabels) < 4 { + if len(stringLabels) < 3 { return nil, fmt.Errorf("Expected at least two labels, got %s", s) } labels := awsLabels{ - metric: stringLabels[len(stringLabels)-2], - statistic: stringLabels[len(stringLabels)-1], + name: stringLabels[len(stringLabels)-3], + id: stringLabels[len(stringLabels)-2], + rType: stringLabels[len(stringLabels)-1], } return &labels, nil } @@ -342,7 +327,7 @@ func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput) { } value := 0.0 - switch labels.statistic { + switch *md.Statistic { case "Average": value, err = h.Average(values) case "Sum": @@ -354,7 +339,7 @@ func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput) { case "SampleCount": value, err = h.Sum(values) default: - err = fmt.Errorf("Unknown Statistic type: %s", labels.statistic) + err = fmt.Errorf("Unknown Statistic type: %s", *md.Statistic) } if err != nil { h.LogError(err) @@ -374,8 +359,7 @@ func (md *MetricDescription) filterValues(data *cloudwatch.MetricDataResult, lab // already been added to the counter, otherwise if the poll intervals // overlap we will double count some data. 
values := data.Values - // TODO remove the result object and store on metric instead - if counter, ok := results[*md.OutputName].(*prometheus.CounterVec); ok == true { + if counter, ok := md.promMetric.(*prometheus.CounterVec); ok == true { counter, err := counter.GetMetricWith(*labels) if err != nil { return nil, err @@ -401,17 +385,13 @@ func (md *MetricDescription) updateMetric(name string, value float64, labels *pr // TODO mutex? //rd.Parent.Parent.Mutex.Lock() //defer rd.Parent.Parent.Mutex.Unlock() - if metric, ok := results[name]; ok == true { - switch m := metric.(type) { - case *prometheus.GaugeVec: - m.With(*labels).Set(value) - case *prometheus.CounterVec: - m.With(*labels).Add(value) - default: - return fmt.Errorf("Could not resolve type of metric %s", name) - } - } else { - return fmt.Errorf("Couldn't save metric %s", name) + switch m := md.promMetric.(type) { + case *prometheus.GaugeVec: + m.With(*labels).Set(value) + case *prometheus.CounterVec: + m.With(*labels).Add(value) + default: + return fmt.Errorf("Could not resolve type of metric %s", name) } return nil } diff --git a/rds/controller.go b/rds/controller.go index eb995d7..12ae7c0 100644 --- a/rds/controller.go +++ b/rds/controller.go @@ -34,7 +34,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, dbi *rds.DBInstance) } // CreateResourceList fetches a list of all RDS databases in the region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup, metrics map[string]*b.MetricDescription) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup, metrics []*b.MetricDescription) error { defer wg.Done() log.Debug("Creating RDS resource list ...") nd.Resources = []*b.ResourceDescription{} From 31440a831e41b4bdf7fbd9afdbecf86b23ababa7 Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Fri, 3 Apr 2020 15:05:34 +1100 Subject: [PATCH 3/8] Default metrics --- base/config.go | 24 +++++++++++++++---- base/controller.go | 57 ++++++++++++++++++++++++++-------------------- main.go | 10 +++++--- rds/controller.go | 3 +-- 4 files changed, 59 insertions(+), 35 deletions(-) diff --git a/base/config.go b/base/config.go index 691fcea..a97fdc3 100644 --- a/base/config.go +++ b/base/config.go @@ -21,7 +21,7 @@ metrics: statistics: [Sum] */ type configMetric struct { - Metric string `yaml:"metric"` // The Cloudwatch metric to use + AWSMetric string `yaml:"metric"` // The Cloudwatch metric to use Help string `yaml:"help"` // Custom help text for the generated metric Dimensions []*string `yaml:"dimensions"` // The resource dimensions to generate individual series for (via labels) Statistics []*string `yaml:"statistics"` // List of AWS statistics to use. 
@@ -54,15 +54,27 @@ func LoadConfig(path string) (*Config, error) { return &c, nil } -func (c *Config) ConstructMetrics() map[string][]*MetricDescription { +func (c *Config) ConstructMetrics(defaults map[string]map[string]*string) map[string][]*MetricDescription { mds := make(map[string][]*MetricDescription) for namespace, metrics := range c.Metrics.Data { + + if len(metrics) <= 0 { + if n_defaults, ok := defaults[namespace]; ok == true { + for key, help := range n_defaults { + metrics = append(metrics, &configMetric{ + AWSMetric: key, + Help: *help, + }) + } + } + } + mds[namespace] = []*MetricDescription{} for _, metric := range metrics { name := metric.OutputName if name == "" { - name = helpers.ToSnakeCase(metric.Metric) + name = helpers.ToSnakeCase(metric.AWSMetric) name = strings.ToLower(strings.TrimPrefix(namespace, "AWS/")) + "_" + name } @@ -82,8 +94,10 @@ func (c *Config) ConstructMetrics() map[string][]*MetricDescription { } for _, stat := range metric.Statistics { - // TODO read defaults for namespace + // TODO read defaults for namespace (the metrics) // TODO handle dimensions + // TODO move metricName function here / apply to output name + // TODO create new metric function (which inits metrics?) mds[namespace] = append(mds[namespace], &MetricDescription{ Help: &metric.Help, OutputName: &name, @@ -93,7 +107,7 @@ func (c *Config) ConstructMetrics() map[string][]*MetricDescription { Statistic: stat, namespace: &namespace, - awsMetric: &metric.Metric, + awsMetric: &metric.AWSMetric, }) } } diff --git a/base/controller.go b/base/controller.go index ad215f0..b7e16bf 100644 --- a/base/controller.go +++ b/base/controller.go @@ -50,6 +50,7 @@ type MetricDescription struct { awsMetric *string timestamps map[prometheus.Collector]*time.Time promMetric prometheus.Collector + mutex sync.RWMutex } // RegionDescription describes an AWS region which will be monitored via cloudwatch @@ -62,7 +63,6 @@ type RegionDescription struct { Filters []*ec2.Filter Namespaces map[string]*NamespaceDescription Mutex sync.RWMutex - Period *uint8 } // NamespaceDescription describes an AWS namespace to be monitored via cloudwatch @@ -83,6 +83,7 @@ type ResourceDescription struct { Dimensions []*cloudwatch.Dimension Type *string Mutex sync.RWMutex + Region *string } func (md *MetricDescription) metricName() *string { @@ -143,9 +144,8 @@ func (rd *RegionDescription) saveAccountID() error { // Init initializes a region and its nested namspaces in preparation for collection // cloudwatchc metrics for that region. 
-func (rd *RegionDescription) Init(s *session.Session, td []*TagDescription, r *string, p *uint8) error { +func (rd *RegionDescription) Init(s *session.Session, td []*TagDescription, r *string) error { log.Infof("Initializing region %s ...", *r) - rd.Period = p rd.Session = s rd.Tags = td rd.Region = r @@ -231,6 +231,8 @@ func (md *MetricDescription) initializeMetric() { md.promMetric = promMetric if err := prometheus.Register(promMetric); err != nil { log.Fatalf("Error registering metric %s: %s", name, err) + } else { + log.Debugf("Registered metric %s", name) } } @@ -249,16 +251,20 @@ func (rd *ResourceDescription) BuildDimensions(dd []*DimensionDescription) error return nil } +func (rd *ResourceDescription) queryID() *string { + // Cloudwatch calls need a snake-case-unique-id + id := strings.ToLower(*rd.ID) + return aws.String(strings.Replace(id, "-", "_", -1)) +} + // BuildQuery constructs and saves the cloudwatch query for all the metrics associated with the resource func (md *MetricDescription) BuildQuery(rds []*ResourceDescription) ([]*cloudwatch.MetricDataQuery, error) { query := []*cloudwatch.MetricDataQuery{} for _, resource := range rds { dimensions := resource.Dimensions dimensions = append(dimensions, md.Dimensions...) - // TODO clean this up - id := strings.ToLower(*resource.ID) cm := &cloudwatch.MetricDataQuery{ - Id: aws.String(strings.Replace(id, "-", "_", -1)), + Id: resource.queryID(), MetricStat: &cloudwatch.MetricStat{ Metric: &cloudwatch.Metric{ MetricName: md.awsMetric, @@ -270,7 +276,7 @@ func (md *MetricDescription) BuildQuery(rds []*ResourceDescription) ([]*cloudwat }, // We hardcode the label so that we can rely on the ordering in // saveData. - Label: aws.String((&awsLabels{*resource.Name, *resource.ID, *resource.Type}).String()), + Label: aws.String((&awsLabels{*resource.Name, *resource.ID, *resource.Type, *resource.Region}).String()), ReturnData: aws.Bool(true), } query = append(query, cm) @@ -279,24 +285,26 @@ func (md *MetricDescription) BuildQuery(rds []*ResourceDescription) ([]*cloudwat } type awsLabels struct { - name string - id string - rType string + name string + id string + rType string + region string } func (l *awsLabels) String() string { - return fmt.Sprintf("%s %s %s", l.name, l.id, l.rType) + return fmt.Sprintf("%s %s %s %s", l.name, l.id, l.rType, l.region) } func awsLabelsFromString(s string) (*awsLabels, error) { stringLabels := strings.Split(s, " ") - if len(stringLabels) < 3 { + if len(stringLabels) < 4 { return nil, fmt.Errorf("Expected at least two labels, got %s", s) } labels := awsLabels{ - name: stringLabels[len(stringLabels)-3], - id: stringLabels[len(stringLabels)-2], - rType: stringLabels[len(stringLabels)-1], + name: stringLabels[len(stringLabels)-4], + id: stringLabels[len(stringLabels)-3], + rType: stringLabels[len(stringLabels)-2], + region: stringLabels[len(stringLabels)-1], } return &labels, nil } @@ -317,7 +325,7 @@ func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput) { "name": labels.name, "id": labels.id, "type": labels.rType, - "region": "TODO", + "region": labels.region, } values, err := md.filterValues(data, &promLabels) @@ -346,7 +354,7 @@ func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput) { continue } - err = md.updateMetric(*md.OutputName, value, &promLabels) + err = md.updateMetric(value, &promLabels) if err != nil { h.LogError(err) continue @@ -364,9 +372,8 @@ func (md *MetricDescription) filterValues(data *cloudwatch.MetricDataResult, lab if err != nil { return nil, err } 
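+ // The same MetricDescription is shared by every region's polling
+ // goroutine, so access to the timestamps map must be serialised.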
- //md.Mutex.Lock() - //defer md.Mutex.Unlock() - // TODO mutex? + md.mutex.Lock() + defer md.mutex.Unlock() if md.timestamps == nil { md.timestamps = make(map[prometheus.Collector]*time.Time) } @@ -381,17 +388,14 @@ func (md *MetricDescription) filterValues(data *cloudwatch.MetricDataResult, lab return values, nil } -func (md *MetricDescription) updateMetric(name string, value float64, labels *prometheus.Labels) error { - // TODO mutex? - //rd.Parent.Parent.Mutex.Lock() - //defer rd.Parent.Parent.Mutex.Unlock() +func (md *MetricDescription) updateMetric(value float64, labels *prometheus.Labels) error { switch m := md.promMetric.(type) { case *prometheus.GaugeVec: m.With(*labels).Set(value) case *prometheus.CounterVec: m.With(*labels).Add(value) default: - return fmt.Errorf("Could not resolve type of metric %s", name) + return fmt.Errorf("Could not resolve type of metric %s", *md.OutputName) } return nil } @@ -474,6 +478,9 @@ func (rd *RegionDescription) TagsFound(tl interface{}) bool { func (md *MetricDescription) getData(cw *cloudwatch.CloudWatch, rds []*ResourceDescription) (*cloudwatch.GetMetricDataOutput, error) { query, err := md.BuildQuery(rds) + if len(query) < 1 { + return &cloudwatch.GetMetricDataOutput{}, nil + } h.LogError(err) t := time.Now().Round(time.Minute * 5) diff --git a/main.go b/main.go index 49763d0..61dc912 100644 --- a/main.go +++ b/main.go @@ -33,7 +33,7 @@ func init() { flag.StringVar(&config, "config", "config.yaml", "Path to config file") } -func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd *base.RegionDescription, pi uint8, cfg map[string]map[string]*base.MetricDescription) { +func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd *base.RegionDescription, pi uint8, cfg map[string][]*base.MetricDescription) { var delay uint8 = 0 for { select { @@ -93,7 +93,11 @@ func processConfig(p *string) *base.Config { func main() { flag.Parse() c := processConfig(&config) - mds := c.ConstructMetrics() + defaults := map[string]map[string]*string{ + "AWS/RDS": rds.Metrics, + } + + mds := c.ConstructMetrics(defaults) for _, region := range c.Regions { r := region @@ -103,7 +107,7 @@ func main() { Config: c, } rdd = append(rdd, &rd) - rd.Init(session, c.Tags, r, &c.Period) + rd.Init(session, c.Tags, r) go run(rd.Namespaces, cw, &rd, c.PollInterval, mds) } diff --git a/rds/controller.go b/rds/controller.go index 12ae7c0..9fd60f4 100644 --- a/rds/controller.go +++ b/rds/controller.go @@ -24,8 +24,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, dbi *rds.DBInstance) rd.ID = dbi.DBInstanceIdentifier rd.Name = dbi.DBInstanceIdentifier rd.Type = aws.String("rds") - rd.Parent = nd - rd.BuildQuery() + rd.Region = nd.Parent.Region nd.Mutex.Lock() nd.Resources = append(nd.Resources, &rd) nd.Mutex.Unlock() From 60b6f000213155ae15b5405af028a8277c9fd8b6 Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Fri, 3 Apr 2020 15:23:03 +1100 Subject: [PATCH 4/8] testing diff --- helpers/string.go | 1 + main.go | 24 ++-- rds/metrics.go | 275 ++++++---------------------------------------- 3 files changed, 41 insertions(+), 259 deletions(-) diff --git a/helpers/string.go b/helpers/string.go index 05ed0a3..c7dffee 100644 --- a/helpers/string.go +++ b/helpers/string.go @@ -16,6 +16,7 @@ func StringPointers(strings ...string) []*string { return sp } +// TODO fix this function up var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)") var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") diff --git a/main.go b/main.go index 
61dc912..087c544 100644 --- a/main.go +++ b/main.go @@ -10,14 +10,8 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - "github.com/CoverGenius/cloudwatch-prometheus-exporter/ec2" - "github.com/CoverGenius/cloudwatch-prometheus-exporter/elasticache" - "github.com/CoverGenius/cloudwatch-prometheus-exporter/elb" - "github.com/CoverGenius/cloudwatch-prometheus-exporter/elbv2" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/CoverGenius/cloudwatch-prometheus-exporter/network" "github.com/CoverGenius/cloudwatch-prometheus-exporter/rds" - "github.com/CoverGenius/cloudwatch-prometheus-exporter/s3" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/cloudwatch" @@ -39,16 +33,16 @@ func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd select { case <-time.After(time.Duration(delay) * time.Minute): var wg sync.WaitGroup - wg.Add(8) + wg.Add(1) log.Debug("Creating list of resources ...") - go elasticache.CreateResourceList(nd["AWS/ElastiCache"], &wg) - go rds.CreateResourceList(nd["AWS/RDS"], &wg) - go ec2.CreateResourceList(nd["AWS/EC2"], &wg) - go network.CreateResourceList(nd["AWS/NATGateway"], &wg) - go elb.CreateResourceList(nd["AWS/ELB"], &wg) - go elbv2.CreateResourceList(nd["AWS/ApplicationELB"], &wg) - go elbv2.CreateResourceList(nd["AWS/NetworkELB"], &wg) - go s3.CreateResourceList(nd["AWS/S3"], &wg) + //go elasticache.CreateResourceList(nd["AWS/ElastiCache"], &wg) + go rds.CreateResourceList(nd["AWS/RDS"], &wg, cfg["AWS/RDS"]) + //go ec2.CreateResourceList(nd["AWS/EC2"], &wg) + //go network.CreateResourceList(nd["AWS/NATGateway"], &wg) + //go elb.CreateResourceList(nd["AWS/ELB"], &wg) + //go elbv2.CreateResourceList(nd["AWS/ApplicationELB"], &wg) + //go elbv2.CreateResourceList(nd["AWS/NetworkELB"], &wg) + //go s3.CreateResourceList(nd["AWS/S3"], &wg) wg.Wait() log.Debug("Gathering metrics ...") delay = pi diff --git a/rds/metrics.go b/rds/metrics.go index 677a506..0ed5e49 100644 --- a/rds/metrics.go +++ b/rds/metrics.go @@ -1,248 +1,35 @@ package rds -import ( - b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/cloudwatch" -) +import "github.com/aws/aws-sdk-go/aws" -var metrics = map[string]*b.MetricDescription{ - "BinLogDiskUsage": { - Help: aws.String("The amount of disk space occupied by binary logs on the master. Applies to MySQL read replicas"), - OutputName: aws.String("rds_bin_log_disk_usage"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "BurstBalance": { - Help: aws.String("The percent of General Purpose SSD (gp2) burst-bucket I/O credits available"), - OutputName: aws.String("rds_burst_balance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Minimum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "CPUCreditBalance": { - Help: aws.String("The number of earned CPU credits that an instance has accrued. 
This represents the number of credits currently available."), - OutputName: aws.String("rds_cpu_credit_balance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Minimum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "CPUCreditUsage": { - Help: aws.String("The number of CPU credits spent by the instance for CPU utilization. One CPU credit equals one vCPU running at 100 percent utilization for one minute or an equivalent combination of vCPUs, utilization, and time"), - OutputName: aws.String("rds_cpu_credit_usage"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "CPUSurplusCreditBalance": { - Help: aws.String("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), - OutputName: aws.String("rds_cpu_surplus_credit_balance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "CPUSurplusCreditsCharged": { - Help: aws.String("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), - OutputName: aws.String("rds_cpu_surplus_credits_charged"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "CPUUtilization": { - Help: aws.String("The percentage of CPU utilization"), - OutputName: aws.String("rds_cpu_utilization"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "DatabaseConnections": { - Help: aws.String("The number of database connections in use"), - OutputName: aws.String("rds_database_connections"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "DBLoad": { - Help: aws.String("The number of active sessions for the DB engine. 
Typically, you want the data for the average number of active sessions"), - OutputName: aws.String("rds_db_load"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "DBLoadCPU": { - Help: aws.String("The number of active sessions where the wait event type is CPU"), - OutputName: aws.String("rds_db_load_cpu"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "DBLoadNonCPU": { - Help: aws.String("The number of active sessions where the wait event type is not CPU"), - OutputName: aws.String("rds_db_load_non_cpu"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "DiskQueueDepth": { - Help: aws.String("The number of outstanding IOs (read/write requests) waiting to access the disk"), - OutputName: aws.String("rds_disk_queue_depth"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "FreeableMemory": { - Help: aws.String("The amount of available random access memory"), - OutputName: aws.String("rds_freeable_memory"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "FreeStorageSpace": { - Help: aws.String("The amount of available storage space"), - OutputName: aws.String("rds_free_storage_space"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "MaximumUsedTransactionIDs": { - Help: aws.String("The maximum transaction ID that has been used. Applies to PostgreSQL"), - OutputName: aws.String("rds_maximum_used_transaction_ids"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "NetworkReceiveThroughput": { - Help: aws.String("The incoming (Receive) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), - OutputName: aws.String("rds_network_receive_throughput"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "NetworkTransmitThroughput": { - Help: aws.String("The outgoing (Transmit) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), - OutputName: aws.String("rds_network_transmit_throughput"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "OldestReplicationSlotLag": { - Help: aws.String("The lagging size of the replica lagging the most in terms of WAL data received. 
Applies to PostgreSQL"), - OutputName: aws.String("rds_oldest_replication_slot_lag"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "ReadIOPS": { - Help: aws.String("The average number of disk read I/O operations per second"), - OutputName: aws.String("rds_read_iops"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "ReadLatency": { - Help: aws.String("The amount of time taken per disk I/O operation"), - OutputName: aws.String("rds_read_latency"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "ReadThroughput": { - Help: aws.String("The number of bytes read from disk per second"), - OutputName: aws.String("rds_read_throughput"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "ReplicaLag": { - Help: aws.String("The amount of time a Read Replica DB instance lags behind the source DB instance. Applies to MySQL, MariaDB, and PostgreSQL Read Replicas"), - OutputName: aws.String("rds_replica_lag"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "ReplicationSlotDiskUsage": { - Help: aws.String("The disk space used by replication slot files. Applies to PostgreSQL"), - OutputName: aws.String("rds_replication_slot_disk_usage"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "SwapUsage": { - Help: aws.String("The amount of swap space used on the DB instance. This metric is not available for SQL Server"), - OutputName: aws.String("rds_swap_usage"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "TransactionLogsDiskUsage": { - Help: aws.String("The disk space used by transaction logs. Applies to PostgreSQL"), - OutputName: aws.String("rds_transaction_logs_disk_usage"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "TransactionLogsGeneration": { - Help: aws.String("The size of transaction logs generated per second. 
Applies to PostgreSQL"), - OutputName: aws.String("rds_transaction_logs_generation"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "WriteIOPS": { - Help: aws.String("The average number of disk write I/O operations per second"), - OutputName: aws.String("rds_write_iops"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "WriteLatency": { - Help: aws.String("The amount of time taken per disk I/O operation"), - OutputName: aws.String("rds_write_latency"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, - "WriteThroughput": { - Help: aws.String("The number of bytes written to disk per second"), - OutputName: aws.String("rds_write_throughput"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, - Dimensions: []*cloudwatch.Dimension{}, - }, -} - -// GetMetrics returns a map of MetricDescriptions to be exported for this namespace -func GetMetrics() map[string]*b.MetricDescription { - return metrics +var Metrics = map[string]*string{ + "BinLogDiskUsage": aws.String("The amount of disk space occupied by binary logs on the master. Applies to MySQL read replicas"), + "BurstBalance": aws.String("The percent of General Purpose SSD (gp2) burst-bucket I/O credits available"), + "CPUCreditBalance": aws.String("The number of earned CPU credits that an instance has accrued. This represents the number of credits currently available."), + "CPUCreditUsage": aws.String("The number of CPU credits spent by the instance for CPU utilization. One CPU credit equals one vCPU running at 100 percent utilization for one minute or an equivalent combination of vCPUs, utilization, and time"), + "CPUSurplusCreditBalance": aws.String("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), + "CPUSurplusCreditsCharged": aws.String("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), + "CPUUtilization": aws.String("The percentage of CPU utilization"), + "DatabaseConnections": aws.String("The number of database connections in use"), + "DBLoad": aws.String("The number of active sessions for the DB engine. Typically, you want the data for the average number of active sessions"), + "DBLoadCPU": aws.String("The number of active sessions where the wait event type is CPU"), + "DBLoadNonCPU": aws.String("The number of active sessions where the wait event type is not CPU"), + "DiskQueueDepth": aws.String("The number of outstanding IOs (read/write requests) waiting to access the disk"), + "FreeableMemory": aws.String("The amount of available random access memory"), + "FreeStorageSpace": aws.String("The amount of available storage space"), + "MaximumUsedTransactionIDs": aws.String("The maximum transaction ID that has been used. 
Applies to PostgreSQL"), + "NetworkReceiveThroughput": aws.String("The incoming (Receive) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), + "NetworkTransmitThroughput": aws.String("The outgoing (Transmit) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), + "OldestReplicationSlotLag": aws.String("The lagging size of the replica lagging the most in terms of WAL data received. Applies to PostgreSQL"), + "ReadIOPS": aws.String("The average number of disk read I/O operations per second"), + "ReadLatency": aws.String("The amount of time taken per disk I/O operation"), + "ReadThroughput": aws.String("The number of bytes read from disk per second"), + "ReplicaLag": aws.String("The amount of time a Read Replica DB instance lags behind the source DB instance. Applies to MySQL, MariaDB, and PostgreSQL Read Replicas"), + "ReplicationSlotDiskUsage": aws.String("The disk space used by replication slot files. Applies to PostgreSQL"), + "SwapUsage": aws.String("The amount of swap space used on the DB instance. This metric is not available for SQL Server"), + "TransactionLogsDiskUsage": aws.String("The disk space used by transaction logs. Applies to PostgreSQL"), + "TransactionLogsGeneration": aws.String("The size of transaction logs generated per second. Applies to PostgreSQL"), + "WriteIOPS": aws.String("The average number of disk write I/O operations per second"), + "WriteLatency": aws.String("The amount of time taken per disk I/O operation"), + "WriteThroughput": aws.String("The number of bytes written to disk per second"), } From ce0ab4518e4a8778f968a7ae0e203fc92924d12a Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Sun, 5 Apr 2020 15:55:06 +1000 Subject: [PATCH 5/8] All metric types --- base/config.go | 12 +-- base/controller.go | 25 ++++-- ec2/controller.go | 4 +- ec2/metrics.go | 74 +--------------- elasticache/controller.go | 4 +- elasticache/metrics.go | 95 +------------------- elb/controller.go | 4 +- elb/metrics.go | 53 +---------- elbv2/controller.go | 4 +- elbv2/metrics.go | 107 +--------------------- main.go | 56 ++++++------ network/controller.go | 4 +- network/metrics.go | 50 +---------- rds/controller.go | 3 +- rds/metrics.go | 182 +++++++++++++++++++++++++++++++------- s3/controller.go | 4 +- s3/metrics.go | 14 +-- 17 files changed, 219 insertions(+), 476 deletions(-) diff --git a/base/config.go b/base/config.go index a97fdc3..b54ef6b 100644 --- a/base/config.go +++ b/base/config.go @@ -54,16 +54,17 @@ func LoadConfig(path string) (*Config, error) { return &c, nil } -func (c *Config) ConstructMetrics(defaults map[string]map[string]*string) map[string][]*MetricDescription { +func (c *Config) ConstructMetrics(defaults map[string]map[string]*MetricDescription) map[string][]*MetricDescription { mds := make(map[string][]*MetricDescription) for namespace, metrics := range c.Metrics.Data { if len(metrics) <= 0 { - if n_defaults, ok := defaults[namespace]; ok == true { - for key, help := range n_defaults { + if namespaceDefaults, ok := defaults[namespace]; ok == true { + for key, defaultMetric := range namespaceDefaults { metrics = append(metrics, &configMetric{ - AWSMetric: key, - Help: *help, + AWSMetric: key, + OutputName: *defaultMetric.OutputName, + Help: *defaultMetric.Help, }) } } @@ -88,7 +89,6 @@ func (c *Config) ConstructMetrics(defaults map[string]map[string]*string) map[st rangeSeconds = int(c.Period) * 
int(time.Minute) } - // TODO check this works if metric.Statistics == nil || len(metric.Statistics) < 1 { metric.Statistics = helpers.StringPointers("Average") } diff --git a/base/controller.go b/base/controller.go index b7e16bf..0043285 100644 --- a/base/controller.go +++ b/base/controller.go @@ -63,6 +63,17 @@ type RegionDescription struct { Filters []*ec2.Filter Namespaces map[string]*NamespaceDescription Mutex sync.RWMutex + + cw *cloudwatch.CloudWatch +} + +func NewRegionDescription(c *Config, r string, metrics map[string][]*MetricDescription) *RegionDescription { + session := session.Must(session.NewSession(&aws.Config{Region: &r})) + cw := cloudwatch.New(session) + rd := RegionDescription{Region: &r} + rd.cw = cw + rd.Init(session, c.Tags, metrics) + return &rd } // NamespaceDescription describes an AWS namespace to be monitored via cloudwatch @@ -144,11 +155,10 @@ func (rd *RegionDescription) saveAccountID() error { // Init initializes a region and its nested namspaces in preparation for collection // cloudwatchc metrics for that region. -func (rd *RegionDescription) Init(s *session.Session, td []*TagDescription, r *string) error { - log.Infof("Initializing region %s ...", *r) +func (rd *RegionDescription) Init(s *session.Session, td []*TagDescription, metrics map[string][]*MetricDescription) error { + log.Infof("Initializing region %s ...", *rd.Region) rd.Session = s rd.Tags = td - rd.Region = r err := rd.saveAccountID() h.LogErrorExit(err) @@ -156,14 +166,14 @@ func (rd *RegionDescription) Init(s *session.Session, td []*TagDescription, r *s err = rd.buildFilters() h.LogErrorExit(err) - err = rd.CreateNamespaceDescriptions() + err = rd.CreateNamespaceDescriptions(metrics) h.LogErrorExit(err) return nil } // CreateNamespaceDescriptions populates the list of NamespaceDescriptions for an AWS region -func (rd *RegionDescription) CreateNamespaceDescriptions() error { +func (rd *RegionDescription) CreateNamespaceDescriptions(metrics map[string][]*MetricDescription) error { namespaces := GetNamespaces() rd.Namespaces = make(map[string]*NamespaceDescription) for _, namespace := range namespaces { @@ -171,6 +181,7 @@ func (rd *RegionDescription) CreateNamespaceDescriptions() error { Namespace: aws.String(namespace), Parent: rd, } + nd.Metrics = metrics[namespace] rd.Namespaces[namespace] = &nd } @@ -178,7 +189,7 @@ func (rd *RegionDescription) CreateNamespaceDescriptions() error { } // GatherMetrics queries the Cloudwatch API for metrics related to the resources in this region -func (rd *RegionDescription) GatherMetrics(cw *cloudwatch.CloudWatch) { +func (rd *RegionDescription) GatherMetrics() { log.Infof("Gathering metrics for region %s...", *rd.Region) ndc := make(chan *NamespaceDescription) @@ -187,7 +198,7 @@ func (rd *RegionDescription) GatherMetrics(cw *cloudwatch.CloudWatch) { for _, metric := range namespace.Metrics { metric.initializeMetric() } - go namespace.GatherMetrics(cw, ndc) + go namespace.GatherMetrics(rd.cw, ndc) } } diff --git a/ec2/controller.go b/ec2/controller.go index 8487ecf..6ead9f2 100644 --- a/ec2/controller.go +++ b/ec2/controller.go @@ -33,8 +33,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, instance *ec2.Instanc rd.Name = name } rd.Type = aws.String("ec2") - rd.Parent = nd - rd.BuildQuery() + rd.Region = nd.Parent.Region nd.Resources = append(nd.Resources, &rd) return nil @@ -46,7 +45,6 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { log.Debug("Creating EC2 resource list ...") nd.Resources = 
[]*b.ResourceDescription{} - nd.Metrics = GetMetrics() session := ec2.New(nd.Parent.Session) input := ec2.DescribeInstancesInput{ Filters: nd.Parent.Filters, diff --git a/ec2/metrics.go b/ec2/metrics.go index 736a2f2..cec4ae8 100644 --- a/ec2/metrics.go +++ b/ec2/metrics.go @@ -2,191 +2,119 @@ package ec2 import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) -var metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.MetricDescription{ "CPUCreditBalance": { Help: aws.String("The number of earned CPU credits that an instance has accrued since it was launched or started"), OutputName: aws.String("ec2_cpu_credit_balance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CPUCreditUsage": { Help: aws.String("The number of CPU credits spent by the instance for CPU utilization"), OutputName: aws.String("ec2_cpu_credit_usage"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CPUSurplusCreditBalance": { Help: aws.String("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), OutputName: aws.String("ec2_cpu_surplus_credit_balance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CPUSurplusCreditsCharged": { Help: aws.String("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), OutputName: aws.String("ec2_cpu_surplus_credits_charged"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CPUUtilization": { Help: aws.String("The percentage of allocated EC2 compute units that are currently in use on the instance"), OutputName: aws.String("ec2_cpu_utilization"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "DiskReadBytes": { Help: aws.String("Bytes read from all instance store volumes available to the instance"), OutputName: aws.String("ec2_disk_read_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "DiskReadOps": { Help: aws.String("Completed read operations from all instance store volumes available to the instance in a specified period of time"), OutputName: aws.String("ec2_disk_read_ops"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "DiskWriteBytes": { Help: aws.String("Bytes written to all instance store volumes available to the instance"), OutputName: aws.String("ec2_disk_write_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "DiskWriteOps": { Help: aws.String("Completed write operations to all instance store volumes available to the instance in a specified period of time"), OutputName: aws.String("ec2_disk_write_ops"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EBSByteBalance": { Help: aws.String("Available only for the smaller instance 
sizes. Provides information about the percentage of throughput credits remaining in the burst bucket"), OutputName: aws.String("ec2_ebs_byte_balance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EBSIOBalance": { Help: aws.String("Available only for the smaller instance sizes. Provides information about the percentage of I/O credits remaining in the burst bucket"), OutputName: aws.String("ec2_ebs_io_balance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EBSReadBytes": { Help: aws.String("Bytes read from all EBS volumes attached to the instance in a specified period of time"), OutputName: aws.String("ec2_ebs_read_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EBSReadOps": { Help: aws.String("Completed read operations from all Amazon EBS volumes attached to the instance in a specified period of time"), OutputName: aws.String("ec2_ebs_read_ops"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EBSWriteBytes": { Help: aws.String("Bytes written to all EBS volumes attached to the instance in a specified period of time"), OutputName: aws.String("ec2_ebs_write_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EBSWriteOps": { Help: aws.String("Completed write operations to all EBS volumes attached to the instance in a specified period of time"), OutputName: aws.String("ec2_ebs_write_ops"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkIn": { Help: aws.String("The number of bytes received on all network interfaces by the instance"), OutputName: aws.String("ec2_network_in"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkOut": { Help: aws.String("The number of bytes sent out on all network interfaces by the instance"), OutputName: aws.String("ec2_network_out"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsIn": { Help: aws.String("The number of packets received on all network interfaces by the instance"), OutputName: aws.String("ec2_network_packets_in"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsOut": { Help: aws.String("The number of packets sent out on all network interfaces by the instance"), OutputName: aws.String("ec2_network_packets_out"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "StatusCheckFailed": { Help: aws.String("Reports whether the instance has passed both the instance status check and the system status check in the last minute"), OutputName: aws.String("ec2_status_check_failed"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "StatusCheckFailed_Instance": { Help: aws.String("Reports whether the instance has passed the instance status check in the last minute"), OutputName: 
aws.String("ec2_status_check_failed_instance"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "StatusCheckFailed_System": { Help: aws.String("Reports whether the instance has passed the system status check in the last minute"), OutputName: aws.String("ec2_status_check_failed_system"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, } - -// GetMetrics returns a map of MetricDescriptions to be exported for this namespace -func GetMetrics() map[string]*b.MetricDescription { - return metrics -} diff --git a/elasticache/controller.go b/elasticache/controller.go index de9f2a8..0359dd0 100644 --- a/elasticache/controller.go +++ b/elasticache/controller.go @@ -25,8 +25,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, cc *elasticache.Cache rd.ID = cc.CacheClusterId rd.Name = cc.CacheClusterId rd.Type = aws.String("elasticache") - rd.Parent = nd - rd.BuildQuery() + rd.Region = nd.Parent.Region nd.Mutex.Lock() nd.Resources = append(nd.Resources, &rd) nd.Mutex.Unlock() @@ -40,7 +39,6 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { log.Debug("Creating Elasticache resource list ...") nd.Resources = []*b.ResourceDescription{} - nd.Metrics = GetMetrics() session := elasticache.New(nd.Parent.Session) input := elasticache.DescribeCacheClustersInput{} result, err := session.DescribeCacheClusters(&input) diff --git a/elasticache/metrics.go b/elasticache/metrics.go index ef0b95c..8ec5609 100644 --- a/elasticache/metrics.go +++ b/elasticache/metrics.go @@ -2,247 +2,154 @@ package elasticache import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) -var metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.MetricDescription{ "ActiveDefragHits": { Help: aws.String("The number of value reallocations per minute performed by the active defragmentation process"), OutputName: aws.String("elasticache_active_defrag_hits"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "BytesUsedForCache": { Help: aws.String("The total number of bytes allocated by Redis for all purposes, including the dataset, buffers, etc"), OutputName: aws.String("elasticache_bytes_used_for_cache"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CacheHits": { Help: aws.String("The number of successful read-only key lookups in the main dictionary"), OutputName: aws.String("elasticache_cache_hits"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CacheMisses": { Help: aws.String("The number of unsuccessful read-only key lookups in the main dictionary"), OutputName: aws.String("elasticache_cache_misses"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CPUUtilization": { Help: aws.String("The percentage of CPU utilization"), OutputName: aws.String("elasticache_cpu_utilization"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CurrConnections": { Help: 
aws.String("The number of client connections, excluding connections from read replicas. ElastiCache uses two to three of the connections to monitor the cluster in each case"), OutputName: aws.String("elasticache_curr_connections"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "CurrItems": { Help: aws.String("The number of items in the cache"), OutputName: aws.String("elasticache_curr_items"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EngineCPUUtilization": { Help: aws.String("Provides CPU utilization of the Redis engine thread. Since Redis is single-threaded, you can use this metric to analyze the load of the Redis process itself"), OutputName: aws.String("elasticache_engine_cpu_utilization"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "Evictions": { Help: aws.String("The number of keys that have been evicted due to the maxmemory limit"), OutputName: aws.String("elasticache_evictions"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "FreeableMemory": { Help: aws.String("The amount of free memory available on the host"), OutputName: aws.String("elasticache_freeable_memory"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "GetTypeCmds": { Help: aws.String("The total number of read-only type commands"), OutputName: aws.String("elasticache_get_type_cmds"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "IsMaster": { Help: aws.String("Returns 1 in case if node is master"), OutputName: aws.String("elasticache_is_master"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "KeyBasedCmds": { Help: aws.String("The total number of commands that are key-based"), OutputName: aws.String("elasticache_key_based_cmds"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ListBasedCmds": { Help: aws.String("The total number of commands that are list-based"), OutputName: aws.String("elasticache_list_based_cmds"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "MasterLinkHealthStatus": { Help: aws.String("This status has two values: 0 or 1. The value 0 indicates that data in the Elasticache primary node is not in sync with Redis on EC2. 
The value of 1 indicates that the data is in sync"), OutputName: aws.String("elasticache_master_link_health_status"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkBytesIn": { Help: aws.String("The number of bytes the host has read from the network"), OutputName: aws.String("elasticache_network_bytes_in"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkBytesOut": { Help: aws.String("The number of bytes the host has written to the network"), OutputName: aws.String("elasticache_network_bytes_out"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsIn": { Help: aws.String("The number of packets received on all network interfaces by the instance. This metric identifies the volume of incoming traffic in terms of the number of packets on a single instance"), OutputName: aws.String("elasticache_network_packets_in"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsOut": { Help: aws.String("The number of packets sent out on all network interfaces by the instance. This metric identifies the volume of outgoing traffic in terms of the number of packets on a single instance"), OutputName: aws.String("elasticache_network_packets_out"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NewConnections": { Help: aws.String("The total number of connections that have been accepted by the server during this period"), OutputName: aws.String("elasticache_new_connections"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "Reclaimed": { Help: aws.String("The total number of key expiration events"), OutputName: aws.String("elasticache_reclaimed"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ReplicationBytes": { Help: aws.String("For nodes in a replicated configuration, ReplicationBytes reports the number of bytes that the primary is sending to all of its replicas. This metric is representative of the write load on the replication group"), OutputName: aws.String("elasticache_replication_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ReplicationLag": { Help: aws.String("This metric is only applicable for a node running as a read replica. It represents how far behind, in seconds, the replica is in applying changes from the primary node"), OutputName: aws.String("elasticache_replication_lag"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "SaveInProgress": { Help: aws.String("This binary metric returns 1 whenever a background save (forked or forkless) is in progress, and 0 otherwise. A background save process is typically used during snapshots and syncs. 
These operations can cause degraded performance"), OutputName: aws.String("elasticache_save_in_progress"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "SetBasedCmds": { Help: aws.String("The total number of commands that are set-based"), OutputName: aws.String("elasticache_set_based_cmds"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "SetTypeCmds": { Help: aws.String("The total number of write types of commands"), OutputName: aws.String("elasticache_set_type_cmds"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "SortedSetBasedCmds": { Help: aws.String("The total number of commands that are sorted set-based"), OutputName: aws.String("elasticache_sorted_set_based_cmds"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "StringBasedCmds": { Help: aws.String("The total number of commands that are string-based"), OutputName: aws.String("elasticache_string_based_cmds"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "SwapUsage": { Help: aws.String("The amount of swap used on the host"), OutputName: aws.String("elasticache_swap_usage"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, } - -// GetMetrics returns a map of MetricDescriptions to be exported for this namespace -func GetMetrics() map[string]*b.MetricDescription { - return metrics -} diff --git a/elb/controller.go b/elb/controller.go index 16a7f6d..62149a0 100644 --- a/elb/controller.go +++ b/elb/controller.go @@ -24,8 +24,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, td *elb.TagDescriptio rd.ID = td.LoadBalancerName rd.Name = td.LoadBalancerName rd.Type = aws.String("lb-classic") - rd.Parent = nd - rd.BuildQuery() + rd.Region = nd.Parent.Region nd.Resources = append(nd.Resources, &rd) return nil @@ -36,7 +35,6 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { defer wg.Done() log.Debug("Creating Classic LB resource list ...") nd.Resources = []*b.ResourceDescription{} - nd.Metrics = GetMetrics() session := elb.New(nd.Parent.Session) input := elb.DescribeLoadBalancersInput{} result, err := session.DescribeLoadBalancers(&input) diff --git a/elb/metrics.go b/elb/metrics.go index 623ac09..a814a27 100644 --- a/elb/metrics.go +++ b/elb/metrics.go @@ -2,135 +2,84 @@ package elb import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) -var metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.MetricDescription{ "BackendConnectionErrors": { Help: aws.String("The number of connections that were not successfully established between the load balancer and the registered instances. Because the load balancer retries the connection when there are errors, this count can exceed the request rate. 
Note that this count also includes any connection errors related to health checks"), OutputName: aws.String("clb_backend_connection_errors"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedALBActiveConnectionCount": { Help: aws.String("The estimated number of concurrent TCP connections active from clients to the load balancer and from the load balancer to targets"), OutputName: aws.String("clb_estimated_alb_active_connection_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedALBConsumedLCUs": { Help: aws.String("The estimated number of load balancer capacity units (LCU) used by an Application Load Balancer"), OutputName: aws.String("clb_estimated_alb_consumed_lcus"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedALBNewConnectionCount": { Help: aws.String("The estimated number of new TCP connections established from clients to the load balancer and from the load balancer to targets"), OutputName: aws.String("clb_estimated_alb_new_connection_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedProcessedBytes": { Help: aws.String("The estimated number of bytes processed by an Application Load Balancer"), OutputName: aws.String("clb_estimated_processed_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HealthyHostCount": { Help: aws.String("The number of healthy instances registered with your load balancer. A newly registered instance is considered healthy after it passes the first health check"), OutputName: aws.String("clb_healthy_host_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_2XX": { Help: aws.String("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), OutputName: aws.String("clb_httpcode_backend_2xx"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_3XX": { Help: aws.String("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), OutputName: aws.String("clb_httpcode_backend_3xx"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_4XX": { Help: aws.String("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), OutputName: aws.String("clb_httpcode_backend_4xx"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_5XX": { Help: aws.String("The number of HTTP response codes generated by registered instances. 
This count does not include any response codes generated by the load balancer"), OutputName: aws.String("clb_httpcode_backend_5xx"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_5XX": { Help: aws.String("The number of HTTP 5XX server error codes generated by the load balancer. This count does not include any response codes generated by the registered instances. The metric is reported if there are no healthy instances registered to the load balancer, or if the request rate exceeds the capacity of the instances (spillover) or the load balancer"), OutputName: aws.String("clb_httpcode_elb_5xx"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "Latency": { Help: aws.String("[HTTP listener] The total time elapsed, in seconds, from the time the load balancer sent the request to a registered instance until the instance started to send the response headers.[TCP listener] The total time elapsed, in seconds, for the load balancer to successfully establish a connection to a registered instance"), OutputName: aws.String("clb_latency"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "RequestCount": { Help: aws.String("The number of requests completed or connections made during the specified interval (1 or 5 minutes)"), OutputName: aws.String("clb_request_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "SurgeQueueLength": { Help: aws.String("The total number of requests (HTTP listener) or connections (TCP listener) that are pending routing to a healthy instance. The maximum size of the queue is 1,024. 
Additional requests or connections are rejected when the queue is full"), OutputName: aws.String("clb_surge_queue_length"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "UnHealthyHostCount": { Help: aws.String("The number of unhealthy instances registered with your load balancer"), OutputName: aws.String("clb_unhealthy_host_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, } - -// GetMetrics returns a map of MetricDescriptions to be exported for this namespace -func GetMetrics() map[string]*b.MetricDescription { - return metrics -} diff --git a/elbv2/controller.go b/elbv2/controller.go index 61314ff..2d96657 100644 --- a/elbv2/controller.go +++ b/elbv2/controller.go @@ -35,9 +35,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, td *elbv2.TagDescript h.LogError(err) rd.ID = td.ResourceArn rd.Name = &lbName - rd.Parent = nd - rd.BuildQuery() - nd.Metrics = GetMetrics(rd.Type) + rd.Region = nd.Parent.Region nd.Resources = append(nd.Resources, &rd) return nil diff --git a/elbv2/metrics.go b/elbv2/metrics.go index a86c512..3bbd5ae 100644 --- a/elbv2/metrics.go +++ b/elbv2/metrics.go @@ -2,265 +2,162 @@ package elbv2 import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) -var metricsALB = map[string]*b.MetricDescription{ +var ALBMetrics = map[string]*b.MetricDescription{ "ActiveConnectionCount": { Help: aws.String("The total number of concurrent TCP connections active from clients to the load balancer and from the load balancer to targets"), OutputName: aws.String("alb_alive_connection_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ClientTLSNegotiationErrorCount": { Help: aws.String("The number of TLS connections initiated by the client that did not establish a session with the load balancer. Possible causes include a mismatch of ciphers or protocols"), OutputName: aws.String("alb_client_tls_negotiation_error_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ConsumedLCUs": { Help: aws.String("The number of load balancer capacity units (LCU) used by your load balancer"), OutputName: aws.String("alb_consumed_lcus"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HealthyHostCount": { Help: aws.String("The number of targets that are considered healthy"), OutputName: aws.String("alb_healthy_host_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_4XX_Count": { Help: aws.String("The number of HTTP 4XX client error codes that originate from the load balancer. 
Client errors are generated when requests are malformed or incomplete"), OutputName: aws.String("alb_httpcode_elb_4xx_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_502_Count": { Help: aws.String("The number of HTTP 502 error codes that originate from the load balancer"), OutputName: aws.String("alb_httpcode_elb_502_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_503_Count": { Help: aws.String("The number of HTTP 503 error codes that originate from the load balancer"), OutputName: aws.String("alb_httpcode_elb_503_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_504_Count": { Help: aws.String("The number of HTTP 504 error codes that originate from the load balancer"), OutputName: aws.String("alb_httpcode_elb_504_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_5XX_Count": { Help: aws.String("The number of HTTP 5XX server error codes that originate from the load balancer. This count does not include any response codes generated by the targets"), OutputName: aws.String("alb_httpcode_elb_5xx_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_2XX_Count": { Help: aws.String("The number of HTTP response codes generated by the targets"), OutputName: aws.String("alb_httpcode_target_2xx_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_3XX_Count": { Help: aws.String("The number of HTTP response codes generated by the targets"), OutputName: aws.String("alb_httpcode_target_3xx_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_4XX_Count": { Help: aws.String("The number of HTTP response codes generated by the targets"), OutputName: aws.String("alb_httpcode_target_4xx_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_5XX_Count": { Help: aws.String("The number of HTTP response codes generated by the targets"), OutputName: aws.String("alb_httpcode_target_5xx_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NewConnectionCount": { Help: aws.String("The total number of new TCP connections established from clients to the load balancer and from the load balancer to targets"), OutputName: aws.String("alb_new_connection_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ProcessedBytes": { Help: aws.String("The total number of bytes processed by the load balancer over IPv4 and IPv6. 
This count includes traffic to and from clients and Lambda functions, and traffic from an Identity Provider (IdP) if user authentication is enabled"), OutputName: aws.String("alb_processed_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "RequestCount": { Help: aws.String("The number of requests processed over IPv4 and IPv6. This count includes only the requests with a response generated by a target of the load balancer"), OutputName: aws.String("alb_request_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "RequestCountPerTarget": { Help: aws.String("The average number of requests received by each target in a target group. You must specify the target group using the TargetGroup dimension. This metric does not apply if the target is a Lambda function"), OutputName: aws.String("alb_request_count_per_target"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "RuleEvaluations": { Help: aws.String("The number of rules processed by the load balancer given a request rate averaged over an hour"), OutputName: aws.String("alb_rule_evaluations"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "TargetConnectionErrorCount": { Help: aws.String("The number of connections that were not successfully established between the load balancer and target. This metric does not apply if the target is a Lambda function"), OutputName: aws.String("alb_target_connection_error_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "TargetResponseTime": { Help: aws.String("The time elapsed, in seconds, after the request leaves the load balancer until a response from the target is received. This is equivalent to the target_processing_time field in the access logs"), OutputName: aws.String("alb_target_response_time"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Maximum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "UnHealthyHostCount": { Help: aws.String("The number of targets that are considered unhealthy"), OutputName: aws.String("alb_unhealthy_host_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, } -var metricsNLB = map[string]*b.MetricDescription{ +var NLBMetrics = map[string]*b.MetricDescription{ "ActiveFlowCount": { Help: aws.String("The total number of concurrent flows (or connections) from clients to targets. This metric includes connections in the SYN_SENT and ESTABLISHED states. 
TCP connections are not terminated at the load balancer, so a client opening a TCP connection to a target counts as a single flow"), OutputName: aws.String("nlb_active_flow_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ConsumedLCUs": { Help: aws.String("The number of load balancer capacity units (LCU) used by your load balancer"), OutputName: aws.String("nlb_consumed_lcus"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "HealthyHostCount": { Help: aws.String("The number of targets that are considered healthy"), OutputName: aws.String("nlb_healthy_host_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "NewFlowCount": { Help: aws.String("The total number of new flows (or connections) established from clients to targets in the time period"), OutputName: aws.String("nlb_new_flow_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ProcessedBytes": { Help: aws.String("The total number of bytes processed by the load balancer, including TCP/IP headers. This count includes traffic to and from targets, minus health check traffic"), OutputName: aws.String("nlb_processed_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "TCP_Client_Reset_Count": { Help: aws.String("The total number of reset (RST) packets sent from a client to a target. These resets are generated by the client and forwarded by the load balancer"), OutputName: aws.String("nlb_tcp_client_reset_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "TCP_ELB_Reset_Count": { Help: aws.String("The total number of reset (RST) packets generated by the load balancer"), OutputName: aws.String("nlb_tcp_elb_reset_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "TCP_Target_Reset_Count": { Help: aws.String("The total number of reset (RST) packets sent from a target to a client. 
These resets are generated by the target and forwarded by the load balancer"), OutputName: aws.String("nlb_tcp_target_reset_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "UnHealthyHostCount": { Help: aws.String("The number of targets that are considered unhealthy"), OutputName: aws.String("nlb_unhealthy_host_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average", "Sum"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, } - -// GetMetrics returns a map of MetricDescriptions to be exported for this namespace -func GetMetrics(t *string) map[string]*b.MetricDescription { - switch *t { - case "lb-network": - return metricsNLB - case "lb-application": - return metricsALB - default: - return metricsALB - } -} diff --git a/main.go b/main.go index 087c544..fe735f2 100644 --- a/main.go +++ b/main.go @@ -7,14 +7,17 @@ import ( "sync" "time" + "github.com/CoverGenius/cloudwatch-prometheus-exporter/ec2" + "github.com/CoverGenius/cloudwatch-prometheus-exporter/elasticache" + "github.com/CoverGenius/cloudwatch-prometheus-exporter/elb" + "github.com/CoverGenius/cloudwatch-prometheus-exporter/elbv2" + "github.com/CoverGenius/cloudwatch-prometheus-exporter/network" + "github.com/CoverGenius/cloudwatch-prometheus-exporter/s3" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/CoverGenius/cloudwatch-prometheus-exporter/rds" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/session" - "github.com/aws/aws-sdk-go/service/cloudwatch" log "github.com/sirupsen/logrus" ) @@ -27,26 +30,26 @@ func init() { flag.StringVar(&config, "config", "config.yaml", "Path to config file") } -func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd *base.RegionDescription, pi uint8, cfg map[string][]*base.MetricDescription) { +func run(nd map[string]*base.NamespaceDescription, rd *base.RegionDescription, pi uint8, cfg map[string][]*base.MetricDescription) { var delay uint8 = 0 for { select { case <-time.After(time.Duration(delay) * time.Minute): var wg sync.WaitGroup - wg.Add(1) + wg.Add(8) log.Debug("Creating list of resources ...") - //go elasticache.CreateResourceList(nd["AWS/ElastiCache"], &wg) - go rds.CreateResourceList(nd["AWS/RDS"], &wg, cfg["AWS/RDS"]) - //go ec2.CreateResourceList(nd["AWS/EC2"], &wg) - //go network.CreateResourceList(nd["AWS/NATGateway"], &wg) - //go elb.CreateResourceList(nd["AWS/ELB"], &wg) - //go elbv2.CreateResourceList(nd["AWS/ApplicationELB"], &wg) - //go elbv2.CreateResourceList(nd["AWS/NetworkELB"], &wg) - //go s3.CreateResourceList(nd["AWS/S3"], &wg) + go elasticache.CreateResourceList(nd["AWS/ElastiCache"], &wg) + go rds.CreateResourceList(nd["AWS/RDS"], &wg) + go ec2.CreateResourceList(nd["AWS/EC2"], &wg) + go network.CreateResourceList(nd["AWS/NATGateway"], &wg) + go elb.CreateResourceList(nd["AWS/ELB"], &wg) + go elbv2.CreateResourceList(nd["AWS/ApplicationELB"], &wg) + go elbv2.CreateResourceList(nd["AWS/NetworkELB"], &wg) + go s3.CreateResourceList(nd["AWS/S3"], &wg) wg.Wait() log.Debug("Gathering metrics ...") delay = pi - go rd.GatherMetrics(cw) + go rd.GatherMetrics() } } } @@ -85,25 +88,26 @@ func processConfig(p *string) *base.Config { } func main() { + // TODO allow hot reload of config flag.Parse() c := processConfig(&config) - defaults := map[string]map[string]*string{ 
- "AWS/RDS": rds.Metrics, + defaults := map[string]map[string]*base.MetricDescription{ + "AWS/RDS": rds.Metrics, + "AWS/ElastiCache": elasticache.Metrics, + "AWS/EC2": ec2.Metrics, + "AWS/NATGateway": network.Metrics, + "AWS/ELB": elb.Metrics, + "AWS/ApplicationELB": elbv2.ALBMetrics, + "AWS/NetworkELB": elbv2.NLBMetrics, + "AWS/S3": s3.Metrics, } - mds := c.ConstructMetrics(defaults) for _, region := range c.Regions { - r := region - session := session.Must(session.NewSession(&aws.Config{Region: r})) - cw := cloudwatch.New(session) - rd := base.RegionDescription{ - Config: c, - } - rdd = append(rdd, &rd) - rd.Init(session, c.Tags, r) + rd := base.NewRegionDescription(c, *region, mds) + rdd = append(rdd, rd) - go run(rd.Namespaces, cw, &rd, c.PollInterval, mds) + go run(rd.Namespaces, rd, c.PollInterval, mds) } http.Handle("/metrics", promhttp.Handler()) diff --git a/network/controller.go b/network/controller.go index 375c839..26e39e1 100644 --- a/network/controller.go +++ b/network/controller.go @@ -24,8 +24,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, ng *ec2.NatGateway) e rd.ID = ng.NatGatewayId rd.Name = ng.NatGatewayId rd.Type = aws.String("nat-gateway") - rd.Parent = nd - rd.BuildQuery() + rd.Region = nd.Parent.Region nd.Resources = append(nd.Resources, &rd) return nil @@ -36,7 +35,6 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { defer wg.Done() log.Debug("Creating NatGateway resource list ...") nd.Resources = []*b.ResourceDescription{} - nd.Metrics = GetMetrics() session := ec2.New(nd.Parent.Session) input := ec2.DescribeNatGatewaysInput{ Filter: nd.Parent.Filters, diff --git a/network/metrics.go b/network/metrics.go index e3f24a4..811be15 100644 --- a/network/metrics.go +++ b/network/metrics.go @@ -2,127 +2,79 @@ package network import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) -var metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.MetricDescription{ "ActiveConnectionCount": { Help: aws.String("The total number of concurrent active TCP connections through the NAT gateway"), OutputName: aws.String("nat_gateway_active_connection_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "BytesInFromDestination": { Help: aws.String("The number of bytes received by the NAT gateway from the destination"), OutputName: aws.String("nat_gateway_bytes_in_from_destination"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "BytesInFromSource": { Help: aws.String("The number of bytes received by the NAT gateway from clients in your VPC"), OutputName: aws.String("nat_gateway_bytes_in_from_source"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "BytesOutToDestination": { Help: aws.String("The number of bytes sent out through the NAT gateway to the destination"), OutputName: aws.String("nat_gateway_bytes_out_to_destination"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "BytesOutToSource": { Help: aws.String("The number of bytes sent through the NAT gateway to the clients in your VPC"), OutputName: 
aws.String("nat_gateway_bytes_out_to_source"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ConnectionAttemptCount": { Help: aws.String("The number of connection attempts made through the NAT gateway"), OutputName: aws.String("nat_gateway_connection_attempt_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ConnectionEstablishedCount": { Help: aws.String("The number of connections established through the NAT gateway"), OutputName: aws.String("nat_gateway_connection_established_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "ErrorPortAllocation": { Help: aws.String("The number of times the NAT gateway could not allocate a source port"), OutputName: aws.String("nat_gateway_error_port_allocation"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "IdleTimeoutCount": { Help: aws.String("The number of connections that transitioned from the active state to the idle state. An active connection transitions to idle if it was not closed gracefully and there was no activity for the last 350 seconds"), OutputName: aws.String("nat_gateway_idle_timeout_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsDropCount": { Help: aws.String("The number of packets dropped by the NAT gateway"), OutputName: aws.String("nat_gateway_packets_drop_count"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsInFromDestination": { Help: aws.String("The number of packets received by the NAT gateway from the destination"), OutputName: aws.String("nat_gateway_packets_in_from_destination"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsInFromSource": { Help: aws.String("The number of packets received by the NAT gateway from clients in your VPC"), OutputName: aws.String("nat_gateway_packets_in_from_source"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsOutToDestination": { Help: aws.String("The number of packets sent out through the NAT gateway to the destination"), OutputName: aws.String("nat_gateway_packets_out_to_destination"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsOutToSource": { Help: aws.String("The number of packets sent through the NAT gateway to the clients in your VPC"), OutputName: aws.String("nat_gateway_packets_out_to_source"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 5, Dimensions: []*cloudwatch.Dimension{}, }, } - -// GetMetrics returns a map of MetricDescriptions to be exported for this namespace -func GetMetrics() map[string]*b.MetricDescription { - return metrics -} diff --git a/rds/controller.go b/rds/controller.go index 9fd60f4..63df635 100644 --- a/rds/controller.go +++ b/rds/controller.go @@ -33,11 +33,10 @@ func CreateResourceDescription(nd *b.NamespaceDescription, dbi *rds.DBInstance) } // CreateResourceList fetches a list of all RDS databases in the region -func 
CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup, metrics []*b.MetricDescription) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { defer wg.Done() log.Debug("Creating RDS resource list ...") nd.Resources = []*b.ResourceDescription{} - nd.Metrics = metrics session := rds.New(nd.Parent.Session) input := rds.DescribeDBInstancesInput{} result, err := session.DescribeDBInstances(&input) diff --git a/rds/metrics.go b/rds/metrics.go index 0ed5e49..d53ea31 100644 --- a/rds/metrics.go +++ b/rds/metrics.go @@ -1,35 +1,155 @@ package rds -import "github.com/aws/aws-sdk-go/aws" +import ( + b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/cloudwatch" +) -var Metrics = map[string]*string{ - "BinLogDiskUsage": aws.String("The amount of disk space occupied by binary logs on the master. Applies to MySQL read replicas"), - "BurstBalance": aws.String("The percent of General Purpose SSD (gp2) burst-bucket I/O credits available"), - "CPUCreditBalance": aws.String("The number of earned CPU credits that an instance has accrued. This represents the number of credits currently available."), - "CPUCreditUsage": aws.String("The number of CPU credits spent by the instance for CPU utilization. One CPU credit equals one vCPU running at 100 percent utilization for one minute or an equivalent combination of vCPUs, utilization, and time"), - "CPUSurplusCreditBalance": aws.String("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), - "CPUSurplusCreditsCharged": aws.String("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), - "CPUUtilization": aws.String("The percentage of CPU utilization"), - "DatabaseConnections": aws.String("The number of database connections in use"), - "DBLoad": aws.String("The number of active sessions for the DB engine. Typically, you want the data for the average number of active sessions"), - "DBLoadCPU": aws.String("The number of active sessions where the wait event type is CPU"), - "DBLoadNonCPU": aws.String("The number of active sessions where the wait event type is not CPU"), - "DiskQueueDepth": aws.String("The number of outstanding IOs (read/write requests) waiting to access the disk"), - "FreeableMemory": aws.String("The amount of available random access memory"), - "FreeStorageSpace": aws.String("The amount of available storage space"), - "MaximumUsedTransactionIDs": aws.String("The maximum transaction ID that has been used. Applies to PostgreSQL"), - "NetworkReceiveThroughput": aws.String("The incoming (Receive) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), - "NetworkTransmitThroughput": aws.String("The outgoing (Transmit) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), - "OldestReplicationSlotLag": aws.String("The lagging size of the replica lagging the most in terms of WAL data received. 
Applies to PostgreSQL"), - "ReadIOPS": aws.String("The average number of disk read I/O operations per second"), - "ReadLatency": aws.String("The amount of time taken per disk I/O operation"), - "ReadThroughput": aws.String("The number of bytes read from disk per second"), - "ReplicaLag": aws.String("The amount of time a Read Replica DB instance lags behind the source DB instance. Applies to MySQL, MariaDB, and PostgreSQL Read Replicas"), - "ReplicationSlotDiskUsage": aws.String("The disk space used by replication slot files. Applies to PostgreSQL"), - "SwapUsage": aws.String("The amount of swap space used on the DB instance. This metric is not available for SQL Server"), - "TransactionLogsDiskUsage": aws.String("The disk space used by transaction logs. Applies to PostgreSQL"), - "TransactionLogsGeneration": aws.String("The size of transaction logs generated per second. Applies to PostgreSQL"), - "WriteIOPS": aws.String("The average number of disk write I/O operations per second"), - "WriteLatency": aws.String("The amount of time taken per disk I/O operation"), - "WriteThroughput": aws.String("The number of bytes written to disk per second"), +var Metrics = map[string]*b.MetricDescription{ + "BinLogDiskUsage": { + Help: aws.String("The amount of disk space occupied by binary logs on the master. Applies to MySQL read replicas"), + OutputName: aws.String("rds_bin_log_disk_usage"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "BurstBalance": { + Help: aws.String("The percent of General Purpose SSD (gp2) burst-bucket I/O credits available"), + OutputName: aws.String("rds_burst_balance"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "CPUCreditBalance": { + Help: aws.String("The number of earned CPU credits that an instance has accrued. This represents the number of credits currently available."), + OutputName: aws.String("rds_cpu_credit_balance"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "CPUCreditUsage": { + Help: aws.String("The number of CPU credits spent by the instance for CPU utilization. One CPU credit equals one vCPU running at 100 percent utilization for one minute or an equivalent combination of vCPUs, utilization, and time"), + OutputName: aws.String("rds_cpu_credit_usage"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "CPUSurplusCreditBalance": { + Help: aws.String("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), + OutputName: aws.String("rds_cpu_surplus_credit_balance"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "CPUSurplusCreditsCharged": { + Help: aws.String("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), + OutputName: aws.String("rds_cpu_surplus_credits_charged"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "CPUUtilization": { + Help: aws.String("The percentage of CPU utilization"), + OutputName: aws.String("rds_cpu_utilization"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "DatabaseConnections": { + Help: aws.String("The number of database connections in use"), + OutputName: aws.String("rds_database_connections"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "DBLoad": { + Help: aws.String("The number of active sessions for the DB engine. 
Typically, you want the data for the average number of active sessions"), + OutputName: aws.String("rds_db_load"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "DBLoadCPU": { + Help: aws.String("The number of active sessions where the wait event type is CPU"), + OutputName: aws.String("rds_db_load_cpu"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "DBLoadNonCPU": { + Help: aws.String("The number of active sessions where the wait event type is not CPU"), + OutputName: aws.String("rds_db_load_non_cpu"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "DiskQueueDepth": { + Help: aws.String("The number of outstanding IOs (read/write requests) waiting to access the disk"), + OutputName: aws.String("rds_disk_queue_depth"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "FreeableMemory": { + Help: aws.String("The amount of available random access memory"), + OutputName: aws.String("rds_freeable_memory"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "FreeStorageSpace": { + Help: aws.String("The amount of available storage space"), + OutputName: aws.String("rds_free_storage_space"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "MaximumUsedTransactionIDs": { + Help: aws.String("The maximum transaction ID that has been used. Applies to PostgreSQL"), + OutputName: aws.String("rds_maximum_used_transaction_ids"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "NetworkReceiveThroughput": { + Help: aws.String("The incoming (Receive) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), + OutputName: aws.String("rds_network_receive_throughput"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "NetworkTransmitThroughput": { + Help: aws.String("The outgoing (Transmit) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), + OutputName: aws.String("rds_network_transmit_throughput"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "OldestReplicationSlotLag": { + Help: aws.String("The lagging size of the replica lagging the most in terms of WAL data received. Applies to PostgreSQL"), + OutputName: aws.String("rds_oldest_replication_slot_lag"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "ReadIOPS": { + Help: aws.String("The average number of disk read I/O operations per second"), + OutputName: aws.String("rds_read_iops"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "ReadLatency": { + Help: aws.String("The amount of time taken per disk I/O operation"), + OutputName: aws.String("rds_read_latency"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "ReadThroughput": { + Help: aws.String("The number of bytes read from disk per second"), + OutputName: aws.String("rds_read_throughput"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "ReplicaLag": { + Help: aws.String("The amount of time a Read Replica DB instance lags behind the source DB instance. Applies to MySQL, MariaDB, and PostgreSQL Read Replicas"), + OutputName: aws.String("rds_replica_lag"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "ReplicationSlotDiskUsage": { + Help: aws.String("The disk space used by replication slot files. Applies to PostgreSQL"), + OutputName: aws.String("rds_replication_slot_disk_usage"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "SwapUsage": { + Help: aws.String("The amount of swap space used on the DB instance. 
This metric is not available for SQL Server"), + OutputName: aws.String("rds_swap_usage"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "TransactionLogsDiskUsage": { + Help: aws.String("The disk space used by transaction logs. Applies to PostgreSQL"), + OutputName: aws.String("rds_transaction_logs_disk_usage"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "TransactionLogsGeneration": { + Help: aws.String("The size of transaction logs generated per second. Applies to PostgreSQL"), + OutputName: aws.String("rds_transaction_logs_generation"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "WriteIOPS": { + Help: aws.String("The average number of disk write I/O operations per second"), + OutputName: aws.String("rds_write_iops"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "WriteLatency": { + Help: aws.String("The amount of time taken per disk I/O operation"), + OutputName: aws.String("rds_write_latency"), + Dimensions: []*cloudwatch.Dimension{}, + }, + "WriteThroughput": { + Help: aws.String("The number of bytes written to disk per second"), + OutputName: aws.String("rds_write_throughput"), + Dimensions: []*cloudwatch.Dimension{}, + }, } diff --git a/s3/controller.go b/s3/controller.go index 3e25f97..ac539be 100644 --- a/s3/controller.go +++ b/s3/controller.go @@ -27,8 +27,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, bucket *s3.Bucket) er rd.ID = bucket.Name rd.Name = bucket.Name rd.Type = aws.String("s3") - rd.Parent = nd - rd.BuildQuery() + rd.Region = nd.Parent.Region nd.Mutex.Lock() nd.Resources = append(nd.Resources, &rd) nd.Mutex.Unlock() @@ -44,7 +43,6 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { defer wg.Done() nd.Resources = []*b.ResourceDescription{} - nd.Metrics = GetMetrics() session := s3.New(nd.Parent.Session) input := s3.ListBucketsInput{} result, err := session.ListBuckets(&input) diff --git a/s3/metrics.go b/s3/metrics.go index a74914d..7d1fed8 100644 --- a/s3/metrics.go +++ b/s3/metrics.go @@ -2,18 +2,14 @@ package s3 import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" - h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) -var metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.MetricDescription{ "BucketSizeBytes": { Help: aws.String("The amount of data in bytes stored in a bucket in the STANDARD storage class, INTELLIGENT_TIERING storage class, Standard - Infrequent Access (STANDARD_IA) storage class, OneZone - Infrequent Access (ONEZONE_IA), Reduced Redundancy Storage (RRS) class, Deep Archive Storage (DEEP_ARCHIVE) class or, Glacier (GLACIER) storage class"), OutputName: aws.String("s3_bucket_size_bytes"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 1440, Dimensions: []*cloudwatch.Dimension{ { Name: aws.String("StorageType"), @@ -24,9 +20,6 @@ var metrics = map[string]*b.MetricDescription{ "NumberOfObjects": { Help: aws.String("The total number of objects stored in a bucket for all storage classes except for the GLACIER storage class"), OutputName: aws.String("s3_number_of_objects"), - Data: map[string][]*string{}, - Statistic: h.StringPointers("Average"), - Period: 1440, Dimensions: []*cloudwatch.Dimension{ { Name: aws.String("StorageType"), @@ -35,8 +28,3 @@ var metrics = map[string]*b.MetricDescription{ }, }, } - -// GetMetrics returns a map of MetricDescriptions to be exported for this namespace -func GetMetrics() 
map[string]*b.MetricDescription { - return metrics -} From 9e37ce78d3570f28c2c7759af062b3e49a217952 Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Sun, 5 Apr 2020 15:55:15 +1000 Subject: [PATCH 6/8] more wip --- base/controller.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/base/controller.go b/base/controller.go index 0043285..7a6b9b0 100644 --- a/base/controller.go +++ b/base/controller.go @@ -2,6 +2,7 @@ package base import ( "fmt" + "regexp" "strings" "sync" "time" @@ -181,7 +182,9 @@ func (rd *RegionDescription) CreateNamespaceDescriptions(metrics map[string][]*M Namespace: aws.String(namespace), Parent: rd, } - nd.Metrics = metrics[namespace] + if mds, ok := metrics[namespace]; ok == true { + nd.Metrics = mds + } rd.Namespaces[namespace] = &nd } @@ -204,13 +207,16 @@ func (rd *RegionDescription) GatherMetrics() { // GatherMetrics queries the Cloudwatch API for metrics related to this AWS namespace in the parent region func (nd *NamespaceDescription) GatherMetrics(cw *cloudwatch.CloudWatch, ndc chan *NamespaceDescription) { + log.Debugf("Namespace %s", *nd.Namespace) for _, md := range nd.Metrics { + // TODO keep this/rename? + met := md go func(md *MetricDescription, ndc chan *NamespaceDescription) { - result, err := md.getData(cw, nd.Resources) + result, err := met.getData(cw, nd.Resources) h.LogError(err) - md.saveData(result) + met.saveData(result) ndc <- nd - }(md, ndc) + }(met, ndc) } } @@ -264,8 +270,14 @@ func (rd *ResourceDescription) BuildDimensions(dd []*DimensionDescription) error func (rd *ResourceDescription) queryID() *string { // Cloudwatch calls need a snake-case-unique-id + // TODO this function needs to be more robust id := strings.ToLower(*rd.ID) - return aws.String(strings.Replace(id, "-", "_", -1)) + // TODO handle the error / make this global + reg, _ := regexp.Compile("[^a-zA-Z0-9]+") + id = reg.ReplaceAllString(id, "_") + //id = strings.ReplaceAll(id, "-", "_") + //id = strings.ReplaceAll(id, ".", "_") + return aws.String(id) } // BuildQuery constructs and saves the cloudwatch query for all the metrics associated with the resource From 5ca299541174c970fcc63b995a5e830aad1b8296 Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Mon, 6 Apr 2020 09:21:12 +1000 Subject: [PATCH 7/8] bump build version --- build/cloudwatch-prometheus-exporter.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/cloudwatch-prometheus-exporter.spec b/build/cloudwatch-prometheus-exporter.spec index 231599f..fd06b4c 100644 --- a/build/cloudwatch-prometheus-exporter.spec +++ b/build/cloudwatch-prometheus-exporter.spec @@ -5,7 +5,7 @@ Name: cloudwatch-prometheus-exporter -Version: 0.0.10 +Version: 0.0.11 Release: 0%{?dist} Summary: Cloudwatch Prometheus Exporter License: BSD From cc1543ab0c330ce69dc1fb6f3ca311810a898487 Mon Sep 17 00:00:00 2001 From: Andrew Wright Date: Mon, 6 Apr 2020 09:24:01 +1000 Subject: [PATCH 8/8] Remove unused function --- base/config.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/base/config.go b/base/config.go index b54ef6b..b013e52 100644 --- a/base/config.go +++ b/base/config.go @@ -47,13 +47,6 @@ type Config struct { Metrics metric `yaml:"metrics"` // Map of per metric configuration overrides } -// LoadConfig reads the config file located at path and reads it into the Config struct -func LoadConfig(path string) (*Config, error) { - c := Config{} - helpers.YAMLDecode(&path, &c) - return &c, nil -} - func (c *Config) ConstructMetrics(defaults 
map[string]map[string]*MetricDescription) map[string][]*MetricDescription {
 	mds := make(map[string][]*MetricDescription)
 	for namespace, metrics := range c.Metrics.Data {
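
For orientation, the pieces above are presumably wired together at start-up along these lines: the per-namespace default maps exported in this series (rds.Metrics, s3.Metrics) become the defaults argument to Config.ConstructMetrics, and its result is what patch 6/8 passes into RegionDescription.CreateNamespaceDescriptions. The sketch below assumes exactly that; the namespace keys, the helper name, and the merge behaviour inside ConstructMetrics are illustrative rather than taken from the patches.

// Sketch only (not part of these patches): assumed start-up wiring for the
// new defaults-based metric construction. Namespace keys and the helper
// name buildMetricDescriptions are illustrative assumptions.
package main

import (
	"log"

	"github.com/CoverGenius/cloudwatch-prometheus-exporter/base"
	"github.com/CoverGenius/cloudwatch-prometheus-exporter/rds"
	"github.com/CoverGenius/cloudwatch-prometheus-exporter/s3"
)

// buildMetricDescriptions hands the built-in per-namespace defaults to
// ConstructMetrics, which is expected to combine them with any overrides
// from the config file's metrics: section (the exact merge logic lives in base).
func buildMetricDescriptions(cfg *base.Config) map[string][]*base.MetricDescription {
	defaults := map[string]map[string]*base.MetricDescription{
		"AWS/RDS": rds.Metrics, // defaults shipped with the exporter
		"AWS/S3":  s3.Metrics,
	}
	return cfg.ConstructMetrics(defaults)
}

func main() {
	cfg := &base.Config{} // in the real exporter this is populated from the YAML config file
	mds := buildMetricDescriptions(cfg)
	log.Printf("constructed metric descriptions for %d namespaces", len(mds))
	// The resulting map is what patch 6/8 feeds into
	// RegionDescription.CreateNamespaceDescriptions(metrics).
}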