diff --git a/base/config.go b/base/config.go index af71914..8cda8fa 100644 --- a/base/config.go +++ b/base/config.go @@ -7,18 +7,18 @@ import ( "github.com/aws/aws-sdk-go/service/cloudwatch" ) -type configMetric struct { +type ConfigMetric struct { AWSMetric string `yaml:"metric"` // The Cloudwatch metric to use Help string `yaml:"help"` // Custom help text for the generated metric Dimensions []*cloudwatch.Dimension `yaml:"dimensions"` // The resource dimensions to generate individual series for (via labels) Statistics []*string `yaml:"statistics"` // List of AWS statistics to use. OutputName string `yaml:"output_name"` // Allows override of the generate metric name - RangeSeconds int64 `yaml:"range_seconds"` // How far back to request data for in seconds. PeriodSeconds int64 `yaml:"period_seconds"` // Granularity of results from cloudwatch API. + RangeSeconds int64 `yaml:"range_seconds"` // How far back to request data for in seconds. } type metric struct { - Data map[string][]*configMetric `yaml:",omitempty,inline"` // Map from namespace to list of metrics to scrape. + Data map[string][]*ConfigMetric `yaml:",omitempty,inline"` // Map from namespace to list of metrics to scrape. } // Config represents the exporter configuration passed which is read at runtime from a YAML file. @@ -40,17 +40,16 @@ type Config struct { } // ConstructMetrics generates a map of MetricDescriptions keyed by CloudWatch namespace using the defaults provided in Config. -func (c *Config) ConstructMetrics(defaults map[string]map[string]*MetricDescription) map[string][]*MetricDescription { +func (c *Config) ConstructMetrics(defaults map[string]map[string]*ConfigMetric) map[string][]*MetricDescription { mds := make(map[string][]*MetricDescription) for namespace, metrics := range c.Metrics.Data { - - if len(metrics) <= 0 { + if len(metrics) == 0 { if namespaceDefaults, ok := defaults[namespace]; ok { for key, defaultMetric := range namespaceDefaults { - metrics = append(metrics, &configMetric{ + metrics = append(metrics, &ConfigMetric{ AWSMetric: key, - OutputName: *defaultMetric.OutputName, - Help: *defaultMetric.Help, + OutputName: defaultMetric.OutputName, + Help: defaultMetric.Help, PeriodSeconds: defaultMetric.PeriodSeconds, RangeSeconds: defaultMetric.RangeSeconds, Dimensions: defaultMetric.Dimensions, @@ -83,24 +82,25 @@ func (c *Config) ConstructMetrics(defaults map[string]map[string]*MetricDescript help := metric.Help if help == "" { if d, ok := defaults[namespace][metric.AWSMetric]; ok { - help = *d.Help + help = d.Help } } // TODO handle dimensions // TODO move metricName function here / apply to output name - // TODO one stat per metric - mds[namespace] = append(mds[namespace], &MetricDescription{ - Help: &help, - OutputName: &name, - Dimensions: metric.Dimensions, - PeriodSeconds: period, - RangeSeconds: rangeSeconds, - Statistic: metric.Statistics, - - Namespace: namespace, - AWSMetric: metric.AWSMetric, - }) + for _, stat := range metric.Statistics { + mds[namespace] = append(mds[namespace], &MetricDescription{ + Help: &help, + OutputName: &name, + Dimensions: metric.Dimensions, + PeriodSeconds: period, + RangeSeconds: rangeSeconds, + Statistic: *stat, + + Namespace: namespace, + AWSMetric: metric.AWSMetric, + }) + } } } return mds diff --git a/base/controller.go b/base/controller.go index 0322835..45f5f7d 100644 --- a/base/controller.go +++ b/base/controller.go @@ -25,7 +25,7 @@ import ( log "github.com/sirupsen/logrus" ) -var alphaRegex, _ = regexp.Compile("[^a-zA-Z0-9]+") +var alphaRegex = regexp.MustCompile("[^a-zA-Z0-9]+") // TagDescription represents an AWS tag key value pair type TagDescription struct { @@ -49,7 +49,7 @@ type MetricDescription struct { Dimensions []*cloudwatch.Dimension PeriodSeconds int64 RangeSeconds int64 - Statistic []*string + Statistic string timestamps map[awsLabels]*time.Time mutex sync.RWMutex @@ -89,9 +89,9 @@ type ResourceDescription struct { Query []*cloudwatch.MetricDataQuery } -func (md *MetricDescription) metricName(stat string) *string { +func (md *MetricDescription) metricName() *string { suffix := "" - switch stat { + switch md.Statistic { case "Average": // For backwards compatibility we have to omit the _avg suffix = "" @@ -120,7 +120,7 @@ func (rd *RegionDescription) BuildARN(s *string, r *string) (string, error) { return a.String(), nil } -func (rd *RegionDescription) buildFilters() error { +func (rd *RegionDescription) saveFilters() { filters := []*ec2.Filter{} for _, tag := range rd.Tags { f := &ec2.Filter{ @@ -130,16 +130,20 @@ func (rd *RegionDescription) buildFilters() error { filters = append(filters, f) } rd.Filters = filters - return nil } func (rd *RegionDescription) saveAccountID() error { session := iam.New(rd.Session) input := iam.GetUserInput{} user, err := session.GetUser(&input) - h.LogError(err) + if err != nil { + return err + } + a, err := arn.Parse(*user.User.Arn) - h.LogError(err) + if err != nil { + return err + } rd.AccountID = &a.AccountID return nil @@ -153,13 +157,16 @@ func (rd *RegionDescription) Init(s *session.Session, td []*TagDescription, metr rd.Tags = td err := rd.saveAccountID() - h.LogErrorExit(err) + if err != nil { + return fmt.Errorf("error saving account id: %s", err) + } - err = rd.buildFilters() - h.LogErrorExit(err) + rd.saveFilters() err = rd.CreateNamespaceDescriptions(metrics) - h.LogErrorExit(err) + if err != nil { + return fmt.Errorf("error creating namespaces: %s", err) + } return nil } @@ -196,9 +203,9 @@ func (nd *NamespaceDescription) GatherMetrics(cw *cloudwatch.CloudWatch) { for _, md := range nd.Metrics { go func(md *MetricDescription) { nd.Mutex.RLock() - result, err := md.getData(cw, nd.Resources, nd) + result, err := md.getData(cw, nd.Resources) nd.Mutex.RUnlock() - h.LogError(err) + h.LogIfError(err) md.saveData(result, *nd.Parent.Region) }(md) } @@ -232,62 +239,55 @@ func (md *MetricDescription) BuildQuery(rds []*ResourceDescription) ([]*cloudwat for _, rd := range rds { dimensions := rd.Dimensions dimensions = append(dimensions, md.Dimensions...) - for _, stat := range md.Statistic { - cm := &cloudwatch.MetricDataQuery{ - Id: rd.queryID(*stat), - MetricStat: &cloudwatch.MetricStat{ - Metric: &cloudwatch.Metric{ - MetricName: &md.AWSMetric, - Namespace: rd.Parent.Namespace, - Dimensions: dimensions, - }, - Stat: stat, - Period: aws.Int64(md.PeriodSeconds), + cm := &cloudwatch.MetricDataQuery{ + Id: rd.queryID(*&md.Statistic), + MetricStat: &cloudwatch.MetricStat{ + Metric: &cloudwatch.Metric{ + MetricName: &md.AWSMetric, + Namespace: rd.Parent.Namespace, + Dimensions: dimensions, }, - // We hardcode the label so that we can rely on the ordering in - // saveData. - Label: aws.String((&awsLabels{*stat, *rd.Name, *rd.ID, *rd.Type, *rd.Parent.Parent.Region}).String()), - ReturnData: aws.Bool(true), - } - query = append(query, cm) + Stat: &md.Statistic, + Period: aws.Int64(md.PeriodSeconds), + }, + // We hardcode the label so that we can rely on the ordering in + // saveData. + Label: aws.String((&awsLabels{*rd.Name, *rd.ID, *rd.Type, *rd.Parent.Parent.Region}).String()), + ReturnData: aws.Bool(true), } + query = append(query, cm) } return query, nil } type awsLabels struct { - statistic string - name string - id string - rType string - region string + name string + id string + rType string + region string } func (l *awsLabels) String() string { - return fmt.Sprintf("%s %s %s %s %s", l.statistic, l.name, l.id, l.rType, l.region) + return fmt.Sprintf("%s %s %s %s", l.name, l.id, l.rType, l.region) } func awsLabelsFromString(s string) (*awsLabels, error) { stringLabels := strings.Split(s, " ") - if len(stringLabels) < 5 { - return nil, fmt.Errorf("expected at least five labels, got %s", s) + if len(stringLabels) < 4 { + return nil, fmt.Errorf("expected at least four labels, got %s", s) } labels := awsLabels{ - statistic: stringLabels[len(stringLabels)-5], - name: stringLabels[len(stringLabels)-4], - id: stringLabels[len(stringLabels)-3], - rType: stringLabels[len(stringLabels)-2], - region: stringLabels[len(stringLabels)-1], + name: stringLabels[len(stringLabels)-4], + id: stringLabels[len(stringLabels)-3], + rType: stringLabels[len(stringLabels)-2], + region: stringLabels[len(stringLabels)-1], } return &labels, nil } func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput, region string) { - newData := map[string][]*promMetric{} - for _, stat := range md.Statistic { - // pre-allocate in case the last resource for a stat goes away - newData[*stat] = []*promMetric{} - } + newData := []*promMetric{} + for _, data := range c.MetricDataResults { if len(data.Values) <= 0 { continue @@ -295,7 +295,7 @@ func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput, region labels, err := awsLabelsFromString(*data.Label) if err != nil { - h.LogError(err) + h.LogIfError(err) continue } @@ -305,7 +305,7 @@ func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput, region } value := 0.0 - switch labels.statistic { + switch md.Statistic { case "Average": value, err = h.Average(values) case "Sum": @@ -317,37 +317,36 @@ func (md *MetricDescription) saveData(c *cloudwatch.GetMetricDataOutput, region case "SampleCount": value, err = h.Sum(values) default: - err = fmt.Errorf("unknown statistic type: %s", labels.statistic) + err = fmt.Errorf("unknown statistic type: %s", md.Statistic) } if err != nil { - h.LogError(err) + h.LogIfError(err) continue } - newData[labels.statistic] = append(newData[labels.statistic], &promMetric{value, []string{labels.name, labels.id, labels.rType, labels.region}}) + newData = append(newData, &promMetric{value, []string{labels.name, labels.id, labels.rType, labels.region}}) } - for stat, data := range newData { - name := *md.metricName(stat) - opts := prometheus.Opts{ - Name: name, - Help: *md.Help, - } - labels := []string{"name", "id", "type", "region"} - - exporter.mutex.Lock() - if _, ok := exporter.data[name+region]; !ok { - if stat == "Sum" { - exporter.data[name+region] = NewBatchCounterVec(opts, labels) - } else { - exporter.data[name+region] = NewBatchGaugeVec(opts, labels) - } - } - exporter.mutex.Unlock() - exporter.mutex.RLock() - exporter.data[name+region].BatchUpdate(data) - exporter.mutex.RUnlock() + name := *md.metricName() + opts := prometheus.Opts{ + Name: name, + Help: *md.Help, } + labels := []string{"name", "id", "type", "region"} + + exporter.mutex.Lock() + if _, ok := exporter.data[name+region]; !ok { + if md.Statistic == "Sum" { + exporter.data[name+region] = NewBatchCounterVec(opts, labels) + } else { + exporter.data[name+region] = NewBatchGaugeVec(opts, labels) + } + } + exporter.mutex.Unlock() + + exporter.mutex.RLock() + exporter.data[name+region].BatchUpdate(newData) + exporter.mutex.RUnlock() } func (md *MetricDescription) filterValues(data *cloudwatch.MetricDataResult, labels *awsLabels) []*float64 { @@ -355,7 +354,7 @@ func (md *MetricDescription) filterValues(data *cloudwatch.MetricDataResult, lab // already been added to the counter, otherwise if the poll intervals // overlap we will double count some data. values := data.Values - if labels.statistic == "Sum" { + if md.Statistic == "Sum" { md.mutex.Lock() defer md.mutex.Unlock() if md.timestamps == nil { @@ -448,12 +447,12 @@ func (rd *RegionDescription) TagsFound(tl interface{}) bool { return false } -func (md *MetricDescription) getData(cw *cloudwatch.CloudWatch, rds []*ResourceDescription, nd *NamespaceDescription) (*cloudwatch.GetMetricDataOutput, error) { +func (md *MetricDescription) getData(cw *cloudwatch.CloudWatch, rds []*ResourceDescription) (*cloudwatch.GetMetricDataOutput, error) { query, err := md.BuildQuery(rds) - if len(query) < 1 { + if len(query) == 0 { return &cloudwatch.GetMetricDataOutput{}, nil } - h.LogError(err) + h.LogIfError(err) end := time.Now().Round(5 * time.Minute) start := end.Add(-time.Duration(md.RangeSeconds) * time.Second) @@ -464,7 +463,7 @@ func (md *MetricDescription) getData(cw *cloudwatch.CloudWatch, rds []*ResourceD MetricDataQueries: query, } result, err := cw.GetMetricData(&input) - h.LogError(err) + h.LogIfError(err) return result, err } diff --git a/base/prometheus.go b/base/prometheus.go index ecd3680..833b6cd 100644 --- a/base/prometheus.go +++ b/base/prometheus.go @@ -132,5 +132,4 @@ func NewBatchCounterVec(opts prometheus.Opts, labels []string) *BatchCounterVec labels, ), } - } diff --git a/ec2/controller.go b/ec2/controller.go index c96ce93..2ec22bd 100644 --- a/ec2/controller.go +++ b/ec2/controller.go @@ -11,7 +11,7 @@ import ( "github.com/aws/aws-sdk-go/service/ec2" ) -func CreateResourceDescription(nd *b.NamespaceDescription, instance *ec2.Instance) error { +func createResourceDescription(nd *b.NamespaceDescription, instance *ec2.Instance) (*b.ResourceDescription, error) { rd := b.ResourceDescription{} dd := []*b.DimensionDescription{ { @@ -19,8 +19,9 @@ func CreateResourceDescription(nd *b.NamespaceDescription, instance *ec2.Instanc Value: instance.InstanceId, }, } - err := rd.BuildDimensions(dd) - h.LogError(err) + if err := rd.BuildDimensions(dd); err != nil { + return nil, err + } tags := make(map[string]*string) for _, t := range instance.Tags { @@ -34,29 +35,32 @@ func CreateResourceDescription(nd *b.NamespaceDescription, instance *ec2.Instanc } rd.Type = aws.String("ec2") rd.Parent = nd - nd.Resources = append(nd.Resources, &rd) - return nil + return &rd, nil } // CreateResourceList fetches a list of all EC2 instances in the parent region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) { defer wg.Done() log.Debug("Creating EC2 resource list ...") - nd.Resources = []*b.ResourceDescription{} session := ec2.New(nd.Parent.Session) input := ec2.DescribeInstancesInput{ Filters: nd.Parent.Filters, } result, err := session.DescribeInstances(&input) - h.LogError(err) + h.LogIfError(err) + resources := []*b.ResourceDescription{} for _, reservation := range result.Reservations { for _, instance := range reservation.Instances { - err := CreateResourceDescription(nd, instance) - h.LogError(err) + if r, err := createResourceDescription(nd, instance); err == nil { + resources = append(resources, r) + } + h.LogIfError(err) } } - return nil + nd.Mutex.Lock() + nd.Resources = resources + nd.Mutex.Unlock() } diff --git a/ec2/metrics.go b/ec2/metrics.go index 8b587de..576a622 100644 --- a/ec2/metrics.go +++ b/ec2/metrics.go @@ -3,163 +3,162 @@ package ec2 import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) -// Metrics is a map of default MetricDescriptions for this namespace -var Metrics = map[string]*b.MetricDescription{ +// Metrics is a map of default ConfigMetrics for this namespace +var Metrics = map[string]*b.ConfigMetric{ "CPUCreditBalance": { - Help: aws.String("The number of earned CPU credits that an instance has accrued since it was launched or started"), - OutputName: aws.String("ec2_cpu_credit_balance"), - Statistic: h.StringPointers("Average"), + Help: ("The number of earned CPU credits that an instance has accrued since it was launched or started"), + OutputName: ("ec2_cpu_credit_balance"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUCreditUsage": { - Help: aws.String("The number of CPU credits spent by the instance for CPU utilization"), - OutputName: aws.String("ec2_cpu_credit_usage"), - Statistic: h.StringPointers("Average"), + Help: ("The number of CPU credits spent by the instance for CPU utilization"), + OutputName: ("ec2_cpu_credit_usage"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUSurplusCreditBalance": { - Help: aws.String("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), - OutputName: aws.String("ec2_cpu_surplus_credit_balance"), - Statistic: h.StringPointers("Average"), + Help: ("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), + OutputName: ("ec2_cpu_surplus_credit_balance"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUSurplusCreditsCharged": { - Help: aws.String("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), - OutputName: aws.String("ec2_cpu_surplus_credits_charged"), - Statistic: h.StringPointers("Average"), + Help: ("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), + OutputName: ("ec2_cpu_surplus_credits_charged"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUUtilization": { - Help: aws.String("The percentage of allocated EC2 compute units that are currently in use on the instance"), - OutputName: aws.String("ec2_cpu_utilization"), - Statistic: h.StringPointers("Average"), + Help: ("The percentage of allocated EC2 compute units that are currently in use on the instance"), + OutputName: ("ec2_cpu_utilization"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DiskReadBytes": { - Help: aws.String("Bytes read from all instance store volumes available to the instance"), - OutputName: aws.String("ec2_disk_read_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("Bytes read from all instance store volumes available to the instance"), + OutputName: ("ec2_disk_read_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DiskReadOps": { - Help: aws.String("Completed read operations from all instance store volumes available to the instance in a specified period of time"), - OutputName: aws.String("ec2_disk_read_ops"), - Statistic: h.StringPointers("Average"), + Help: ("Completed read operations from all instance store volumes available to the instance in a specified period of time"), + OutputName: ("ec2_disk_read_ops"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DiskWriteBytes": { - Help: aws.String("Bytes written to all instance store volumes available to the instance"), - OutputName: aws.String("ec2_disk_write_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("Bytes written to all instance store volumes available to the instance"), + OutputName: ("ec2_disk_write_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DiskWriteOps": { - Help: aws.String("Completed write operations to all instance store volumes available to the instance in a specified period of time"), - OutputName: aws.String("ec2_disk_write_ops"), - Statistic: h.StringPointers("Average"), + Help: ("Completed write operations to all instance store volumes available to the instance in a specified period of time"), + OutputName: ("ec2_disk_write_ops"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EBSByteBalance": { - Help: aws.String("Available only for the smaller instance sizes. Provides information about the percentage of throughput credits remaining in the burst bucket"), - OutputName: aws.String("ec2_ebs_byte_balance"), - Statistic: h.StringPointers("Average"), + Help: ("Available only for the smaller instance sizes. Provides information about the percentage of throughput credits remaining in the burst bucket"), + OutputName: ("ec2_ebs_byte_balance"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EBSIOBalance": { - Help: aws.String("Available only for the smaller instance sizes. Provides information about the percentage of I/O credits remaining in the burst bucket"), - OutputName: aws.String("ec2_ebs_io_balance"), - Statistic: h.StringPointers("Average"), + Help: ("Available only for the smaller instance sizes. Provides information about the percentage of I/O credits remaining in the burst bucket"), + OutputName: ("ec2_ebs_io_balance"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EBSReadBytes": { - Help: aws.String("Bytes read from all EBS volumes attached to the instance in a specified period of time"), - OutputName: aws.String("ec2_ebs_read_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("Bytes read from all EBS volumes attached to the instance in a specified period of time"), + OutputName: ("ec2_ebs_read_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EBSReadOps": { - Help: aws.String("Completed read operations from all Amazon EBS volumes attached to the instance in a specified period of time"), - OutputName: aws.String("ec2_ebs_read_ops"), - Statistic: h.StringPointers("Average"), + Help: ("Completed read operations from all Amazon EBS volumes attached to the instance in a specified period of time"), + OutputName: ("ec2_ebs_read_ops"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EBSWriteBytes": { - Help: aws.String("Bytes written to all EBS volumes attached to the instance in a specified period of time"), - OutputName: aws.String("ec2_ebs_write_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("Bytes written to all EBS volumes attached to the instance in a specified period of time"), + OutputName: ("ec2_ebs_write_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EBSWriteOps": { - Help: aws.String("Completed write operations to all EBS volumes attached to the instance in a specified period of time"), - OutputName: aws.String("ec2_ebs_write_ops"), - Statistic: h.StringPointers("Average"), + Help: ("Completed write operations to all EBS volumes attached to the instance in a specified period of time"), + OutputName: ("ec2_ebs_write_ops"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkIn": { - Help: aws.String("The number of bytes received on all network interfaces by the instance"), - OutputName: aws.String("ec2_network_in"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes received on all network interfaces by the instance"), + OutputName: ("ec2_network_in"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkOut": { - Help: aws.String("The number of bytes sent out on all network interfaces by the instance"), - OutputName: aws.String("ec2_network_out"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes sent out on all network interfaces by the instance"), + OutputName: ("ec2_network_out"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsIn": { - Help: aws.String("The number of packets received on all network interfaces by the instance"), - OutputName: aws.String("ec2_network_packets_in"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets received on all network interfaces by the instance"), + OutputName: ("ec2_network_packets_in"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsOut": { - Help: aws.String("The number of packets sent out on all network interfaces by the instance"), - OutputName: aws.String("ec2_network_packets_out"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets sent out on all network interfaces by the instance"), + OutputName: ("ec2_network_packets_out"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "StatusCheckFailed": { - Help: aws.String("Reports whether the instance has passed both the instance status check and the system status check in the last minute"), - OutputName: aws.String("ec2_status_check_failed"), - Statistic: h.StringPointers("Average"), + Help: ("Reports whether the instance has passed both the instance status check and the system status check in the last minute"), + OutputName: ("ec2_status_check_failed"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "StatusCheckFailed_Instance": { - Help: aws.String("Reports whether the instance has passed the instance status check in the last minute"), - OutputName: aws.String("ec2_status_check_failed_instance"), - Statistic: h.StringPointers("Average"), + Help: ("Reports whether the instance has passed the instance status check in the last minute"), + OutputName: ("ec2_status_check_failed_instance"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "StatusCheckFailed_System": { - Help: aws.String("Reports whether the instance has passed the system status check in the last minute"), - OutputName: aws.String("ec2_status_check_failed_system"), - Statistic: h.StringPointers("Average"), + Help: ("Reports whether the instance has passed the system status check in the last minute"), + OutputName: ("ec2_status_check_failed_system"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, diff --git a/elasticache/controller.go b/elasticache/controller.go index ce64ba4..21b481b 100644 --- a/elasticache/controller.go +++ b/elasticache/controller.go @@ -12,7 +12,7 @@ import ( "github.com/aws/aws-sdk-go/service/elasticache" ) -func CreateResourceDescription(nd *b.NamespaceDescription, cc *elasticache.CacheCluster) (*b.ResourceDescription, error) { +func createResourceDescription(nd *b.NamespaceDescription, cc *elasticache.CacheCluster) (*b.ResourceDescription, error) { rd := b.ResourceDescription{} dd := []*b.DimensionDescription{ { @@ -21,7 +21,7 @@ func CreateResourceDescription(nd *b.NamespaceDescription, cc *elasticache.Cache }, } err := rd.BuildDimensions(dd) - h.LogError(err) + h.LogIfError(err) rd.ID = cc.CacheClusterId rd.Name = cc.CacheClusterId rd.Type = aws.String("elasticache") @@ -31,48 +31,49 @@ func CreateResourceDescription(nd *b.NamespaceDescription, cc *elasticache.Cache } // CreateResourceList fetches a list of all Elasticache clusters in the parent region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) { defer wg.Done() log.Debug("Creating Elasticache resource list ...") - resources := []*b.ResourceDescription{} session := elasticache.New(nd.Parent.Session) input := elasticache.DescribeCacheClustersInput{} result, err := session.DescribeCacheClusters(&input) - h.LogError(err) + h.LogIfError(err) service := "elasticache" var w sync.WaitGroup w.Add(len(result.CacheClusters)) - ch := make(chan (*b.ResourceDescription), len(result.CacheClusters)) + ch := make(chan *b.ResourceDescription, len(result.CacheClusters)) for _, cc := range result.CacheClusters { go func(cc *elasticache.CacheCluster, wg *sync.WaitGroup) { defer wg.Done() + resource := strings.Join([]string{"cluster", *cc.CacheClusterId}, ":") arn, err := nd.Parent.BuildARN(&service, &resource) - h.LogError(err) + h.LogIfError(err) + input := elasticache.ListTagsForResourceInput{ ResourceName: aws.String(arn), } tags, err := session.ListTagsForResource(&input) - h.LogError(err) + h.LogIfError(err) if nd.Parent.TagsFound(tags) { - if r, err := CreateResourceDescription(nd, cc); err == nil { + if r, err := createResourceDescription(nd, cc); err == nil { ch <- r } - h.LogError(err) + h.LogIfError(err) } }(cc, &w) } w.Wait() close(ch) + + resources := []*b.ResourceDescription{} for r := range ch { resources = append(resources, r) } - nd.Mutex.Lock() nd.Resources = resources nd.Mutex.Unlock() - return nil } diff --git a/elasticache/metrics.go b/elasticache/metrics.go index 650f4fc..31cd3e2 100644 --- a/elasticache/metrics.go +++ b/elasticache/metrics.go @@ -3,212 +3,211 @@ package elasticache import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) // Metrics is a map of default MetricDescriptions for this namespace -var Metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.ConfigMetric{ "ActiveDefragHits": { - Help: aws.String("The number of value reallocations per minute performed by the active defragmentation process"), - OutputName: aws.String("elasticache_active_defrag_hits"), - Statistic: h.StringPointers("Average"), + Help: ("The number of value reallocations per minute performed by the active defragmentation process"), + OutputName: ("elasticache_active_defrag_hits"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "BytesUsedForCache": { - Help: aws.String("The total number of bytes allocated by Redis for all purposes, including the dataset, buffers, etc"), - OutputName: aws.String("elasticache_bytes_used_for_cache"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of bytes allocated by Redis for all purposes, including the dataset, buffers, etc"), + OutputName: ("elasticache_bytes_used_for_cache"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CacheHits": { - Help: aws.String("The number of successful read-only key lookups in the main dictionary"), - OutputName: aws.String("elasticache_cache_hits"), - Statistic: h.StringPointers("Average"), + Help: ("The number of successful read-only key lookups in the main dictionary"), + OutputName: ("elasticache_cache_hits"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CacheMisses": { - Help: aws.String("The number of unsuccessful read-only key lookups in the main dictionary"), - OutputName: aws.String("elasticache_cache_misses"), - Statistic: h.StringPointers("Average"), + Help: ("The number of unsuccessful read-only key lookups in the main dictionary"), + OutputName: ("elasticache_cache_misses"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUUtilization": { - Help: aws.String("The percentage of CPU utilization"), - OutputName: aws.String("elasticache_cpu_utilization"), - Statistic: h.StringPointers("Average"), + Help: ("The percentage of CPU utilization"), + OutputName: ("elasticache_cpu_utilization"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CurrConnections": { - Help: aws.String("The number of client connections, excluding connections from read replicas. ElastiCache uses two to three of the connections to monitor the cluster in each case"), - OutputName: aws.String("elasticache_curr_connections"), - Statistic: h.StringPointers("Average"), + Help: ("The number of client connections, excluding connections from read replicas. ElastiCache uses two to three of the connections to monitor the cluster in each case"), + OutputName: ("elasticache_curr_connections"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CurrItems": { - Help: aws.String("The number of items in the cache"), - OutputName: aws.String("elasticache_curr_items"), - Statistic: h.StringPointers("Average"), + Help: ("The number of items in the cache"), + OutputName: ("elasticache_curr_items"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EngineCPUUtilization": { - Help: aws.String("Provides CPU utilization of the Redis engine thread. Since Redis is single-threaded, you can use this metric to analyze the load of the Redis process itself"), - OutputName: aws.String("elasticache_engine_cpu_utilization"), - Statistic: h.StringPointers("Average"), + Help: ("Provides CPU utilization of the Redis engine thread. Since Redis is single-threaded, you can use this metric to analyze the load of the Redis process itself"), + OutputName: ("elasticache_engine_cpu_utilization"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "Evictions": { - Help: aws.String("The number of keys that have been evicted due to the maxmemory limit"), - OutputName: aws.String("elasticache_evictions"), - Statistic: h.StringPointers("Average"), + Help: ("The number of keys that have been evicted due to the maxmemory limit"), + OutputName: ("elasticache_evictions"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "FreeableMemory": { - Help: aws.String("The amount of free memory available on the host"), - OutputName: aws.String("elasticache_freeable_memory"), - Statistic: h.StringPointers("Average"), + Help: ("The amount of free memory available on the host"), + OutputName: ("elasticache_freeable_memory"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "GetTypeCmds": { - Help: aws.String("The total number of read-only type commands"), - OutputName: aws.String("elasticache_get_type_cmds"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of read-only type commands"), + OutputName: ("elasticache_get_type_cmds"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "IsMaster": { - Help: aws.String("Returns 1 in case if node is master"), - OutputName: aws.String("elasticache_is_master"), - Statistic: h.StringPointers("Average"), + Help: ("Returns 1 in case if node is master"), + OutputName: ("elasticache_is_master"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "KeyBasedCmds": { - Help: aws.String("The total number of commands that are key-based"), - OutputName: aws.String("elasticache_key_based_cmds"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of commands that are key-based"), + OutputName: ("elasticache_key_based_cmds"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ListBasedCmds": { - Help: aws.String("The total number of commands that are list-based"), - OutputName: aws.String("elasticache_list_based_cmds"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of commands that are list-based"), + OutputName: ("elasticache_list_based_cmds"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "MasterLinkHealthStatus": { - Help: aws.String("This status has two values: 0 or 1. The value 0 indicates that data in the Elasticache primary node is not in sync with Redis on EC2. The value of 1 indicates that the data is in sync"), - OutputName: aws.String("elasticache_master_link_health_status"), - Statistic: h.StringPointers("Average"), + Help: ("This status has two values: 0 or 1. The value 0 indicates that data in the Elasticache primary node is not in sync with Redis on EC2. The value of 1 indicates that the data is in sync"), + OutputName: ("elasticache_master_link_health_status"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkBytesIn": { - Help: aws.String("The number of bytes the host has read from the network"), - OutputName: aws.String("elasticache_network_bytes_in"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes the host has read from the network"), + OutputName: ("elasticache_network_bytes_in"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkBytesOut": { - Help: aws.String("The number of bytes the host has written to the network"), - OutputName: aws.String("elasticache_network_bytes_out"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes the host has written to the network"), + OutputName: ("elasticache_network_bytes_out"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsIn": { - Help: aws.String("The number of packets received on all network interfaces by the instance. This metric identifies the volume of incoming traffic in terms of the number of packets on a single instance"), - OutputName: aws.String("elasticache_network_packets_in"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets received on all network interfaces by the instance. This metric identifies the volume of incoming traffic in terms of the number of packets on a single instance"), + OutputName: ("elasticache_network_packets_in"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkPacketsOut": { - Help: aws.String("The number of packets sent out on all network interfaces by the instance. This metric identifies the volume of outgoing traffic in terms of the number of packets on a single instance"), - OutputName: aws.String("elasticache_network_packets_out"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets sent out on all network interfaces by the instance. This metric identifies the volume of outgoing traffic in terms of the number of packets on a single instance"), + OutputName: ("elasticache_network_packets_out"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NewConnections": { - Help: aws.String("The total number of connections that have been accepted by the server during this period"), - OutputName: aws.String("elasticache_new_connections"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of connections that have been accepted by the server during this period"), + OutputName: ("elasticache_new_connections"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "Reclaimed": { - Help: aws.String("The total number of key expiration events"), - OutputName: aws.String("elasticache_reclaimed"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of key expiration events"), + OutputName: ("elasticache_reclaimed"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ReplicationBytes": { - Help: aws.String("For nodes in a replicated configuration, ReplicationBytes reports the number of bytes that the primary is sending to all of its replicas. This metric is representative of the write load on the replication group"), - OutputName: aws.String("elasticache_replication_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("For nodes in a replicated configuration, ReplicationBytes reports the number of bytes that the primary is sending to all of its replicas. This metric is representative of the write load on the replication group"), + OutputName: ("elasticache_replication_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ReplicationLag": { - Help: aws.String("This metric is only applicable for a node running as a read replica. It represents how far behind, in seconds, the replica is in applying changes from the primary node"), - OutputName: aws.String("elasticache_replication_lag"), - Statistic: h.StringPointers("Average"), + Help: ("This metric is only applicable for a node running as a read replica. It represents how far behind, in seconds, the replica is in applying changes from the primary node"), + OutputName: ("elasticache_replication_lag"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "SaveInProgress": { - Help: aws.String("This binary metric returns 1 whenever a background save (forked or forkless) is in progress, and 0 otherwise. A background save process is typically used during snapshots and syncs. These operations can cause degraded performance"), - OutputName: aws.String("elasticache_save_in_progress"), - Statistic: h.StringPointers("Average"), + Help: ("This binary metric returns 1 whenever a background save (forked or forkless) is in progress, and 0 otherwise. A background save process is typically used during snapshots and syncs. These operations can cause degraded performance"), + OutputName: ("elasticache_save_in_progress"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "SetBasedCmds": { - Help: aws.String("The total number of commands that are set-based"), - OutputName: aws.String("elasticache_set_based_cmds"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of commands that are set-based"), + OutputName: ("elasticache_set_based_cmds"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "SetTypeCmds": { - Help: aws.String("The total number of write types of commands"), - OutputName: aws.String("elasticache_set_type_cmds"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of write types of commands"), + OutputName: ("elasticache_set_type_cmds"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "SortedSetBasedCmds": { - Help: aws.String("The total number of commands that are sorted set-based"), - OutputName: aws.String("elasticache_sorted_set_based_cmds"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of commands that are sorted set-based"), + OutputName: ("elasticache_sorted_set_based_cmds"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "StringBasedCmds": { - Help: aws.String("The total number of commands that are string-based"), - OutputName: aws.String("elasticache_string_based_cmds"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of commands that are string-based"), + OutputName: ("elasticache_string_based_cmds"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "SwapUsage": { - Help: aws.String("The amount of swap used on the host"), - OutputName: aws.String("elasticache_swap_usage"), - Statistic: h.StringPointers("Average"), + Help: ("The amount of swap used on the host"), + OutputName: ("elasticache_swap_usage"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, diff --git a/elb/controller.go b/elb/controller.go index 886984c..c6feec6 100644 --- a/elb/controller.go +++ b/elb/controller.go @@ -11,7 +11,7 @@ import ( "github.com/aws/aws-sdk-go/service/elb" ) -func CreateResourceDescription(nd *b.NamespaceDescription, td *elb.TagDescription) error { +func createResourceDescription(nd *b.NamespaceDescription, td *elb.TagDescription) (*b.ResourceDescription, error) { rd := b.ResourceDescription{} dd := []*b.DimensionDescription{ { @@ -20,47 +20,50 @@ func CreateResourceDescription(nd *b.NamespaceDescription, td *elb.TagDescriptio }, } err := rd.BuildDimensions(dd) - h.LogError(err) + h.LogIfError(err) rd.ID = td.LoadBalancerName rd.Name = td.LoadBalancerName rd.Type = aws.String("lb-classic") rd.Parent = nd - nd.Resources = append(nd.Resources, &rd) - return nil + return &rd, nil } // CreateResourceList fetches a list of all Classic LB resources in the region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) { defer wg.Done() log.Debug("Creating Classic LB resource list ...") - nd.Resources = []*b.ResourceDescription{} session := elb.New(nd.Parent.Session) input := elb.DescribeLoadBalancersInput{} result, err := session.DescribeLoadBalancers(&input) - h.LogError(err) + h.LogIfError(err) resourceList := []*string{} for _, lb := range result.LoadBalancerDescriptions { resourceList = append(resourceList, lb.LoadBalancerName) } if len(resourceList) <= 0 { - return nil + return } dti := elb.DescribeTagsInput{ LoadBalancerNames: resourceList, } tags, err := session.DescribeTags(&dti) - h.LogError(err) + h.LogIfError(err) + resources := []*b.ResourceDescription{} for _, td := range tags.TagDescriptions { if nd.Parent.TagsFound(td) { - err := CreateResourceDescription(nd, td) - h.LogError(err) + if r, err := createResourceDescription(nd, td); err == nil { + resources = append(resources, r) + } + h.LogIfError(err) } else { continue } } - return nil + nd.Mutex.Lock() + nd.Resources = resources + nd.Mutex.Unlock() } diff --git a/elb/metrics.go b/elb/metrics.go index 0daa3ae..05564ac 100644 --- a/elb/metrics.go +++ b/elb/metrics.go @@ -3,114 +3,113 @@ package elb import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) // Metrics is a map of default MetricDescriptions for this namespace -var Metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.ConfigMetric{ "BackendConnectionErrors": { - Help: aws.String("The number of connections that were not successfully established between the load balancer and the registered instances. Because the load balancer retries the connection when there are errors, this count can exceed the request rate. Note that this count also includes any connection errors related to health checks"), - OutputName: aws.String("clb_backend_connection_errors"), - Statistic: h.StringPointers("Average"), + Help: ("The number of connections that were not successfully established between the load balancer and the registered instances. Because the load balancer retries the connection when there are errors, this count can exceed the request rate. Note that this count also includes any connection errors related to health checks"), + OutputName: ("clb_backend_connection_errors"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedALBActiveConnectionCount": { - Help: aws.String("The estimated number of concurrent TCP connections active from clients to the load balancer and from the load balancer to targets"), - OutputName: aws.String("clb_estimated_alb_active_connection_count"), - Statistic: h.StringPointers("Average"), + Help: ("The estimated number of concurrent TCP connections active from clients to the load balancer and from the load balancer to targets"), + OutputName: ("clb_estimated_alb_active_connection_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedALBConsumedLCUs": { - Help: aws.String("The estimated number of load balancer capacity units (LCU) used by an Application Load Balancer"), - OutputName: aws.String("clb_estimated_alb_consumed_lcus"), - Statistic: h.StringPointers("Average"), + Help: ("The estimated number of load balancer capacity units (LCU) used by an Application Load Balancer"), + OutputName: ("clb_estimated_alb_consumed_lcus"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedALBNewConnectionCount": { - Help: aws.String("The estimated number of new TCP connections established from clients to the load balancer and from the load balancer to targets"), - OutputName: aws.String("clb_estimated_alb_new_connection_count"), - Statistic: h.StringPointers("Average"), + Help: ("The estimated number of new TCP connections established from clients to the load balancer and from the load balancer to targets"), + OutputName: ("clb_estimated_alb_new_connection_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "EstimatedProcessedBytes": { - Help: aws.String("The estimated number of bytes processed by an Application Load Balancer"), - OutputName: aws.String("clb_estimated_processed_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("The estimated number of bytes processed by an Application Load Balancer"), + OutputName: ("clb_estimated_processed_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HealthyHostCount": { - Help: aws.String("The number of healthy instances registered with your load balancer. A newly registered instance is considered healthy after it passes the first health check"), - OutputName: aws.String("clb_healthy_host_count"), - Statistic: h.StringPointers("Average"), + Help: ("The number of healthy instances registered with your load balancer. A newly registered instance is considered healthy after it passes the first health check"), + OutputName: ("clb_healthy_host_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_2XX": { - Help: aws.String("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), - OutputName: aws.String("clb_httpcode_backend_2xx"), - Statistic: h.StringPointers("Average"), + Help: ("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), + OutputName: ("clb_httpcode_backend_2xx"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_3XX": { - Help: aws.String("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), - OutputName: aws.String("clb_httpcode_backend_3xx"), - Statistic: h.StringPointers("Average"), + Help: ("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), + OutputName: ("clb_httpcode_backend_3xx"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_4XX": { - Help: aws.String("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), - OutputName: aws.String("clb_httpcode_backend_4xx"), - Statistic: h.StringPointers("Average"), + Help: ("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), + OutputName: ("clb_httpcode_backend_4xx"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Backend_5XX": { - Help: aws.String("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), - OutputName: aws.String("clb_httpcode_backend_5xx"), - Statistic: h.StringPointers("Average"), + Help: ("The number of HTTP response codes generated by registered instances. This count does not include any response codes generated by the load balancer"), + OutputName: ("clb_httpcode_backend_5xx"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_5XX": { - Help: aws.String("The number of HTTP 5XX server error codes generated by the load balancer. This count does not include any response codes generated by the registered instances. The metric is reported if there are no healthy instances registered to the load balancer, or if the request rate exceeds the capacity of the instances (spillover) or the load balancer"), - OutputName: aws.String("clb_httpcode_elb_5xx"), - Statistic: h.StringPointers("Average"), + Help: ("The number of HTTP 5XX server error codes generated by the load balancer. This count does not include any response codes generated by the registered instances. The metric is reported if there are no healthy instances registered to the load balancer, or if the request rate exceeds the capacity of the instances (spillover) or the load balancer"), + OutputName: ("clb_httpcode_elb_5xx"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "Latency": { - Help: aws.String("[HTTP listener] The total time elapsed, in seconds, from the time the load balancer sent the request to a registered instance until the instance started to send the response headers.[TCP listener] The total time elapsed, in seconds, for the load balancer to successfully establish a connection to a registered instance"), - OutputName: aws.String("clb_latency"), - Statistic: h.StringPointers("Average"), + Help: ("[HTTP listener] The total time elapsed, in seconds, from the time the load balancer sent the request to a registered instance until the instance started to send the response headers.[TCP listener] The total time elapsed, in seconds, for the load balancer to successfully establish a connection to a registered instance"), + OutputName: ("clb_latency"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "RequestCount": { - Help: aws.String("The number of requests completed or connections made during the specified interval (1 or 5 minutes)"), - OutputName: aws.String("clb_request_count"), - Statistic: h.StringPointers("Average"), + Help: ("The number of requests completed or connections made during the specified interval (1 or 5 minutes)"), + OutputName: ("clb_request_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "SurgeQueueLength": { - Help: aws.String("The total number of requests (HTTP listener) or connections (TCP listener) that are pending routing to a healthy instance. The maximum size of the queue is 1,024. Additional requests or connections are rejected when the queue is full"), - OutputName: aws.String("clb_surge_queue_length"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of requests (HTTP listener) or connections (TCP listener) that are pending routing to a healthy instance. The maximum size of the queue is 1,024. Additional requests or connections are rejected when the queue is full"), + OutputName: ("clb_surge_queue_length"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "UnHealthyHostCount": { - Help: aws.String("The number of unhealthy instances registered with your load balancer"), - OutputName: aws.String("clb_unhealthy_host_count"), - Statistic: h.StringPointers("Average"), + Help: ("The number of unhealthy instances registered with your load balancer"), + OutputName: ("clb_unhealthy_host_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, diff --git a/elbv2/controller.go b/elbv2/controller.go index 96f6c77..b03f967 100644 --- a/elbv2/controller.go +++ b/elbv2/controller.go @@ -1,6 +1,8 @@ package elbv2 import ( + "errors" + b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" "github.com/aws/aws-sdk-go/aws" @@ -11,18 +13,19 @@ import ( "sync" ) -func CreateResourceDescription(nd *b.NamespaceDescription, td *elbv2.TagDescription) error { +func createResourceDescription(nd *b.NamespaceDescription, td *elbv2.TagDescription) (*b.ResourceDescription, error) { lbID := strings.Split(*td.ResourceArn, "loadbalancer/")[1] lbTypeAndName := strings.Split(lbID, "/") lbName := lbTypeAndName[1] rd := b.ResourceDescription{} - if lbTypeAndName[0] == "net" && *nd.Namespace == "AWS/NetworkELB" { + switch { + case lbTypeAndName[0] == "net" && *nd.Namespace == "AWS/NetworkELB": rd.Type = aws.String("lb-network") - } else if lbTypeAndName[0] == "app" && *nd.Namespace == "AWS/ApplicationELB" { + case lbTypeAndName[0] == "app" && *nd.Namespace == "AWS/ApplicationELB": rd.Type = aws.String("lb-application") - } else { - return nil + default: + return nil, errors.New("invalid lb type") } dd := []*b.DimensionDescription{ @@ -32,24 +35,22 @@ func CreateResourceDescription(nd *b.NamespaceDescription, td *elbv2.TagDescript }, } err := rd.BuildDimensions(dd) - h.LogError(err) + h.LogIfError(err) rd.ID = td.ResourceArn rd.Name = &lbName rd.Parent = nd - nd.Resources = append(nd.Resources, &rd) - return nil + return &rd, nil } // CreateResourceList fetches a list of all ALB/NLB resources in the region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) { defer wg.Done() log.Debug("Creating ALB/NLB resource list ...") - nd.Resources = []*b.ResourceDescription{} session := elbv2.New(nd.Parent.Session) input := elbv2.DescribeLoadBalancersInput{} result, err := session.DescribeLoadBalancers(&input) - h.LogError(err) + h.LogIfError(err) resourceList := []*string{} for _, lb := range result.LoadBalancers { @@ -69,17 +70,23 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { ResourceArns: resourceList[i:end], } tags, err := session.DescribeTags(&dti) - h.LogError(err) + h.LogIfError(err) tagDescriptions = append(tagDescriptions, tags.TagDescriptions...) } + resources := []*b.ResourceDescription{} for _, td := range tagDescriptions { if nd.Parent.TagsFound(td) { - err := CreateResourceDescription(nd, td) - h.LogError(err) + if r, err := createResourceDescription(nd, td); err == nil { + resources = append(resources, r) + } + h.LogIfError(err) } else { continue } } - return nil + + nd.Mutex.Lock() + nd.Resources = resources + nd.Mutex.Unlock() } diff --git a/elbv2/metrics.go b/elbv2/metrics.go index 3aa14d4..db23a82 100644 --- a/elbv2/metrics.go +++ b/elbv2/metrics.go @@ -3,223 +3,222 @@ package elbv2 import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) // ALBMetrics is a map of default MetricDescriptions for this namespace -var ALBMetrics = map[string]*b.MetricDescription{ +var ALBMetrics = map[string]*b.ConfigMetric{ "ActiveConnectionCount": { - Help: aws.String("The total number of concurrent TCP connections active from clients to the load balancer and from the load balancer to targets"), - OutputName: aws.String("alb_alive_connection_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The total number of concurrent TCP connections active from clients to the load balancer and from the load balancer to targets"), + OutputName: ("alb_alive_connection_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ClientTLSNegotiationErrorCount": { - Help: aws.String("The number of TLS connections initiated by the client that did not establish a session with the load balancer. Possible causes include a mismatch of ciphers or protocols"), - OutputName: aws.String("alb_client_tls_negotiation_error_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of TLS connections initiated by the client that did not establish a session with the load balancer. Possible causes include a mismatch of ciphers or protocols"), + OutputName: ("alb_client_tls_negotiation_error_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ConsumedLCUs": { - Help: aws.String("The number of load balancer capacity units (LCU) used by your load balancer"), - OutputName: aws.String("alb_consumed_lcus"), - Statistic: h.StringPointers("Average"), + Help: ("The number of load balancer capacity units (LCU) used by your load balancer"), + OutputName: ("alb_consumed_lcus"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HealthyHostCount": { - Help: aws.String("The number of targets that are considered healthy"), - OutputName: aws.String("alb_healthy_host_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of targets that are considered healthy"), + OutputName: ("alb_healthy_host_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_4XX_Count": { - Help: aws.String("The number of HTTP 4XX client error codes that originate from the load balancer. Client errors are generated when requests are malformed or incomplete"), - OutputName: aws.String("alb_httpcode_elb_4xx_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP 4XX client error codes that originate from the load balancer. Client errors are generated when requests are malformed or incomplete"), + OutputName: ("alb_httpcode_elb_4xx_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_502_Count": { - Help: aws.String("The number of HTTP 502 error codes that originate from the load balancer"), - OutputName: aws.String("alb_httpcode_elb_502_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP 502 error codes that originate from the load balancer"), + OutputName: ("alb_httpcode_elb_502_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_503_Count": { - Help: aws.String("The number of HTTP 503 error codes that originate from the load balancer"), - OutputName: aws.String("alb_httpcode_elb_503_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP 503 error codes that originate from the load balancer"), + OutputName: ("alb_httpcode_elb_503_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_504_Count": { - Help: aws.String("The number of HTTP 504 error codes that originate from the load balancer"), - OutputName: aws.String("alb_httpcode_elb_504_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP 504 error codes that originate from the load balancer"), + OutputName: ("alb_httpcode_elb_504_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_ELB_5XX_Count": { - Help: aws.String("The number of HTTP 5XX server error codes that originate from the load balancer. This count does not include any response codes generated by the targets"), - OutputName: aws.String("alb_httpcode_elb_5xx_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP 5XX server error codes that originate from the load balancer. This count does not include any response codes generated by the targets"), + OutputName: ("alb_httpcode_elb_5xx_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_2XX_Count": { - Help: aws.String("The number of HTTP response codes generated by the targets"), - OutputName: aws.String("alb_httpcode_target_2xx_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP response codes generated by the targets"), + OutputName: ("alb_httpcode_target_2xx_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_3XX_Count": { - Help: aws.String("The number of HTTP response codes generated by the targets"), - OutputName: aws.String("alb_httpcode_target_3xx_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP response codes generated by the targets"), + OutputName: ("alb_httpcode_target_3xx_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_4XX_Count": { - Help: aws.String("The number of HTTP response codes generated by the targets"), - OutputName: aws.String("alb_httpcode_target_4xx_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP response codes generated by the targets"), + OutputName: ("alb_httpcode_target_4xx_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HTTPCode_Target_5XX_Count": { - Help: aws.String("The number of HTTP response codes generated by the targets"), - OutputName: aws.String("alb_httpcode_target_5xx_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of HTTP response codes generated by the targets"), + OutputName: ("alb_httpcode_target_5xx_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NewConnectionCount": { - Help: aws.String("The total number of new TCP connections established from clients to the load balancer and from the load balancer to targets"), - OutputName: aws.String("alb_new_connection_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The total number of new TCP connections established from clients to the load balancer and from the load balancer to targets"), + OutputName: ("alb_new_connection_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ProcessedBytes": { - Help: aws.String("The total number of bytes processed by the load balancer over IPv4 and IPv6. This count includes traffic to and from clients and Lambda functions, and traffic from an Identity Provider (IdP) if user authentication is enabled"), - OutputName: aws.String("alb_processed_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of bytes processed by the load balancer over IPv4 and IPv6. This count includes traffic to and from clients and Lambda functions, and traffic from an Identity Provider (IdP) if user authentication is enabled"), + OutputName: ("alb_processed_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "RequestCount": { - Help: aws.String("The number of requests processed over IPv4 and IPv6. This count includes only the requests with a response generated by a target of the load balancer"), - OutputName: aws.String("alb_request_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of requests processed over IPv4 and IPv6. This count includes only the requests with a response generated by a target of the load balancer"), + OutputName: ("alb_request_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "RequestCountPerTarget": { - Help: aws.String("The average number of requests received by each target in a target group. You must specify the target group using the TargetGroup dimension. This metric does not apply if the target is a Lambda function"), - OutputName: aws.String("alb_request_count_per_target"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The average number of requests received by each target in a target group. You must specify the target group using the TargetGroup dimension. This metric does not apply if the target is a Lambda function"), + OutputName: ("alb_request_count_per_target"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "RuleEvaluations": { - Help: aws.String("The number of rules processed by the load balancer given a request rate averaged over an hour"), - OutputName: aws.String("alb_rule_evaluations"), - Statistic: h.StringPointers("Average"), + Help: ("The number of rules processed by the load balancer given a request rate averaged over an hour"), + OutputName: ("alb_rule_evaluations"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "TargetConnectionErrorCount": { - Help: aws.String("The number of connections that were not successfully established between the load balancer and target. This metric does not apply if the target is a Lambda function"), - OutputName: aws.String("alb_target_connection_error_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of connections that were not successfully established between the load balancer and target. This metric does not apply if the target is a Lambda function"), + OutputName: ("alb_target_connection_error_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "TargetResponseTime": { - Help: aws.String("The time elapsed, in seconds, after the request leaves the load balancer until a response from the target is received. This is equivalent to the target_processing_time field in the access logs"), - OutputName: aws.String("alb_target_response_time"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The time elapsed, in seconds, after the request leaves the load balancer until a response from the target is received. This is equivalent to the target_processing_time field in the access logs"), + OutputName: ("alb_target_response_time"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "UnHealthyHostCount": { - Help: aws.String("The number of targets that are considered unhealthy"), - OutputName: aws.String("alb_unhealthy_host_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of targets that are considered unhealthy"), + OutputName: ("alb_unhealthy_host_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, } // NLBMetrics is a map of default MetricDescriptions for this namespace -var NLBMetrics = map[string]*b.MetricDescription{ +var NLBMetrics = map[string]*b.ConfigMetric{ "ActiveFlowCount": { - Help: aws.String("The total number of concurrent flows (or connections) from clients to targets. This metric includes connections in the SYN_SENT and ESTABLISHED states. TCP connections are not terminated at the load balancer, so a client opening a TCP connection to a target counts as a single flow"), - OutputName: aws.String("nlb_active_flow_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The total number of concurrent flows (or connections) from clients to targets. This metric includes connections in the SYN_SENT and ESTABLISHED states. TCP connections are not terminated at the load balancer, so a client opening a TCP connection to a target counts as a single flow"), + OutputName: ("nlb_active_flow_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ConsumedLCUs": { - Help: aws.String("The number of load balancer capacity units (LCU) used by your load balancer"), - OutputName: aws.String("nlb_consumed_lcus"), - Statistic: h.StringPointers("Average"), + Help: ("The number of load balancer capacity units (LCU) used by your load balancer"), + OutputName: ("nlb_consumed_lcus"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "HealthyHostCount": { - Help: aws.String("The number of targets that are considered healthy"), - OutputName: aws.String("nlb_healthy_host_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of targets that are considered healthy"), + OutputName: ("nlb_healthy_host_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NewFlowCount": { - Help: aws.String("The total number of new flows (or connections) established from clients to targets in the time period"), - OutputName: aws.String("nlb_new_flow_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The total number of new flows (or connections) established from clients to targets in the time period"), + OutputName: ("nlb_new_flow_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ProcessedBytes": { - Help: aws.String("The total number of bytes processed by the load balancer, including TCP/IP headers. This count includes traffic to and from targets, minus health check traffic"), - OutputName: aws.String("nlb_processed_bytes"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of bytes processed by the load balancer, including TCP/IP headers. This count includes traffic to and from targets, minus health check traffic"), + OutputName: ("nlb_processed_bytes"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "TCP_Client_Reset_Count": { - Help: aws.String("The total number of reset (RST) packets sent from a client to a target. These resets are generated by the client and forwarded by the load balancer"), - OutputName: aws.String("nlb_tcp_client_reset_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The total number of reset (RST) packets sent from a client to a target. These resets are generated by the client and forwarded by the load balancer"), + OutputName: ("nlb_tcp_client_reset_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "TCP_ELB_Reset_Count": { - Help: aws.String("The total number of reset (RST) packets generated by the load balancer"), - OutputName: aws.String("nlb_tcp_elb_reset_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The total number of reset (RST) packets generated by the load balancer"), + OutputName: ("nlb_tcp_elb_reset_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "TCP_Target_Reset_Count": { - Help: aws.String("The total number of reset (RST) packets sent from a target to a client. These resets are generated by the target and forwarded by the load balancer"), - OutputName: aws.String("nlb_tcp_target_reset_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The total number of reset (RST) packets sent from a target to a client. These resets are generated by the target and forwarded by the load balancer"), + OutputName: ("nlb_tcp_target_reset_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "UnHealthyHostCount": { - Help: aws.String("The number of targets that are considered unhealthy"), - OutputName: aws.String("nlb_unhealthy_host_count"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of targets that are considered unhealthy"), + OutputName: ("nlb_unhealthy_host_count"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, diff --git a/helpers/file.go b/helpers/file.go index df5d549..bce98ee 100644 --- a/helpers/file.go +++ b/helpers/file.go @@ -11,17 +11,14 @@ import ( // If an error is encountered while reading the file it is logged NOT returned func ReadFile(path *string) *[]byte { absolutePath, err := filepath.Abs(*path) - LogError(err) + LogIfError(err) content, err := ioutil.ReadFile(absolutePath) - LogError(err) + LogIfError(err) return &content } // IsFileExists returns true if a file located a path exists func IsFileExists(path *string) bool { _, err := os.Stat(*path) - if err == nil { - return true - } - return false + return err == nil } diff --git a/helpers/log.go b/helpers/log.go index b9eef21..077ce09 100644 --- a/helpers/log.go +++ b/helpers/log.go @@ -27,15 +27,15 @@ func GetLogLevel(level uint8) log.Level { } } -// LogError logs err if it is non nil -func LogError(err error) { +// LogIfError logs err if it is non nil +func LogIfError(err error) { if err != nil { log.Error(err) } } -// LogErrorExit logs err and exits if the input error is non nil -func LogErrorExit(err error) { +// LogIfErrorExit logs err and exits if the input error is non nil +func LogIfErrorExit(err error) { if err != nil { log.Fatal(err) } diff --git a/helpers/math.go b/helpers/math.go index 4c5dd4b..0c99897 100644 --- a/helpers/math.go +++ b/helpers/math.go @@ -31,7 +31,7 @@ func Sum(items []*float64) (float64, error) { // returns an error if the input slice is empty func Min(items []*float64) (float64, error) { if len(items) < 1 { - return 0.0, errors.New("Cannot calculate minimum of empty list") + return 0.0, errors.New("cannot calculate minimum of empty list") } var min = *items[0] diff --git a/helpers/string.go b/helpers/string.go index c1be50b..fa0d969 100644 --- a/helpers/string.go +++ b/helpers/string.go @@ -10,10 +10,10 @@ import ( // StringPointers converts a slice of string values into a slice of string pointers // // This function complements aws.StringSlice but works with variadic arguments so that an array literal is not required. -func StringPointers(strings ...string) []*string { - sp := make([]*string, len(strings)) +func StringPointers(s ...string) []*string { + sp := make([]*string, len(s)) for i := range sp { - sp[i] = &strings[i] + sp[i] = &s[i] } return sp } diff --git a/helpers/yaml.go b/helpers/yaml.go index 92f623e..7e2d796 100644 --- a/helpers/yaml.go +++ b/helpers/yaml.go @@ -11,7 +11,7 @@ func YAMLDecode(path *string, i interface{}) { if IsFileExists(path) { content := ReadFile(path) err := yaml.Unmarshal(*content, i) - LogErrorExit(err) + LogIfErrorExit(err) } else { log.Fatalf("File: %s does not exists!\n", *path) } diff --git a/main.go b/main.go index 6a9fba4..d7a0154 100644 --- a/main.go +++ b/main.go @@ -33,7 +33,7 @@ func init() { flag.StringVar(&config, "config", "config.yaml", "Path to config file") } -func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd *base.RegionDescription, pi int64, cfg map[string][]*base.MetricDescription) { +func run(nd map[string]*base.NamespaceDescription, cw *cloudwatch.CloudWatch, rd *base.RegionDescription, pi int64) { var delay int64 = 0 for { select { @@ -94,10 +94,10 @@ func processConfig(p *string) *base.Config { } func main() { - // TODO allow hot reload of config flag.Parse() + // TODO allow hot reload of config c := processConfig(&config) - defaults := map[string]map[string]*base.MetricDescription{ + defaults := map[string]map[string]*base.ConfigMetric{ "AWS/RDS": rds.Metrics, "AWS/ElastiCache": elasticache.Metrics, "AWS/EC2": ec2.Metrics, @@ -110,13 +110,15 @@ func main() { mds := c.ConstructMetrics(defaults) for _, r := range c.Regions { - session := session.Must(session.NewSession(&aws.Config{Region: r})) - cw := cloudwatch.New(session) + awsSession := session.Must(session.NewSession(&aws.Config{Region: r})) + cw := cloudwatch.New(awsSession) rd := base.RegionDescription{Region: r} rdd = append(rdd, &rd) - rd.Init(session, c.Tags, mds) + if err := rd.Init(awsSession, c.Tags, mds); err != nil { + log.Fatalf("error initializing region: %s", err) + } - go run(rd.Namespaces, cw, &rd, c.PollInterval, mds) + go run(rd.Namespaces, cw, &rd, c.PollInterval) } http.Handle("/metrics", promhttp.Handler()) diff --git a/network/controller.go b/network/controller.go index 48cf256..ce20fd2 100644 --- a/network/controller.go +++ b/network/controller.go @@ -10,7 +10,7 @@ import ( log "github.com/sirupsen/logrus" ) -func CreateResourceDescription(nd *b.NamespaceDescription, ng *ec2.NatGateway) error { +func createResourceDescription(nd *b.NamespaceDescription, ng *ec2.NatGateway) (*b.ResourceDescription, error) { rd := b.ResourceDescription{} dd := []*b.DimensionDescription{ { @@ -18,33 +18,37 @@ func CreateResourceDescription(nd *b.NamespaceDescription, ng *ec2.NatGateway) e Value: ng.NatGatewayId, }, } - err := rd.BuildDimensions(dd) - h.LogError(err) + if err := rd.BuildDimensions(dd); err != nil { + return nil, err + } rd.ID = ng.NatGatewayId rd.Name = ng.NatGatewayId rd.Type = aws.String("nat-gateway") rd.Parent = nd - nd.Resources = append(nd.Resources, &rd) - return nil + return &rd, nil } // CreateResourceList fetches a list of all NAT gateways in the region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) { defer wg.Done() log.Debug("Creating NatGateway resource list ...") - nd.Resources = []*b.ResourceDescription{} session := ec2.New(nd.Parent.Session) input := ec2.DescribeNatGatewaysInput{ Filter: nd.Parent.Filters, } result, err := session.DescribeNatGateways(&input) - h.LogError(err) + h.LogIfError(err) + resources := []*b.ResourceDescription{} for _, ng := range result.NatGateways { - err := CreateResourceDescription(nd, ng) - h.LogError(err) + if r, err := createResourceDescription(nd, ng); err == nil { + resources = append(resources, r) + } + h.LogIfError(err) } - return nil + nd.Mutex.Lock() + nd.Resources = resources + nd.Mutex.Unlock() } diff --git a/network/metrics.go b/network/metrics.go index f354ed6..0cfe315 100644 --- a/network/metrics.go +++ b/network/metrics.go @@ -3,107 +3,106 @@ package network import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) // Metrics is a map of default MetricDescriptions for this namespace -var Metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.ConfigMetric{ "ActiveConnectionCount": { - Help: aws.String("The total number of concurrent active TCP connections through the NAT gateway"), - OutputName: aws.String("nat_gateway_active_connection_count"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of concurrent active TCP connections through the NAT gateway"), + OutputName: ("nat_gateway_active_connection_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "BytesInFromDestination": { - Help: aws.String("The number of bytes received by the NAT gateway from the destination"), - OutputName: aws.String("nat_gateway_bytes_in_from_destination"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes received by the NAT gateway from the destination"), + OutputName: ("nat_gateway_bytes_in_from_destination"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "BytesInFromSource": { - Help: aws.String("The number of bytes received by the NAT gateway from clients in your VPC"), - OutputName: aws.String("nat_gateway_bytes_in_from_source"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes received by the NAT gateway from clients in your VPC"), + OutputName: ("nat_gateway_bytes_in_from_source"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "BytesOutToDestination": { - Help: aws.String("The number of bytes sent out through the NAT gateway to the destination"), - OutputName: aws.String("nat_gateway_bytes_out_to_destination"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes sent out through the NAT gateway to the destination"), + OutputName: ("nat_gateway_bytes_out_to_destination"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "BytesOutToSource": { - Help: aws.String("The number of bytes sent through the NAT gateway to the clients in your VPC"), - OutputName: aws.String("nat_gateway_bytes_out_to_source"), - Statistic: h.StringPointers("Average"), + Help: ("The number of bytes sent through the NAT gateway to the clients in your VPC"), + OutputName: ("nat_gateway_bytes_out_to_source"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ConnectionAttemptCount": { - Help: aws.String("The number of connection attempts made through the NAT gateway"), - OutputName: aws.String("nat_gateway_connection_attempt_count"), - Statistic: h.StringPointers("Average"), + Help: ("The number of connection attempts made through the NAT gateway"), + OutputName: ("nat_gateway_connection_attempt_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ConnectionEstablishedCount": { - Help: aws.String("The number of connections established through the NAT gateway"), - OutputName: aws.String("nat_gateway_connection_established_count"), - Statistic: h.StringPointers("Average"), + Help: ("The number of connections established through the NAT gateway"), + OutputName: ("nat_gateway_connection_established_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ErrorPortAllocation": { - Help: aws.String("The number of times the NAT gateway could not allocate a source port"), - OutputName: aws.String("nat_gateway_error_port_allocation"), - Statistic: h.StringPointers("Average"), + Help: ("The number of times the NAT gateway could not allocate a source port"), + OutputName: ("nat_gateway_error_port_allocation"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "IdleTimeoutCount": { - Help: aws.String("The number of connections that transitioned from the active state to the idle state. An active connection transitions to idle if it was not closed gracefully and there was no activity for the last 350 seconds"), - OutputName: aws.String("nat_gateway_idle_timeout_count"), - Statistic: h.StringPointers("Average"), + Help: ("The number of connections that transitioned from the active state to the idle state. An active connection transitions to idle if it was not closed gracefully and there was no activity for the last 350 seconds"), + OutputName: ("nat_gateway_idle_timeout_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsDropCount": { - Help: aws.String("The number of packets dropped by the NAT gateway"), - OutputName: aws.String("nat_gateway_packets_drop_count"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets dropped by the NAT gateway"), + OutputName: ("nat_gateway_packets_drop_count"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsInFromDestination": { - Help: aws.String("The number of packets received by the NAT gateway from the destination"), - OutputName: aws.String("nat_gateway_packets_in_from_destination"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets received by the NAT gateway from the destination"), + OutputName: ("nat_gateway_packets_in_from_destination"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsInFromSource": { - Help: aws.String("The number of packets received by the NAT gateway from clients in your VPC"), - OutputName: aws.String("nat_gateway_packets_in_from_source"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets received by the NAT gateway from clients in your VPC"), + OutputName: ("nat_gateway_packets_in_from_source"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsOutToDestination": { - Help: aws.String("The number of packets sent out through the NAT gateway to the destination"), - OutputName: aws.String("nat_gateway_packets_out_to_destination"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets sent out through the NAT gateway to the destination"), + OutputName: ("nat_gateway_packets_out_to_destination"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "PacketsOutToSource": { - Help: aws.String("The number of packets sent through the NAT gateway to the clients in your VPC"), - OutputName: aws.String("nat_gateway_packets_out_to_source"), - Statistic: h.StringPointers("Average"), + Help: ("The number of packets sent through the NAT gateway to the clients in your VPC"), + OutputName: ("nat_gateway_packets_out_to_source"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, diff --git a/rds/controller.go b/rds/controller.go index 6d4319a..4d9f079 100644 --- a/rds/controller.go +++ b/rds/controller.go @@ -11,7 +11,7 @@ import ( "github.com/aws/aws-sdk-go/service/rds" ) -func CreateResourceDescription(nd *b.NamespaceDescription, dbi *rds.DBInstance) error { +func createResourceDescription(nd *b.NamespaceDescription, dbi *rds.DBInstance) (*b.ResourceDescription, error) { rd := b.ResourceDescription{} dd := []*b.DimensionDescription{ { @@ -19,31 +19,30 @@ func CreateResourceDescription(nd *b.NamespaceDescription, dbi *rds.DBInstance) Value: dbi.DBInstanceIdentifier, }, } - err := rd.BuildDimensions(dd) - h.LogError(err) + if err := rd.BuildDimensions(dd); err != nil { + return nil, err + } + rd.ID = dbi.DBInstanceIdentifier rd.Name = dbi.DBInstanceIdentifier rd.Type = aws.String("rds") rd.Parent = nd - nd.Mutex.Lock() - nd.Resources = append(nd.Resources, &rd) - nd.Mutex.Unlock() - return nil + return &rd, nil } // CreateResourceList fetches a list of all RDS databases in the region -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) { defer wg.Done() log.Debug("Creating RDS resource list ...") - nd.Resources = []*b.ResourceDescription{} session := rds.New(nd.Parent.Session) input := rds.DescribeDBInstancesInput{} result, err := session.DescribeDBInstances(&input) - h.LogError(err) + h.LogIfError(err) var w sync.WaitGroup w.Add(len(result.DBInstances)) + ch := make(chan *b.ResourceDescription, len(result.DBInstances)) for _, dbi := range result.DBInstances { go func(dbi *rds.DBInstance, wg *sync.WaitGroup) { defer wg.Done() @@ -51,14 +50,24 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { ResourceName: dbi.DBInstanceArn, } tags, err := session.ListTagsForResource(&input) - h.LogError(err) + h.LogIfError(err) if nd.Parent.TagsFound(tags) { - err := CreateResourceDescription(nd, dbi) - h.LogError(err) + if r, err := createResourceDescription(nd, dbi); err == nil { + ch <- r + } + h.LogIfError(err) } }(dbi, &w) } w.Wait() - return nil + close(ch) + + resources := []*b.ResourceDescription{} + for r := range ch { + resources = append(resources, r) + } + nd.Mutex.Lock() + nd.Resources = resources + nd.Mutex.Unlock() } diff --git a/rds/metrics.go b/rds/metrics.go index acb3f6c..170d8c4 100644 --- a/rds/metrics.go +++ b/rds/metrics.go @@ -3,212 +3,211 @@ package rds import ( b "github.com/CoverGenius/cloudwatch-prometheus-exporter/base" h "github.com/CoverGenius/cloudwatch-prometheus-exporter/helpers" - "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/cloudwatch" ) // Metrics is a map of default MetricDescriptions for this namespace -var Metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.ConfigMetric{ "BinLogDiskUsage": { - Help: aws.String("The amount of disk space occupied by binary logs on the master. Applies to MySQL read replicas"), - OutputName: aws.String("rds_bin_log_disk_usage"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The amount of disk space occupied by binary logs on the master. Applies to MySQL read replicas"), + OutputName: ("rds_bin_log_disk_usage"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "BurstBalance": { - Help: aws.String("The percent of General Purpose SSD (gp2) burst-bucket I/O credits available"), - OutputName: aws.String("rds_burst_balance"), - Statistic: h.StringPointers("Average", "Minimum"), + Help: ("The percent of General Purpose SSD (gp2) burst-bucket I/O credits available"), + OutputName: ("rds_burst_balance"), + Statistics: h.StringPointers("Average", "Minimum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUCreditBalance": { - Help: aws.String("The number of earned CPU credits that an instance has accrued. This represents the number of credits currently available."), - OutputName: aws.String("rds_cpu_credit_balance"), - Statistic: h.StringPointers("Average", "Minimum"), + Help: ("The number of earned CPU credits that an instance has accrued. This represents the number of credits currently available."), + OutputName: ("rds_cpu_credit_balance"), + Statistics: h.StringPointers("Average", "Minimum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUCreditUsage": { - Help: aws.String("The number of CPU credits spent by the instance for CPU utilization. One CPU credit equals one vCPU running at 100 percent utilization for one minute or an equivalent combination of vCPUs, utilization, and time"), - OutputName: aws.String("rds_cpu_credit_usage"), - Statistic: h.StringPointers("Average", "Sum"), + Help: ("The number of CPU credits spent by the instance for CPU utilization. One CPU credit equals one vCPU running at 100 percent utilization for one minute or an equivalent combination of vCPUs, utilization, and time"), + OutputName: ("rds_cpu_credit_usage"), + Statistics: h.StringPointers("Average", "Sum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUSurplusCreditBalance": { - Help: aws.String("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), - OutputName: aws.String("rds_cpu_surplus_credit_balance"), - Statistic: h.StringPointers("Average"), + Help: ("The number of surplus credits that have been spent by an unlimited instance when its CPUCreditBalance value is zero"), + OutputName: ("rds_cpu_surplus_credit_balance"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUSurplusCreditsCharged": { - Help: aws.String("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), - OutputName: aws.String("rds_cpu_surplus_credits_charged"), - Statistic: h.StringPointers("Average"), + Help: ("The number of spent surplus credits that are not paid down by earned CPU credits, and which thus incur an additional charge"), + OutputName: ("rds_cpu_surplus_credits_charged"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "CPUUtilization": { - Help: aws.String("The percentage of CPU utilization"), - OutputName: aws.String("rds_cpu_utilization"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The percentage of CPU utilization"), + OutputName: ("rds_cpu_utilization"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DatabaseConnections": { - Help: aws.String("The number of database connections in use"), - OutputName: aws.String("rds_database_connections"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The number of database connections in use"), + OutputName: ("rds_database_connections"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DBLoad": { - Help: aws.String("The number of active sessions for the DB engine. Typically, you want the data for the average number of active sessions"), - OutputName: aws.String("rds_db_load"), - Statistic: h.StringPointers("Average"), + Help: ("The number of active sessions for the DB engine. Typically, you want the data for the average number of active sessions"), + OutputName: ("rds_db_load"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DBLoadCPU": { - Help: aws.String("The number of active sessions where the wait event type is CPU"), - OutputName: aws.String("rds_db_load_cpu"), - Statistic: h.StringPointers("Average"), + Help: ("The number of active sessions where the wait event type is CPU"), + OutputName: ("rds_db_load_cpu"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DBLoadNonCPU": { - Help: aws.String("The number of active sessions where the wait event type is not CPU"), - OutputName: aws.String("rds_db_load_non_cpu"), - Statistic: h.StringPointers("Average"), + Help: ("The number of active sessions where the wait event type is not CPU"), + OutputName: ("rds_db_load_non_cpu"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "DiskQueueDepth": { - Help: aws.String("The number of outstanding IOs (read/write requests) waiting to access the disk"), - OutputName: aws.String("rds_disk_queue_depth"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The number of outstanding IOs (read/write requests) waiting to access the disk"), + OutputName: ("rds_disk_queue_depth"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "FreeableMemory": { - Help: aws.String("The amount of available random access memory"), - OutputName: aws.String("rds_freeable_memory"), - Statistic: h.StringPointers("Average"), + Help: ("The amount of available random access memory"), + OutputName: ("rds_freeable_memory"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "FreeStorageSpace": { - Help: aws.String("The amount of available storage space"), - OutputName: aws.String("rds_free_storage_space"), - Statistic: h.StringPointers("Average"), + Help: ("The amount of available storage space"), + OutputName: ("rds_free_storage_space"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "MaximumUsedTransactionIDs": { - Help: aws.String("The maximum transaction ID that has been used. Applies to PostgreSQL"), - OutputName: aws.String("rds_maximum_used_transaction_ids"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The maximum transaction ID that has been used. Applies to PostgreSQL"), + OutputName: ("rds_maximum_used_transaction_ids"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkReceiveThroughput": { - Help: aws.String("The incoming (Receive) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), - OutputName: aws.String("rds_network_receive_throughput"), - Statistic: h.StringPointers("Average"), + Help: ("The incoming (Receive) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), + OutputName: ("rds_network_receive_throughput"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "NetworkTransmitThroughput": { - Help: aws.String("The outgoing (Transmit) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), - OutputName: aws.String("rds_network_transmit_throughput"), - Statistic: h.StringPointers("Average"), + Help: ("The outgoing (Transmit) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication"), + OutputName: ("rds_network_transmit_throughput"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "OldestReplicationSlotLag": { - Help: aws.String("The lagging size of the replica lagging the most in terms of WAL data received. Applies to PostgreSQL"), - OutputName: aws.String("rds_oldest_replication_slot_lag"), - Statistic: h.StringPointers("Average"), + Help: ("The lagging size of the replica lagging the most in terms of WAL data received. Applies to PostgreSQL"), + OutputName: ("rds_oldest_replication_slot_lag"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ReadIOPS": { - Help: aws.String("The average number of disk read I/O operations per second"), - OutputName: aws.String("rds_read_iops"), - Statistic: h.StringPointers("Average"), + Help: ("The average number of disk read I/O operations per second"), + OutputName: ("rds_read_iops"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ReadLatency": { - Help: aws.String("The amount of time taken per disk I/O operation"), - OutputName: aws.String("rds_read_latency"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The amount of time taken per disk I/O operation"), + OutputName: ("rds_read_latency"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ReadThroughput": { - Help: aws.String("The number of bytes read from disk per second"), - OutputName: aws.String("rds_read_throughput"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The number of bytes read from disk per second"), + OutputName: ("rds_read_throughput"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ReplicaLag": { - Help: aws.String("The amount of time a Read Replica DB instance lags behind the source DB instance. Applies to MySQL, MariaDB, and PostgreSQL Read Replicas"), - OutputName: aws.String("rds_replica_lag"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The amount of time a Read Replica DB instance lags behind the source DB instance. Applies to MySQL, MariaDB, and PostgreSQL Read Replicas"), + OutputName: ("rds_replica_lag"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "ReplicationSlotDiskUsage": { - Help: aws.String("The disk space used by replication slot files. Applies to PostgreSQL"), - OutputName: aws.String("rds_replication_slot_disk_usage"), - Statistic: h.StringPointers("Average"), + Help: ("The disk space used by replication slot files. Applies to PostgreSQL"), + OutputName: ("rds_replication_slot_disk_usage"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "SwapUsage": { - Help: aws.String("The amount of swap space used on the DB instance. This metric is not available for SQL Server"), - OutputName: aws.String("rds_swap_usage"), - Statistic: h.StringPointers("Average"), + Help: ("The amount of swap space used on the DB instance. This metric is not available for SQL Server"), + OutputName: ("rds_swap_usage"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "TransactionLogsDiskUsage": { - Help: aws.String("The disk space used by transaction logs. Applies to PostgreSQL"), - OutputName: aws.String("rds_transaction_logs_disk_usage"), - Statistic: h.StringPointers("Average"), + Help: ("The disk space used by transaction logs. Applies to PostgreSQL"), + OutputName: ("rds_transaction_logs_disk_usage"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "TransactionLogsGeneration": { - Help: aws.String("The size of transaction logs generated per second. Applies to PostgreSQL"), - OutputName: aws.String("rds_transaction_logs_generation"), - Statistic: h.StringPointers("Average"), + Help: ("The size of transaction logs generated per second. Applies to PostgreSQL"), + OutputName: ("rds_transaction_logs_generation"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "WriteIOPS": { - Help: aws.String("The average number of disk write I/O operations per second"), - OutputName: aws.String("rds_write_iops"), - Statistic: h.StringPointers("Average"), + Help: ("The average number of disk write I/O operations per second"), + OutputName: ("rds_write_iops"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "WriteLatency": { - Help: aws.String("The amount of time taken per disk I/O operation"), - OutputName: aws.String("rds_write_latency"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The amount of time taken per disk I/O operation"), + OutputName: ("rds_write_latency"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, "WriteThroughput": { - Help: aws.String("The number of bytes written to disk per second"), - OutputName: aws.String("rds_write_throughput"), - Statistic: h.StringPointers("Average", "Maximum"), + Help: ("The number of bytes written to disk per second"), + OutputName: ("rds_write_throughput"), + Statistics: h.StringPointers("Average", "Maximum"), PeriodSeconds: 300, Dimensions: []*cloudwatch.Dimension{}, }, diff --git a/s3/controller.go b/s3/controller.go index ae1a589..17deb68 100644 --- a/s3/controller.go +++ b/s3/controller.go @@ -10,7 +10,7 @@ import ( log "github.com/sirupsen/logrus" ) -func CreateResourceDescription(nd *b.NamespaceDescription, bucket *s3.Bucket) error { +func createResourceDescription(nd *b.NamespaceDescription, bucket *s3.Bucket) (*b.ResourceDescription, error) { rd := b.ResourceDescription{} dd := []*b.DimensionDescription{ { @@ -23,36 +23,33 @@ func CreateResourceDescription(nd *b.NamespaceDescription, bucket *s3.Bucket) er }, } err := rd.BuildDimensions(dd) - h.LogError(err) + h.LogIfError(err) rd.ID = bucket.Name rd.Name = bucket.Name rd.Type = aws.String("s3") rd.Parent = nd - nd.Mutex.Lock() - nd.Resources = append(nd.Resources, &rd) - nd.Mutex.Unlock() - return nil + return &rd, err } // CreateResourceList fetches a list of all S3 buckets in the region // // TODO channel can be added instead of sync.WaitGroup -func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { +func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) { log.Debug("Creating S3 resource list ...") defer wg.Done() - nd.Resources = []*b.ResourceDescription{} session := s3.New(nd.Parent.Session) input := s3.ListBucketsInput{} result, err := session.ListBuckets(&input) - h.LogError(err) + h.LogIfError(err) tagError := "NoSuchTagSet" var w sync.WaitGroup w.Add(len(result.Buckets)) + ch := make(chan *b.ResourceDescription, len(result.Buckets)) for _, bucket := range result.Buckets { go func(bucket *s3.Bucket, wg *sync.WaitGroup) { defer wg.Done() @@ -60,7 +57,7 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { Bucket: bucket.Name, } location, err := session.GetBucketLocation(&input) - h.LogError(err) + h.LogIfError(err) if location.LocationConstraint == nil || *location.LocationConstraint != *nd.Parent.Region { return @@ -71,17 +68,26 @@ func CreateResourceList(nd *b.NamespaceDescription, wg *sync.WaitGroup) error { } tags, err := session.GetBucketTagging(&locationInput) - if b.IsSameErrorType(err, &tagError) == false { - h.LogError(err) + if !b.IsSameErrorType(err, &tagError) { + h.LogIfError(err) } if nd.Parent.TagsFound(tags) { - err := CreateResourceDescription(nd, bucket) - h.LogError(err) + if r, err := createResourceDescription(nd, bucket); err == nil { + ch <- r + } + h.LogIfError(err) } }(bucket, &w) } w.Wait() + close(ch) - return nil + resources := []*b.ResourceDescription{} + for r := range ch { + resources = append(resources, r) + } + nd.Mutex.Lock() + nd.Resources = resources + nd.Mutex.Unlock() } diff --git a/s3/metrics.go b/s3/metrics.go index eafba11..7841207 100644 --- a/s3/metrics.go +++ b/s3/metrics.go @@ -8,11 +8,11 @@ import ( ) // Metrics is a map of default MetricDescriptions for this namespace -var Metrics = map[string]*b.MetricDescription{ +var Metrics = map[string]*b.ConfigMetric{ "BucketSizeBytes": { - Help: aws.String("The amount of data in bytes stored in a bucket in the STANDARD storage class, INTELLIGENT_TIERING storage class, Standard - Infrequent Access (STANDARD_IA) storage class, OneZone - Infrequent Access (ONEZONE_IA), Reduced Redundancy Storage (RRS) class, Deep Archive Storage (DEEP_ARCHIVE) class or, Glacier (GLACIER) storage class"), - OutputName: aws.String("s3_bucket_size_bytes"), - Statistic: h.StringPointers("Average"), + Help: "The amount of data in bytes stored in a bucket in the STANDARD storage class, INTELLIGENT_TIERING storage class, Standard - Infrequent Access (STANDARD_IA) storage class, OneZone - Infrequent Access (ONEZONE_IA), Reduced Redundancy Storage (RRS) class, Deep Archive Storage (DEEP_ARCHIVE) class or, Glacier (GLACIER) storage class", + OutputName: "s3_bucket_size_bytes", + Statistics: h.StringPointers("Average"), PeriodSeconds: 60 * 60 * 24, RangeSeconds: 60 * 60 * 24 * 7, Dimensions: []*cloudwatch.Dimension{ @@ -23,9 +23,9 @@ var Metrics = map[string]*b.MetricDescription{ }, }, "NumberOfObjects": { - Help: aws.String("The total number of objects stored in a bucket for all storage classes except for the GLACIER storage class"), - OutputName: aws.String("s3_number_of_objects"), - Statistic: h.StringPointers("Average"), + Help: ("The total number of objects stored in a bucket for all storage classes except for the GLACIER storage class"), + OutputName: ("s3_number_of_objects"), + Statistics: h.StringPointers("Average"), PeriodSeconds: 60 * 60 * 24, RangeSeconds: 60 * 60 * 24 * 7, Dimensions: []*cloudwatch.Dimension{