From ec1ab2eb7000b7c8533cbff808d839b776282263 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Wed, 7 Nov 2018 18:25:07 -0800 Subject: [PATCH 01/17] Refactor gRPC import to call the new ingester interface. --- .gitignore | 1 + importsrv/server.go | 95 ++++++------ importsrv/server_test.go | 272 +++++++++++++++++---------------- metricingester/types.go | 205 +++++++++++++++++++++++++ samplers/metricpb/metric.pb.go | 110 +++++++++---- samplers/metricpb/metric.proto | 1 + server.go | 8 +- 7 files changed, 473 insertions(+), 219 deletions(-) create mode 100644 metricingester/types.go diff --git a/.gitignore b/.gitignore index 5af5de6e1..91884b0a0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.o *.a *.so +.idea/ # Folders _obj diff --git a/importsrv/server.go b/importsrv/server.go index 7d8c8efd5..2d64cc9b6 100644 --- a/importsrv/server.go +++ b/importsrv/server.go @@ -6,12 +6,14 @@ package importsrv import ( + "errors" "fmt" "net" "time" + "github.com/stripe/veneur/metricingester" + "github.com/golang/protobuf/ptypes/empty" - "github.com/segmentio/fasthash/fnv1a" "golang.org/x/net/context" // This can be replace with "context" after Go 1.8 support is dropped "google.golang.org/grpc" @@ -25,9 +27,10 @@ const ( responseDurationMetric = "import.response_duration_ns" ) -// MetricIngester reads metrics from protobufs -type MetricIngester interface { - IngestMetrics([]*metricpb.Metric) +// metricsIngester is the contract expected of objects to which metrics will be submitted. +type metricsIngester interface { + Ingest(metricingester.Metric) error + Merge(metricingester.Digest) error } // Server wraps a gRPC server and implements the forwardrpc.Forward service. @@ -36,8 +39,8 @@ type MetricIngester interface { // should always be routed to the same MetricIngester. type Server struct { *grpc.Server - metricOuts []MetricIngester - opts *options + ingester metricsIngester + opts *options } type options struct { @@ -50,11 +53,11 @@ type Option func(*options) // New creates an unstarted Server with the input MetricIngester's to send // output to. -func New(metricOuts []MetricIngester, opts ...Option) *Server { +func New(ingester metricsIngester, opts ...Option) *Server { res := &Server{ - Server: grpc.NewServer(), - metricOuts: metricOuts, - opts: &options{}, + Server: grpc.NewServer(), + ingester: ingester, + opts: &options{}, } for _, opt := range opts { @@ -85,41 +88,52 @@ func (s *Server) Serve(addr string) error { // Static maps of tags used in the SendMetrics handler var ( - grpcTags = map[string]string{"protocol": "grpc"} - responseGroupTags = map[string]string{ - "protocol": "grpc", - "part": "group", - } + grpcTags = map[string]string{"protocol": "grpc"} responseSendTags = map[string]string{ "protocol": "grpc", "part": "send", } ) -// SendMetrics takes a list of metrics and hashes each one (based on the -// metric key) to a specific metric ingester. +// SendMetrics accepts a batch of metrics for importing. func (s *Server) SendMetrics(ctx context.Context, mlist *forwardrpc.MetricList) (*empty.Empty, error) { span, _ := trace.StartSpanFromContext(ctx, "veneur.opentracing.importsrv.handle_send_metrics") span.SetTag("protocol", "grpc") defer span.ClientFinish(s.opts.traceClient) - dests := make([][]*metricpb.Metric, len(s.metricOuts)) - - // group metrics by their destination - groupStart := time.Now() - for _, m := range mlist.Metrics { - workerIdx := s.hashMetric(m) % uint32(len(dests)) - dests[workerIdx] = append(dests[workerIdx], m) - } - span.Add(ssf.Timing(responseDurationMetric, time.Since(groupStart), time.Nanosecond, responseGroupTags)) - - // send each set of metrics to its destination. Since this is typically - // implemented with channels, batching the metrics together avoids - // repeated channel send operations sendStart := time.Now() - for i, ms := range dests { - if len(ms) > 0 { - s.metricOuts[i].IngestMetrics(ms) + for _, m := range mlist.Metrics { + hostname := m.GetHostname() + tags := m.GetTags() + switch v := m.GetValue().(type) { + case *metricpb.Metric_Gauge: + s.ingester.Ingest(metricingester.NewGauge(m.Name, v.Gauge.GetValue(), tags, 1.0, hostname)) + case *metricpb.Metric_Counter: + s.ingester.Ingest(metricingester.NewCounter(m.Name, v.Counter.GetValue(), tags, 1.0, hostname)) + case *metricpb.Metric_Set: + s.ingester.Merge(metricingester.NewSetDigest(m.Name, v.Set, tags, hostname)) + case *metricpb.Metric_Histogram: + // Scope is a legacy concept used to designate whether a metric needed to be emitted locally + // or aggregated globally. + // + // The presence of a hostname now encodes the same concept. + // + // However, histograms have a special "MixedScope" that emits percentiles globally and + // min/max/count values locally. + // + // We need a special metric type to represent this: the MixedHistogram. + switch m.GetScope() { + case metricpb.Scope_Local, metricpb.Scope_Global: + s.ingester.Merge( + metricingester.NewHistogramDigest(m.Name, v.Histogram, tags, hostname), + ) + case metricpb.Scope_Mixed: + s.ingester.Merge( + metricingester.NewMixedHistogramDigest(m.Name, v.Histogram, tags, hostname), + ) + } + case nil: + return nil, errors.New("can't import a metric with a nil value") } } @@ -130,18 +144,3 @@ func (s *Server) SendMetrics(ctx context.Context, mlist *forwardrpc.MetricList) return &empty.Empty{}, nil } - -// hashMetric returns a 32-bit hash from the input metric based on its name, -// type, and tags. -// -// The fnv1a package is used as opposed to fnv from the standard library, as -// it avoids allocations by not using the hash.Hash interface and by avoiding -// string to []byte conversions. -func (s *Server) hashMetric(m *metricpb.Metric) uint32 { - h := fnv1a.HashString32(m.Name) - h = fnv1a.AddString32(h, m.Type.String()) - for _, tag := range m.Tags { - h = fnv1a.AddString32(h, tag) - } - return h -} diff --git a/importsrv/server_test.go b/importsrv/server_test.go index eb14ca3de..42e9fc9de 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -1,137 +1,139 @@ package importsrv -import ( - "context" - "fmt" - "math/rand" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stripe/veneur/forwardrpc" - "github.com/stripe/veneur/samplers/metricpb" - metrictest "github.com/stripe/veneur/samplers/metricpb/testutils" - "github.com/stripe/veneur/trace" -) - -type testMetricIngester struct { - metrics []*metricpb.Metric -} - -func (mi *testMetricIngester) IngestMetrics(ms []*metricpb.Metric) { - mi.metrics = append(mi.metrics, ms...) -} - -func (mi *testMetricIngester) clear() { - mi.metrics = mi.metrics[:0] -} - -// Test that sending the same metric to a Veneur results in it being hashed -// to the same worker every time -func TestSendMetrics_ConsistentHash(t *testing.T) { - ingesters := []*testMetricIngester{&testMetricIngester{}, &testMetricIngester{}} - - casted := make([]MetricIngester, len(ingesters)) - for i, ingester := range ingesters { - casted[i] = ingester - } - s := New(casted) - - inputs := []*metricpb.Metric{ - &metricpb.Metric{Name: "test.counter", Type: metricpb.Type_Counter, Tags: []string{"tag:1"}}, - &metricpb.Metric{Name: "test.gauge", Type: metricpb.Type_Gauge}, - &metricpb.Metric{Name: "test.histogram", Type: metricpb.Type_Histogram, Tags: []string{"type:histogram"}}, - &metricpb.Metric{Name: "test.set", Type: metricpb.Type_Set}, - &metricpb.Metric{Name: "test.gauge3", Type: metricpb.Type_Gauge}, - } - - // Send the same inputs many times - for i := 0; i < 10; i++ { - s.SendMetrics(context.Background(), &forwardrpc.MetricList{inputs}) - - assert.Equal(t, []*metricpb.Metric{inputs[0], inputs[4]}, - ingesters[0].metrics, "Ingester 0 has the wrong metrics") - assert.Equal(t, []*metricpb.Metric{inputs[1], inputs[2], inputs[3]}, - ingesters[1].metrics, "Ingester 1 has the wrong metrics") - - for _, ingester := range ingesters { - ingester.clear() - } - } -} - -func TestSendMetrics_Empty(t *testing.T) { - ingester := &testMetricIngester{} - s := New([]MetricIngester{ingester}) - s.SendMetrics(context.Background(), &forwardrpc.MetricList{}) - - assert.Empty(t, ingester.metrics, "The server shouldn't have submitted "+ - "any metrics") -} - -func TestOptions_WithTraceClient(t *testing.T) { - c, err := trace.NewClient(trace.DefaultVeneurAddress) - if err != nil { - t.Fatalf("failed to initialize a trace client: %v", err) - } - - s := New([]MetricIngester{}, WithTraceClient(c)) - assert.Equal(t, c, s.opts.traceClient, "WithTraceClient didn't correctly "+ - "set the trace client") -} - -type noopChannelMetricIngester struct { - in chan []*metricpb.Metric - quit chan struct{} -} - -func newNoopChannelMetricIngester() *noopChannelMetricIngester { - return &noopChannelMetricIngester{ - in: make(chan []*metricpb.Metric), - quit: make(chan struct{}), - } -} - -func (mi *noopChannelMetricIngester) start() { - go func() { - for { - select { - case <-mi.in: - case <-mi.quit: - return - } - } - }() -} - -func (mi *noopChannelMetricIngester) stop() { - mi.quit <- struct{}{} -} - -func (mi *noopChannelMetricIngester) IngestMetrics(ms []*metricpb.Metric) { - mi.in <- ms -} - -func BenchmarkImportServerSendMetrics(b *testing.B) { - rand.Seed(time.Now().Unix()) - - metrics := metrictest.RandomForwardMetrics(10000) - for _, inputSize := range []int{10, 100, 1000, 10000} { - ingesters := make([]MetricIngester, 100) - for i := range ingesters { - ingester := newNoopChannelMetricIngester() - ingester.start() - defer ingester.stop() - ingesters[i] = ingester - } - s := New(ingesters) - ctx := context.Background() - input := &forwardrpc.MetricList{Metrics: metrics[:inputSize]} - - b.Run(fmt.Sprintf("InputSize=%d", inputSize), func(b *testing.B) { - for i := 0; i < b.N; i++ { - s.SendMetrics(ctx, input) - } - }) - } -} +// TODO(clin): Add tests back. +// +//import ( +// "context" +// "fmt" +// "math/rand" +// "testing" +// "time" +// +// "github.com/stretchr/testify/assert" +// "github.com/stripe/veneur/forwardrpc" +// "github.com/stripe/veneur/samplers/metricpb" +// metrictest "github.com/stripe/veneur/samplers/metricpb/testutils" +// "github.com/stripe/veneur/trace" +//) +// +//type testMetricIngester struct { +// metrics []*metricpb.Metric +//} +// +//func (mi *testMetricIngester) IngestMetrics(ms []*metricpb.Metric) { +// mi.metrics = append(mi.metrics, ms...) +//} +// +//func (mi *testMetricIngester) clear() { +// mi.metrics = mi.metrics[:0] +//} +// +//// Test that sending the same metric to a Veneur results in it being hashed +//// to the same worker every time +//func TestSendMetrics_ConsistentHash(t *testing.T) { +// ingesters := []*testMetricIngester{&testMetricIngester{}, &testMetricIngester{}} +// +// casted := make([]MetricIngester, len(ingesters)) +// for i, ingester := range ingesters { +// casted[i] = ingester +// } +// s := New(casted) +// +// inputs := []*metricpb.Metric{ +// &metricpb.Metric{Name: "test.counter", Type: metricpb.Type_Counter, Tags: []string{"tag:1"}}, +// &metricpb.Metric{Name: "test.gauge", Type: metricpb.Type_Gauge}, +// &metricpb.Metric{Name: "test.histogram", Type: metricpb.Type_Histogram, Tags: []string{"type:histogram"}}, +// &metricpb.Metric{Name: "test.set", Type: metricpb.Type_Set}, +// &metricpb.Metric{Name: "test.gauge3", Type: metricpb.Type_Gauge}, +// } +// +// // Send the same inputs many times +// for i := 0; i < 10; i++ { +// s.SendMetrics(context.Background(), &forwardrpc.MetricList{inputs}) +// +// assert.Equal(t, []*metricpb.Metric{inputs[0], inputs[4]}, +// ingesters[0].metrics, "Ingester 0 has the wrong metrics") +// assert.Equal(t, []*metricpb.Metric{inputs[1], inputs[2], inputs[3]}, +// ingesters[1].metrics, "Ingester 1 has the wrong metrics") +// +// for _, ingester := range ingesters { +// ingester.clear() +// } +// } +//} +// +//func TestSendMetrics_Empty(t *testing.T) { +// ingester := &testMetricIngester{} +// s := New([]MetricIngester{ingester}) +// s.SendMetrics(context.Background(), &forwardrpc.MetricList{}) +// +// assert.Empty(t, ingester.metrics, "The server shouldn't have submitted "+ +// "any metrics") +//} +// +//func TestOptions_WithTraceClient(t *testing.T) { +// c, err := trace.NewClient(trace.DefaultVeneurAddress) +// if err != nil { +// t.Fatalf("failed to initialize a trace client: %v", err) +// } +// +// s := New([]MetricIngester{}, WithTraceClient(c)) +// assert.Equal(t, c, s.opts.traceClient, "WithTraceClient didn't correctly "+ +// "set the trace client") +//} +// +//type noopChannelMetricIngester struct { +// in chan []*metricpb.Metric +// quit chan struct{} +//} +// +//func newNoopChannelMetricIngester() *noopChannelMetricIngester { +// return &noopChannelMetricIngester{ +// in: make(chan []*metricpb.Metric), +// quit: make(chan struct{}), +// } +//} +// +//func (mi *noopChannelMetricIngester) start() { +// go func() { +// for { +// select { +// case <-mi.in: +// case <-mi.quit: +// return +// } +// } +// }() +//} +// +//func (mi *noopChannelMetricIngester) stop() { +// mi.quit <- struct{}{} +//} +// +//func (mi *noopChannelMetricIngester) IngestMetrics(ms []*metricpb.Metric) { +// mi.in <- ms +//} +// +//func BenchmarkImportServerSendMetrics(b *testing.B) { +// rand.Seed(time.Now().Unix()) +// +// metrics := metrictest.RandomForwardMetrics(10000) +// for _, inputSize := range []int{10, 100, 1000, 10000} { +// ingesters := make([]MetricIngester, 100) +// for i := range ingesters { +// ingester := newNoopChannelMetricIngester() +// ingester.start() +// defer ingester.stop() +// ingesters[i] = ingester +// } +// s := New(ingesters) +// ctx := context.Background() +// input := &forwardrpc.MetricList{Metrics: metrics[:inputSize]} +// +// b.Run(fmt.Sprintf("InputSize=%d", inputSize), func(b *testing.B) { +// for i := 0; i < b.N; i++ { +// s.SendMetrics(ctx, input) +// } +// }) +// } +//} diff --git a/metricingester/types.go b/metricingester/types.go new file mode 100644 index 000000000..9ab9fe4ea --- /dev/null +++ b/metricingester/types.go @@ -0,0 +1,205 @@ +package metricingester + +import ( + "strings" + + "github.com/stripe/veneur/samplers" + + "github.com/segmentio/fasthash/fnv1a" + + "github.com/stripe/veneur/samplers/metricpb" +) + +/*********** + * METRICS * + ***********/ +type metricType int + +const ( + gauge metricType = iota + 1 + counter + histogram + set +) + +type metricHash uint32 + +// Metric represents a single metric sample. +type Metric struct { + name string + samplerate float32 + tags []string + hostname string + metricType metricType + + gaugevalue float64 + countervalue int64 + setvalue string + histovalue float64 +} + +type metricKey struct { + name string + hostname string + tags string +} + +func (m Metric) Key() metricKey { + return metricKey{ + name: m.name, + hostname: m.hostname, + tags: strings.Join(m.tags, ","), + } +} + +func (m Metric) Hash() metricHash { + return metricHash(hash(m.name, m.hostname, m.tags)) +} + +// NewCounter constructs a Metric that represents whole number values that accumulate. +// +// tags are assumed to be inC sorted order. +func NewCounter(name string, v int64, tags []string, samplerate float32, hostname string) Metric { + return Metric{ + name: name, + countervalue: v, + tags: tags, + samplerate: samplerate, + metricType: counter, + hostname: hostname, + } +} + +// NewGauge constructs a Metric that represents arbitrary floating point readings. +// +// tags are assumed to be inC sorted order. +func NewGauge(name string, v float64, tags []string, samplerate float32, hostname string) Metric { + return Metric{ + name: name, + gaugevalue: v, + tags: tags, + samplerate: samplerate, + metricType: gauge, + hostname: hostname, + } +} + +/*********** + * DIGESTS * + ***********/ + +type digestType int + +const ( + histoDigest digestType = iota + 1 + mixedHistoDigest + setDigest +) + +// Merge represents a combination of multiple Metric samples. Digests are merged +// into existing aggWorker. +type Digest struct { + name string + tags []string + hostname string + digestType digestType + + histodigest *metricpb.HistogramValue + setdigest *metricpb.SetValue +} + +// TODO(clin): Ideally merge this with Hash when we choose an algorithm with sufficiently +// low probability of collision and high enough performance. +func (d Digest) Key() metricKey { + return metricKey{ + name: d.name, + hostname: d.hostname, + tags: strings.Join(d.tags, ","), + } +} + +func (d Digest) Hash() metricHash { + return metricHash(hash(d.name, d.hostname, d.tags)) +} + +// +// tags are assumed to be inC sorted order. +func NewHistogramDigest( + name string, + digest *metricpb.HistogramValue, + tags []string, + hostname string, +) Digest { + return Digest{ + name: name, + tags: tags, + digestType: histoDigest, + hostname: hostname, + histodigest: digest, + } +} + +func NewMixedHistogramDigest( + name string, + digest *metricpb.HistogramValue, + tags []string, + hostname string, +) Digest { + return Digest{ + name: name, + tags: tags, + digestType: mixedHistoDigest, + hostname: hostname, + histodigest: digest, + } +} + +// +// tags are assumed to be inC sorted order. +func NewSetDigest( + name string, + digest *metricpb.SetValue, + tags []string, + hostname string, +) Digest { + return Digest{ + name: name, + tags: tags, + digestType: setDigest, + hostname: hostname, + setdigest: digest, + } +} + +// todo(clin): Replace this with farmhash and use 64bit to mitigate collisions. Then replace metricKey. +func hash(name, hostname string, tags []string) uint32 { + h := fnv1a.Init32 + fnv1a.AddString32(h, name) + fnv1a.AddString32(h, hostname) + for _, t := range tags { + fnv1a.AddString32(h, t) + } + return h +} + +/************ + * INTERNAL * + ************/ + +type samplerEnvelope struct { + counters map[metricKey]*samplers.Counter + gauges map[metricKey]*samplers.Gauge + histograms map[metricKey]*samplers.Histo + mixedHistograms map[metricKey]*samplers.Histo + sets map[metricKey]*samplers.Set +} + +func newSamplerEnvelope() samplerEnvelope { + return samplerEnvelope{ + counters: make(map[metricKey]*samplers.Counter), + gauges: make(map[metricKey]*samplers.Gauge), + histograms: make(map[metricKey]*samplers.Histo), + mixedHistograms: make(map[metricKey]*samplers.Histo), + sets: make(map[metricKey]*samplers.Set), + } +} diff --git a/samplers/metricpb/metric.pb.go b/samplers/metricpb/metric.pb.go index 331f46112..737be3e99 100644 --- a/samplers/metricpb/metric.pb.go +++ b/samplers/metricpb/metric.pb.go @@ -104,8 +104,9 @@ type Metric struct { // *Metric_Gauge // *Metric_Histogram // *Metric_Set - Value isMetric_Value `protobuf_oneof:"value"` - Scope Scope `protobuf:"varint,9,opt,name=scope,proto3,enum=metricpb.Scope" json:"scope,omitempty"` + Value isMetric_Value `protobuf_oneof:"value"` + Scope Scope `protobuf:"varint,9,opt,name=scope,proto3,enum=metricpb.Scope" json:"scope,omitempty"` + Hostname string `protobuf:"bytes,10,opt,name=hostname,proto3" json:"hostname,omitempty"` } func (m *Metric) Reset() { *m = Metric{} } @@ -200,6 +201,13 @@ func (m *Metric) GetScope() Scope { return Scope_Mixed } +func (m *Metric) GetHostname() string { + if m != nil { + return m.Hostname + } + return "" +} + // XXX_OneofFuncs is for the internal use of the proto package. func (*Metric) XXX_OneofFuncs() (func(msg proto.Message, b *proto.Buffer) error, func(msg proto.Message, tag, wire int, b *proto.Buffer) (bool, error), func(msg proto.Message) (n int), []interface{}) { return _Metric_OneofMarshaler, _Metric_OneofUnmarshaler, _Metric_OneofSizer, []interface{}{ @@ -443,6 +451,12 @@ func (m *Metric) MarshalTo(dAtA []byte) (int, error) { i++ i = encodeVarintMetric(dAtA, i, uint64(m.Scope)) } + if len(m.Hostname) > 0 { + dAtA[i] = 0x52 + i++ + i = encodeVarintMetric(dAtA, i, uint64(len(m.Hostname))) + i += copy(dAtA[i:], m.Hostname) + } return i, nil } @@ -632,6 +646,10 @@ func (m *Metric) Size() (n int) { if m.Scope != 0 { n += 1 + sovMetric(uint64(m.Scope)) } + l = len(m.Hostname) + if l > 0 { + n += 1 + l + sovMetric(uint64(l)) + } return n } @@ -975,6 +993,35 @@ func (m *Metric) Unmarshal(dAtA []byte) error { break } } + case 10: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Hostname", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMetric + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthMetric + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Hostname = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipMetric(dAtA[iNdEx:]) @@ -1398,33 +1445,34 @@ var ( func init() { proto.RegisterFile("samplers/metricpb/metric.proto", fileDescriptorMetric) } var fileDescriptorMetric = []byte{ - // 438 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x54, 0x92, 0xc1, 0x6a, 0xdb, 0x40, - 0x10, 0x86, 0xbd, 0x96, 0x65, 0xd9, 0xe3, 0xc4, 0x15, 0x43, 0x5a, 0x96, 0x1c, 0x84, 0x11, 0x6d, - 0x71, 0x43, 0x51, 0xc0, 0xa5, 0xd0, 0x6b, 0xd3, 0x80, 0x73, 0xb0, 0x2f, 0x4a, 0xe8, 0x35, 0xac, - 0x9d, 0x61, 0x23, 0x90, 0xbc, 0x42, 0x5a, 0x97, 0xfa, 0x2d, 0xfa, 0x58, 0x39, 0xf6, 0x11, 0x8a, - 0xfb, 0x22, 0x65, 0x57, 0xda, 0x2a, 0x39, 0x08, 0xcd, 0xfe, 0xff, 0xf7, 0x4b, 0xfc, 0x1a, 0x41, - 0x54, 0x8b, 0xa2, 0xcc, 0xa9, 0xaa, 0x2f, 0x0b, 0xd2, 0x55, 0xb6, 0x2d, 0x37, 0xed, 0x90, 0x94, - 0x95, 0xd2, 0x0a, 0x47, 0x4e, 0x3e, 0x7f, 0xad, 0x1f, 0x32, 0x49, 0xb5, 0xbe, 0x6c, 0xef, 0x0d, - 0x10, 0x3f, 0xf5, 0x61, 0xb8, 0xb6, 0x0c, 0x22, 0x0c, 0x76, 0xa2, 0x20, 0xce, 0x66, 0x6c, 0x3e, - 0x4e, 0xed, 0x6c, 0x34, 0x2d, 0x64, 0xcd, 0xfb, 0x33, 0xcf, 0x68, 0x66, 0xc6, 0x18, 0x06, 0xfa, - 0x50, 0x12, 0xf7, 0x66, 0x6c, 0x3e, 0x5d, 0x4c, 0x13, 0xf7, 0x8a, 0xe4, 0xee, 0x50, 0x52, 0x6a, - 0x3d, 0x5c, 0x40, 0xb0, 0x55, 0xfb, 0x9d, 0xa6, 0x8a, 0xfb, 0x33, 0x36, 0x9f, 0x2c, 0xde, 0x74, - 0xd8, 0xb7, 0xc6, 0xf8, 0x2e, 0xf2, 0x3d, 0xdd, 0xf4, 0x52, 0x07, 0xe2, 0x47, 0xf0, 0xa5, 0xd8, - 0x4b, 0xe2, 0x43, 0x9b, 0x38, 0xeb, 0x12, 0x4b, 0x23, 0x3b, 0xbe, 0x81, 0xf0, 0x0b, 0x8c, 0x1f, - 0xb3, 0x5a, 0x2b, 0x59, 0x89, 0x82, 0x07, 0x36, 0xc1, 0xbb, 0xc4, 0x8d, 0xb3, 0x5c, 0xaa, 0x83, - 0xf1, 0x3d, 0x78, 0x35, 0x69, 0x3e, 0xb2, 0x19, 0xec, 0x32, 0xb7, 0xa4, 0x1d, 0x6d, 0x00, 0x7c, - 0x07, 0x7e, 0xbd, 0x55, 0x25, 0xf1, 0xb1, 0x2d, 0xfa, 0xea, 0x19, 0x69, 0xe4, 0xb4, 0x71, 0xaf, - 0x02, 0xf0, 0x7f, 0x98, 0x58, 0xfc, 0x16, 0x4e, 0x9e, 0x57, 0xc3, 0xb3, 0xd6, 0xb0, 0x1f, 0xd4, - 0x4b, 0x5b, 0x2a, 0x06, 0xe8, 0xea, 0xbc, 0x64, 0x98, 0x63, 0x96, 0x30, 0x7d, 0x59, 0x00, 0x3f, - 0xc3, 0x48, 0xdf, 0x37, 0x8b, 0xb3, 0xe8, 0x64, 0x71, 0x9e, 0xb8, 0x45, 0xae, 0xa9, 0x92, 0xd9, - 0x4e, 0x5e, 0xdb, 0xd3, 0xb5, 0xd0, 0x22, 0x0d, 0x74, 0x73, 0x88, 0x13, 0x18, 0xb9, 0x56, 0x18, - 0xc3, 0xe9, 0xe3, 0xa1, 0xa4, 0xea, 0x3e, 0x57, 0xd2, 0x5c, 0xf6, 0x39, 0x27, 0xe9, 0xc4, 0x8a, - 0x2b, 0x25, 0x57, 0x4a, 0x5e, 0x7c, 0x00, 0xdf, 0x76, 0xc3, 0x31, 0xf8, 0xeb, 0xec, 0x27, 0x3d, - 0x84, 0x3d, 0x33, 0xae, 0xd4, 0x56, 0xe4, 0x21, 0x43, 0x80, 0xe1, 0x32, 0x57, 0x1b, 0x91, 0x87, - 0xfd, 0x8b, 0xaf, 0x30, 0x30, 0xfb, 0xc6, 0x09, 0x04, 0x6d, 0xeb, 0x86, 0xb5, 0xe5, 0x42, 0x86, - 0xa7, 0x30, 0xfe, 0xdf, 0x21, 0xec, 0x63, 0x00, 0xde, 0x2d, 0xe9, 0xd0, 0x33, 0xc8, 0x5d, 0x56, - 0x50, 0x15, 0x0e, 0xae, 0xc2, 0xa7, 0x63, 0xc4, 0x7e, 0x1f, 0x23, 0xf6, 0xe7, 0x18, 0xb1, 0x5f, - 0x7f, 0xa3, 0xde, 0x66, 0x68, 0x7f, 0xca, 0x4f, 0xff, 0x02, 0x00, 0x00, 0xff, 0xff, 0x3d, 0x41, - 0xaf, 0x18, 0xd7, 0x02, 0x00, 0x00, + // 451 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x54, 0x92, 0xd1, 0x6a, 0xdb, 0x30, + 0x14, 0x86, 0xa3, 0x38, 0x8e, 0xed, 0x93, 0x36, 0x33, 0x87, 0x6e, 0x88, 0x5c, 0x98, 0x60, 0xb6, + 0x91, 0x95, 0xe1, 0x42, 0xc6, 0x60, 0xb7, 0xeb, 0x0a, 0xe9, 0x45, 0x72, 0xe3, 0x96, 0xdd, 0x16, + 0x25, 0x15, 0x8a, 0xc1, 0x8e, 0x8c, 0xad, 0x8c, 0xe5, 0x2d, 0xf6, 0x58, 0xbb, 0xdc, 0x23, 0x8c, + 0x6c, 0x0f, 0x52, 0x24, 0x5b, 0x75, 0x73, 0x11, 0x72, 0xce, 0x7f, 0xbe, 0x5f, 0xe6, 0x3f, 0x12, + 0x44, 0x35, 0x2b, 0xca, 0x9c, 0x57, 0xf5, 0x55, 0xc1, 0x55, 0x95, 0x6d, 0xca, 0x75, 0x5b, 0x24, + 0x65, 0x25, 0x95, 0x44, 0xdf, 0xca, 0x93, 0xd7, 0xea, 0x31, 0x13, 0xbc, 0x56, 0x57, 0xed, 0x7f, + 0x03, 0xc4, 0xff, 0xfb, 0x30, 0x5c, 0x19, 0x06, 0x11, 0x06, 0x3b, 0x56, 0x70, 0x4a, 0xa6, 0x64, + 0x16, 0xa4, 0xa6, 0xd6, 0x9a, 0x62, 0xa2, 0xa6, 0xfd, 0xa9, 0xa3, 0x35, 0x5d, 0x63, 0x0c, 0x03, + 0x75, 0x28, 0x39, 0x75, 0xa6, 0x64, 0x36, 0x9e, 0x8f, 0x13, 0xfb, 0x89, 0xe4, 0xfe, 0x50, 0xf2, + 0xd4, 0xcc, 0x70, 0x0e, 0xde, 0x46, 0xee, 0x77, 0x8a, 0x57, 0xd4, 0x9d, 0x92, 0xd9, 0x68, 0xfe, + 0xa6, 0xc3, 0xbe, 0x35, 0x83, 0xef, 0x2c, 0xdf, 0xf3, 0xdb, 0x5e, 0x6a, 0x41, 0xfc, 0x08, 0xae, + 0x60, 0x7b, 0xc1, 0xe9, 0xd0, 0x38, 0x2e, 0x3a, 0xc7, 0x42, 0xcb, 0x96, 0x6f, 0x20, 0xfc, 0x02, + 0xc1, 0x36, 0xab, 0x95, 0x14, 0x15, 0x2b, 0xa8, 0x67, 0x1c, 0xb4, 0x73, 0xdc, 0xda, 0x91, 0x75, + 0x75, 0x30, 0xbe, 0x07, 0xa7, 0xe6, 0x8a, 0xfa, 0xc6, 0x83, 0x9d, 0xe7, 0x8e, 0x2b, 0x4b, 0x6b, + 0x00, 0xdf, 0x81, 0x5b, 0x6f, 0x64, 0xc9, 0x69, 0x60, 0x82, 0xbe, 0x7a, 0x41, 0x6a, 0x39, 0x6d, + 0xa6, 0x38, 0x01, 0x7f, 0x2b, 0x6b, 0x65, 0x56, 0x07, 0x66, 0x75, 0xcf, 0xfd, 0xb5, 0x07, 0xee, + 0x0f, 0x7d, 0x64, 0xfc, 0x16, 0xce, 0x5e, 0xc6, 0xc6, 0x8b, 0x76, 0x60, 0x96, 0xed, 0xa4, 0x2d, + 0x15, 0x03, 0x74, 0x51, 0x4f, 0x19, 0x62, 0x99, 0x05, 0x8c, 0x4f, 0xc3, 0xe1, 0x67, 0xf0, 0xd5, + 0x43, 0x73, 0xa9, 0x06, 0x1d, 0xcd, 0x27, 0x89, 0xbd, 0xe4, 0x15, 0xaf, 0x44, 0xb6, 0x13, 0x37, + 0xa6, 0xbb, 0x61, 0x8a, 0xa5, 0x9e, 0x6a, 0x9a, 0x38, 0x01, 0xdf, 0x26, 0xc6, 0x18, 0xce, 0xb7, + 0x87, 0x92, 0x57, 0x0f, 0xb9, 0x14, 0xfa, 0x67, 0xce, 0x39, 0x4b, 0x47, 0x46, 0x5c, 0x4a, 0xb1, + 0x94, 0xe2, 0xf2, 0x03, 0xb8, 0x26, 0x37, 0x06, 0xe0, 0xae, 0xb2, 0x9f, 0xfc, 0x31, 0xec, 0xe9, + 0x72, 0x29, 0x37, 0x2c, 0x0f, 0x09, 0x02, 0x0c, 0x17, 0xb9, 0x5c, 0xb3, 0x3c, 0xec, 0x5f, 0x7e, + 0x85, 0x81, 0x7e, 0x0b, 0x38, 0x02, 0xaf, 0x4d, 0xdd, 0xb0, 0x26, 0x5c, 0x48, 0xf0, 0x1c, 0x82, + 0xe7, 0x0c, 0x61, 0x1f, 0x3d, 0x70, 0xee, 0xb8, 0x0a, 0x1d, 0x8d, 0xdc, 0x67, 0x05, 0xaf, 0xc2, + 0xc1, 0x75, 0xf8, 0xfb, 0x18, 0x91, 0x3f, 0xc7, 0x88, 0xfc, 0x3d, 0x46, 0xe4, 0xd7, 0xbf, 0xa8, + 0xb7, 0x1e, 0x9a, 0x07, 0xfb, 0xe9, 0x29, 0x00, 0x00, 0xff, 0xff, 0x7b, 0x6d, 0x88, 0x2f, 0xf3, + 0x02, 0x00, 0x00, } diff --git a/samplers/metricpb/metric.proto b/samplers/metricpb/metric.proto index ea969ffbc..dd56c918c 100644 --- a/samplers/metricpb/metric.proto +++ b/samplers/metricpb/metric.proto @@ -19,6 +19,7 @@ message Metric { } Scope scope = 9; + string hostname = 10; } // Scope describes at which level the metric will be emitted. diff --git a/server.go b/server.go index 687ad3db2..89d67de10 100644 --- a/server.go +++ b/server.go @@ -20,6 +20,8 @@ import ( "syscall" "time" + "github.com/stripe/veneur/metricingester" + "github.com/DataDog/datadog-go/statsd" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" @@ -583,12 +585,8 @@ func NewFromConfig(logger *logrus.Logger, conf Config) (*Server, error) { ret.grpcListenAddress = conf.GrpcAddress if ret.grpcListenAddress != "" { // convert all the workers to the proper interface - ingesters := make([]importsrv.MetricIngester, len(ret.Workers)) - for i, worker := range ret.Workers { - ingesters[i] = worker - } - ret.grpcServer = importsrv.New(ingesters, + ret.grpcServer = importsrv.New(metricingester.AggregatingIngestor{}, importsrv.WithTraceClient(ret.TraceClient)) } From 0df42d17d3f5b091bf626b11e2196f0ab140f073 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Wed, 7 Nov 2018 18:26:08 -0800 Subject: [PATCH 02/17] Implement ingester and worker. --- metricingester/aggingestor.go | 47 +++++++++++++++++ metricingester/aggworker.go | 96 +++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 metricingester/aggingestor.go create mode 100644 metricingester/aggworker.go diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go new file mode 100644 index 000000000..3e636520f --- /dev/null +++ b/metricingester/aggingestor.go @@ -0,0 +1,47 @@ +package metricingester + +import "time" + +type AggregatingIngestor struct { + workers []aggWorker + flusher func(samplerEnvelope) error + interval time.Duration + quit chan struct{} +} + +// TODO(clin): This needs to take ctx. +func (a AggregatingIngestor) Ingest(m Metric) error { + workerid := m.Hash() % metricHash(len(a.workers)) + a.workers[workerid].Ingest(m) + return nil +} + +func (a AggregatingIngestor) Merge(d Digest) error { + workerid := d.Hash() % metricHash(len(a.workers)) + a.workers[workerid].Merge(d) + return nil +} + +func (a AggregatingIngestor) Start() { + go func() { + ticker := time.NewTicker(a.interval) + for { + select { + case <-ticker.C: + a.flush() + case <-a.quit: + return + } + } + }() +} + +func (a AggregatingIngestor) Stop() { + close(a.quit) +} + +func (a AggregatingIngestor) flush() { + for _, w := range a.workers { + go a.flusher(w.Flush()) + } +} diff --git a/metricingester/aggworker.go b/metricingester/aggworker.go new file mode 100644 index 000000000..338cea0da --- /dev/null +++ b/metricingester/aggworker.go @@ -0,0 +1,96 @@ +package metricingester + +import ( + "github.com/stripe/veneur/samplers" +) + +type aggWorker struct { + samplers samplerEnvelope + + inC chan Metric + mergeC chan Digest + flush chan chan<- samplerEnvelope +} + +func (a aggWorker) Start() { + go func() { + for { + select { + case m, ok := <-a.inC: + if !ok { + return + } + a.ingest(m) + case d, ok := <-a.mergeC: + if !ok { + return + } + a.merge(d) + case responseCh := <-a.flush: + responseCh <- a.samplers + a.samplers = newSamplerEnvelope() + } + } + }() +} + +func (a aggWorker) Ingest(m Metric) { + a.inC <- m +} + +func (a aggWorker) Merge(d Digest) { + a.mergeC <- d +} + +func (a aggWorker) Stop() { + close(a.inC) + close(a.flush) +} + +func (a aggWorker) Flush() samplerEnvelope { + rcv := make(chan samplerEnvelope) + a.flush <- rcv + return <-rcv +} + +func (a aggWorker) ingest(m Metric) { + key := m.Key() + switch m.metricType { + case counter: + if _, present := a.samplers.counters[key]; !present { + a.samplers.counters[key] = samplers.NewCounter(m.name, m.tags) + } + a.samplers.counters[key].Sample(float64(m.countervalue), m.samplerate) + case gauge: + if _, present := a.samplers.gauges[key]; !present { + a.samplers.gauges[key] = samplers.NewGauge(m.name, m.tags) + } + a.samplers.gauges[key].Sample(m.gaugevalue, m.samplerate) + case set: + if _, present := a.samplers.sets[key]; !present { + a.samplers.sets[key] = samplers.NewSet(m.name, m.tags) + } + a.samplers.sets[key].Sample(m.setvalue, m.samplerate) + case histogram: + if _, present := a.samplers.histograms[key]; !present { + a.samplers.histograms[key] = samplers.NewHist(m.name, m.tags) + } + a.samplers.histograms[key].Sample(m.histovalue, m.samplerate) + } +} + +func (a aggWorker) merge(d Digest) { + key := d.Key() + switch d.digestType { + case histoDigest: + if _, present := a.samplers.histograms[key]; !present { + a.samplers.histograms[key] = samplers.NewHist(d.name, d.tags) + } + a.samplers.histograms[key].Merge(d.histodigest) + case setDigest: + if _, present := a.samplers.sets[key]; !present { + a.samplers.sets[key] = samplers.NewSet(d.name, d.tags) + } + a.samplers.sets[key].Merge(d.setdigest) + } +} From f255e56265892d2bbbb4d969954ee2722a9f9c81 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Wed, 7 Nov 2018 18:26:19 -0800 Subject: [PATCH 03/17] Implement flusher. --- metricingester/sinkflusher.go | 78 +++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 metricingester/sinkflusher.go diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go new file mode 100644 index 000000000..efe73eebe --- /dev/null +++ b/metricingester/sinkflusher.go @@ -0,0 +1,78 @@ +package metricingester + +import ( + "context" + "sync" + "time" + + "github.com/sirupsen/logrus" + "github.com/stripe/veneur/samplers" + "github.com/stripe/veneur/trace" +) + +type sinkFlusher struct { + aggregates samplers.HistogramAggregates + percentiles []float64 + sinks []sink + + log *logrus.Logger + trace trace.Client +} + +type sink interface { + Name() string + Flush(context.Context, []samplers.InterMetric) error +} + +func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) error { + metrics := make([]samplers.InterMetric, 0, countMetrics(envelope)) + // get metrics from envelope + for _, sampler := range envelope.counters { + metrics = append(metrics, sampler.Flush(time.Second)...) + } + for _, sampler := range envelope.sets { + metrics = append(metrics, sampler.Flush()...) + } + for _, sampler := range envelope.gauges { + metrics = append(metrics, sampler.Flush()...) + } + for _, sampler := range envelope.histograms { + metrics = append(metrics, sampler.Flush(time.Second, s.percentiles, s.aggregates, true)...) + } + for _, sampler := range envelope.mixedHistograms { + metrics = append(metrics, sampler.Flush(time.Second, s.percentiles, s.aggregates, true)...) + } + + if len(metrics) == 0 { + return nil + } + + // TODO(clin): Add back metrics once we finalize the metrics client pull request. + wg := sync.WaitGroup{} + for _, sinkInstance := range s.sinks { + wg.Add(1) + go func(ms sink) { + err := ms.Flush(ctx, metrics) + if err != nil { + s.log.WithError(err).WithField("sink", ms.Name()).Warn("Error flushing sink") + } + wg.Done() + }(sinkInstance) + } + wg.Wait() + return nil +} + +func countMetrics(samplers samplerEnvelope) (count int) { + // This is a minor optimization to reduce allocations produced by append statements. + // We just need to get order of magnitude right, so this isn't super precise, and that's probably ok. + // + // If we need to be close to zeroalloc for some reason we can come back and make this perfect. + count += len(samplers.counters) + count += len(samplers.gauges) + count += len(samplers.sets) + count += len(samplers.histograms) * 5 + // probably way off + count += len(samplers.mixedHistograms) * 5 * 10 + return count +} From 89a7ac9283cadc0c4a9a00ec9284f66643634d24 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Sat, 10 Nov 2018 11:22:10 -0800 Subject: [PATCH 04/17] Move channel sink into a different package so it can be exported. --- forward_grpc_test.go | 4 +++- forward_test.go | 6 +++-- server_test.go | 50 ++++++++-------------------------------- sinks/channel/channel.go | 41 ++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 43 deletions(-) create mode 100644 sinks/channel/channel.go diff --git a/forward_grpc_test.go b/forward_grpc_test.go index bd1aa8690..4603fc1c7 100644 --- a/forward_grpc_test.go +++ b/forward_grpc_test.go @@ -6,6 +6,8 @@ import ( "testing" "time" + "github.com/stripe/veneur/sinks/channel" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stripe/veneur/samplers" @@ -181,7 +183,7 @@ func forwardGRPCTestMetrics() []*samplers.UDPMetric { // after passing through a proxy. func TestE2EForwardingGRPCMetrics(t *testing.T) { ch := make(chan []samplers.InterMetric) - sink, _ := NewChannelMetricSink(ch) + sink, _ := channel.NewChannelMetricSink(ch) ff := newForwardGRPCFixture(t, localConfig(), sink) defer ff.stop() diff --git a/forward_test.go b/forward_test.go index 967b59c37..d6f6403e5 100644 --- a/forward_test.go +++ b/forward_test.go @@ -7,6 +7,8 @@ import ( "testing" "time" + "github.com/stripe/veneur/sinks/channel" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -100,7 +102,7 @@ func (ff *forwardFixture) IngestMetric(m *samplers.UDPMetric) { func TestE2EForwardingIndicatorMetrics(t *testing.T) { t.Parallel() ch := make(chan []samplers.InterMetric) - sink, _ := NewChannelMetricSink(ch) + sink, _ := channel.NewChannelMetricSink(ch) cfg := localConfig() cfg.IndicatorSpanTimerName = "indicator.span.timer" ffx := newForwardingFixture(t, cfg, nil, sink) @@ -143,7 +145,7 @@ func TestE2EForwardingIndicatorMetrics(t *testing.T) { func TestE2EForwardMetric(t *testing.T) { t.Parallel() ch := make(chan []samplers.InterMetric) - sink, _ := NewChannelMetricSink(ch) + sink, _ := channel.NewChannelMetricSink(ch) cfg := localConfig() cfg.IndicatorSpanTimerName = "indicator.span.timer" ffx := newForwardingFixture(t, cfg, nil, sink) diff --git a/server_test.go b/server_test.go index 49befabf1..3a438178e 100644 --- a/server_test.go +++ b/server_test.go @@ -12,6 +12,8 @@ import ( "io/ioutil" "sync" + "github.com/stripe/veneur/sinks/channel" + "math/rand" "net" "net/http" @@ -166,38 +168,6 @@ func setupVeneurServer(t testing.TB, config Config, transport http.RoundTripper, return server } -type channelMetricSink struct { - metricsChannel chan []samplers.InterMetric -} - -// NewChannelMetricSink creates a new channelMetricSink. This sink writes any -// flushed metrics to its `metricsChannel` such that the test can inspect -// the metrics for correctness. -func NewChannelMetricSink(ch chan []samplers.InterMetric) (*channelMetricSink, error) { - return &channelMetricSink{ - metricsChannel: ch, - }, nil -} - -func (c *channelMetricSink) Name() string { - return "channel" -} - -func (c *channelMetricSink) Start(*trace.Client) error { - return nil -} - -func (c *channelMetricSink) Flush(ctx context.Context, metrics []samplers.InterMetric) error { - // Put the whole slice in since many tests want to see all of them and we - // don't want them to have to loop over and wait on empty or something - c.metricsChannel <- metrics - return nil -} - -func (c *channelMetricSink) FlushOtherSamples(ctx context.Context, events []ssf.SSFSample) { - return -} - // fixture sets up a mock Datadog API server and Veneur type fixture struct { api *httptest.Server @@ -236,7 +206,7 @@ func TestLocalServerUnaggregatedMetrics(t *testing.T) { config.Tags = []string{"butts:farts"} metricsChan := make(chan []samplers.InterMetric, 10) - cms, _ := NewChannelMetricSink(metricsChan) + cms, _ := channel.NewChannelMetricSink(metricsChan) defer close(metricsChan) f := newFixture(t, config, cms, nil) @@ -266,7 +236,7 @@ func TestGlobalServerFlush(t *testing.T) { config := globalConfig() metricsChan := make(chan []samplers.InterMetric, 10) - cms, _ := NewChannelMetricSink(metricsChan) + cms, _ := channel.NewChannelMetricSink(metricsChan) defer close(metricsChan) f := newFixture(t, config, cms, nil) @@ -560,7 +530,7 @@ func TestUDPMetrics(t *testing.T) { config.Interval = "60s" config.StatsdListenAddresses = []string{"udp://127.0.0.1:0"} ch := make(chan []samplers.InterMetric, 20) - sink, _ := NewChannelMetricSink(ch) + sink, _ := channel.NewChannelMetricSink(ch) f := newFixture(t, config, sink, nil) defer f.Close() @@ -583,7 +553,7 @@ func TestMultipleUDPSockets(t *testing.T) { config.Interval = "60s" config.StatsdListenAddresses = []string{"udp://127.0.0.1:0", "udp://127.0.0.1:0"} ch := make(chan []samplers.InterMetric, 20) - sink, _ := NewChannelMetricSink(ch) + sink, _ := channel.NewChannelMetricSink(ch) f := newFixture(t, config, sink, nil) defer f.Close() @@ -622,7 +592,7 @@ func TestUDPMetricsSSF(t *testing.T) { config.Interval = "60s" config.SsfListenAddresses = []string{"udp://127.0.0.1:0"} ch := make(chan []samplers.InterMetric, 20) - sink, _ := NewChannelMetricSink(ch) + sink, _ := channel.NewChannelMetricSink(ch) f := newFixture(t, config, sink, nil) defer f.Close() @@ -703,7 +673,7 @@ func TestUNIXMetricsSSF(t *testing.T) { path := filepath.Join(tdir, "test.sock") config.SsfListenAddresses = []string{fmt.Sprintf("unix://%s", path)} ch := make(chan []samplers.InterMetric, 20) - sink, _ := NewChannelMetricSink(ch) + sink, _ := channel.NewChannelMetricSink(ch) f := newFixture(t, config, sink, nil) defer f.Close() @@ -1109,7 +1079,7 @@ func TestSSFMetricsEndToEnd(t *testing.T) { config := localConfig() config.SsfListenAddresses = []string{ssfAddr} metricsChan := make(chan []samplers.InterMetric, 10) - cms, _ := NewChannelMetricSink(metricsChan) + cms, _ := channel.NewChannelMetricSink(metricsChan) f := newFixture(t, config, cms, nil) defer f.Close() @@ -1170,7 +1140,7 @@ func TestInternalSSFMetricsEndToEnd(t *testing.T) { config := localConfig() config.SsfListenAddresses = []string{ssfAddr} metricsChan := make(chan []samplers.InterMetric, 10) - cms, _ := NewChannelMetricSink(metricsChan) + cms, _ := channel.NewChannelMetricSink(metricsChan) f := newFixture(t, config, cms, nil) defer f.Close() diff --git a/sinks/channel/channel.go b/sinks/channel/channel.go new file mode 100644 index 000000000..7f1b51834 --- /dev/null +++ b/sinks/channel/channel.go @@ -0,0 +1,41 @@ +package channel + +import ( + "context" + + "github.com/stripe/veneur/samplers" + "github.com/stripe/veneur/ssf" + "github.com/stripe/veneur/trace" +) + +type ChannelMetricSink struct { + metricsChannel chan []samplers.InterMetric +} + +// NewChannelMetricSink creates a new ChannelMetricSink. This sink writes any +// flushed metrics to its `metricsChannel` such that the test can inspect +// the metrics for correctness. +func NewChannelMetricSink(ch chan []samplers.InterMetric) (ChannelMetricSink, error) { + return ChannelMetricSink{ + metricsChannel: ch, + }, nil +} + +func (c ChannelMetricSink) Name() string { + return "channel" +} + +func (c ChannelMetricSink) Start(*trace.Client) error { + return nil +} + +func (c ChannelMetricSink) Flush(ctx context.Context, metrics []samplers.InterMetric) error { + // Put the whole slice in since many tests want to see all of them and we + // don't want them to have to loop over and wait on empty or something + c.metricsChannel <- metrics + return nil +} + +func (c ChannelMetricSink) FlushOtherSamples(ctx context.Context, events []ssf.SSFSample) { + return +} From bec6df3ab1f7437ff308aabf58a4335382434ea0 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Sat, 10 Nov 2018 11:22:38 -0800 Subject: [PATCH 05/17] Add a testing framework and an end to end test. Add wiring until test compiles. --- importsrv/server_test.go | 335 +++++++++++++++++++++++----------- importsrv/testtools_test.go | 75 ++++++++ metricingester/aggingestor.go | 69 ++++++- metricingester/aggworker.go | 11 +- metricingester/sinkflusher.go | 41 +++-- metricingester/types.go | 6 + samplers/test_tools.go | 81 ++++++++ 7 files changed, 494 insertions(+), 124 deletions(-) create mode 100644 importsrv/testtools_test.go create mode 100644 samplers/test_tools.go diff --git a/importsrv/server_test.go b/importsrv/server_test.go index 42e9fc9de..43eca6cb9 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -1,76 +1,235 @@ -package importsrv +package importsrv_test + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/stripe/veneur/forwardrpc" + "github.com/stripe/veneur/importsrv" + "github.com/stripe/veneur/metricingester" + "github.com/stripe/veneur/samplers" + "github.com/stripe/veneur/sinks/channel" + + "github.com/stripe/veneur/samplers/metricpb" +) + +var testE2EFlushingCases = []struct { + in []*metricpb.Metric + out []samplers.TestMetric + msg string +}{ + + // basic test cases + + { + pbmetrics(pbcounter("test", 5)), + samplers.TMetrics(samplers.TCounter("test", 5)), + "counter not present", + }, + { + pbmetrics(pbgauge("test", 100.5)), + samplers.TMetrics(samplers.TGauge("test", 100.5)), + "gauge not present", + }, + { + pbmetrics(pbhisto("test", []float64{1, 2, 3})), + samplers.TMetrics(samplers.TGauge("test.min", 1), samplers.TGauge("test.max", 3), samplers.TCounter("test.count", 3)), + "histo not present", + }, + { + pbmetrics(pbset("test", []string{"asf"})), + samplers.TMetrics(samplers.TGauge("test", 3)), + "set not present", + }, + + // basic tests that things are aggregated correctly + + { + pbmetrics(pbcounter("test", 5), pbcounter("test", 10), pbcounter("test2", 3)), + samplers.TMetrics(samplers.TCounter("test", 15), samplers.TCounter("test2", 3)), + "counters not aggregated correctly", + }, + { + pbmetrics(pbgauge("test", 5), pbgauge("test", 10)), + samplers.TMetrics(samplers.TGauge("test", 10)), + "gauge not aggregated correctly", + }, + { + pbmetrics(pbset("test", []string{"hey", "hey2"}), pbset("test", []string{"hey3", "hey4"})), + samplers.TMetrics(samplers.TGauge("test", 4)), + "sets not aggregated correctly", + }, + { + pbmetrics(pbcounter("test", 5), pbcounter("test", 10)), + samplers.TMetrics(samplers.TCounter("test", 15)), + "counters not aggregated correctly", + }, + { + pbmetrics(pbcounter("test", 3), pbgauge("test", 4), pbset("test", []string{"a", "b"})), + samplers.TMetrics(samplers.TCounter("test", 3), samplers.TGauge("test", 4), samplers.TGauge("test", 2)), + "different types not aggregated separately", + }, + + // test special aggregation rules + + { + pbmetrics( + pbcounter("test", 5, pbtags("a:b", "c:d")), + pbcounter("test", 3, pbtags("c:d", "a:b")), + ), + samplers.TMetrics(samplers.TCounter("test", 8, samplers.OptTags("a:b", "c:d"))), + "out of order tags don't aggregate together", + }, + { + pbmetrics( + pbcounter("test", 5, pbhostname("a")), + pbcounter("test", 5, pbhostname("a")), + pbcounter("test", 3, pbhostname("b")), + pbcounter("test", 3, pbhostname("")), + ), + samplers.TMetrics( + samplers.TCounter("test", 10, samplers.OptHostname("a")), + samplers.TCounter("test", 3, samplers.OptHostname("b")), + samplers.TCounter("test", 3, samplers.OptHostname("")), + ), + "hostnames not being aggregated separately", + }, + { + pbmetrics( + pbcounter("test", 5, pbhostname("a"), pbscope(metricpb.Scope_Local)), + pbcounter("test", 5, pbhostname("a"), pbscope(metricpb.Scope_Global)), + pbcounter("test", 5, pbhostname("a"), pbscope(metricpb.Scope_Mixed)), + pbcounter("test", 3, pbhostname("a")), + ), + samplers.TMetrics( + samplers.TCounter("test", 18, samplers.OptHostname("a")), + ), + "scope field should be ignored for counter types", + }, + { + pbmetrics( + pbgauge("test", 5, pbhostname("a"), pbscope(metricpb.Scope_Local)), + pbgauge("test", 6, pbhostname("a"), pbscope(metricpb.Scope_Global)), + pbgauge("test", 7, pbhostname("a"), pbscope(metricpb.Scope_Mixed)), + pbgauge("test", 3, pbhostname("a")), + ), + samplers.TMetrics( + samplers.TGauge("test", 3, samplers.OptHostname("a")), + ), + "scope field should be ignored for gauge types", + }, + + // mixed histogram fun + + { + pbmetrics( + pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_Mixed), pbhostname("a")), + ), + samplers.TMetrics( + samplers.TGauge("test.min", 1, samplers.OptHostname("a")), + samplers.TGauge("test.max", 3, samplers.OptHostname("a")), + samplers.TGauge("test.count", 3, samplers.OptHostname("a")), + samplers.TGauge("test.p99", 3), + ), + "mixed histos not reporting host level aggregates", + }, + { + pbmetrics( + pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_Mixed), pbhostname("a")), + pbhisto("test", []float64{4, 5, 6}, pbscope(metricpb.Scope_Mixed), pbhostname("b")), + ), + samplers.TMetrics( + samplers.TGauge("test.min", 1, samplers.OptHostname("a")), + samplers.TGauge("test.max", 3, samplers.OptHostname("a")), + samplers.TGauge("test.count", 3, samplers.OptHostname("a")), + samplers.TGauge("test.min", 4, samplers.OptHostname("b")), + samplers.TGauge("test.max", 6, samplers.OptHostname("b")), + samplers.TGauge("test.count", 3, samplers.OptHostname("b")), + samplers.TGauge("test.p99", 6), + ), + "mixed histos not reporting host level aggregates", + }, + + // sink tests + + // crazy random "real world" tests +} + +func toResults(ms []samplers.InterMetric) (outs []samplers.TestMetric) { + for _, inm := range ms { + outs = append(outs, samplers.TestMetric{ + Name: inm.Name, + Tags: strings.Join(inm.Tags, ","), + Value: inm.Value, + Type: inm.Type, + Message: inm.Message, + Hostname: inm.HostName, + Sinks: inm.Sinks, + }) + } + return outs +} + +// TestE2EFlushingIngester tests the integration of the import endpoint with +// the flushing ingester. +func TestE2EFlushingIngester(t *testing.T) { + for _, tc := range testE2EFlushingCases { + t.Run(tc.msg, test(tc.in, tc.out, tc.msg)) + } +} + +func test(in []*metricpb.Metric, out []samplers.TestMetric, msg string) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + + // SETUP TEST + + // this sink collects the results + rc := make(chan []samplers.InterMetric, 20) + cms, _ := channel.NewChannelMetricSink(rc) + + flushc := make(chan time.Time) + workers := 1 + ing := metricingester.NewFlushingIngester( + workers, // we want to test the parallel case + 1, // this field is practically meaningless since we override the flush channel + []metricingester.Sink{cms}, + metricingester.FlushChan(flushc), // override the flush ticker channel so we control when flush + ) + s := importsrv.New(ing) + defer s.Stop() + + // EXECUTE THE TEST + + ing.Start() + defer ing.Stop() + _, err := s.SendMetrics(context.Background(), &forwardrpc.MetricList{Metrics: in}) + require.NoError(t, err) + flushc <- time.Now() + + // COLLECT RESULTS + + var results []samplers.InterMetric + for i := 0; i < workers; i++ { + select { + case result := <-rc: + //t.Logf("result: %v", result) + results = append(results, result...) + case <-time.After(10 * time.Second): + t.Fatal("took too long to flush metric results") + } + } + + // ASSERT + + assert.ElementsMatch(t, out, toResults(results), "MSG: %v // VALUES : %v", msg, results) + } +} -// TODO(clin): Add tests back. -// -//import ( -// "context" -// "fmt" -// "math/rand" -// "testing" -// "time" -// -// "github.com/stretchr/testify/assert" -// "github.com/stripe/veneur/forwardrpc" -// "github.com/stripe/veneur/samplers/metricpb" -// metrictest "github.com/stripe/veneur/samplers/metricpb/testutils" -// "github.com/stripe/veneur/trace" -//) -// -//type testMetricIngester struct { -// metrics []*metricpb.Metric -//} -// -//func (mi *testMetricIngester) IngestMetrics(ms []*metricpb.Metric) { -// mi.metrics = append(mi.metrics, ms...) -//} -// -//func (mi *testMetricIngester) clear() { -// mi.metrics = mi.metrics[:0] -//} -// -//// Test that sending the same metric to a Veneur results in it being hashed -//// to the same worker every time -//func TestSendMetrics_ConsistentHash(t *testing.T) { -// ingesters := []*testMetricIngester{&testMetricIngester{}, &testMetricIngester{}} -// -// casted := make([]MetricIngester, len(ingesters)) -// for i, ingester := range ingesters { -// casted[i] = ingester -// } -// s := New(casted) -// -// inputs := []*metricpb.Metric{ -// &metricpb.Metric{Name: "test.counter", Type: metricpb.Type_Counter, Tags: []string{"tag:1"}}, -// &metricpb.Metric{Name: "test.gauge", Type: metricpb.Type_Gauge}, -// &metricpb.Metric{Name: "test.histogram", Type: metricpb.Type_Histogram, Tags: []string{"type:histogram"}}, -// &metricpb.Metric{Name: "test.set", Type: metricpb.Type_Set}, -// &metricpb.Metric{Name: "test.gauge3", Type: metricpb.Type_Gauge}, -// } -// -// // Send the same inputs many times -// for i := 0; i < 10; i++ { -// s.SendMetrics(context.Background(), &forwardrpc.MetricList{inputs}) -// -// assert.Equal(t, []*metricpb.Metric{inputs[0], inputs[4]}, -// ingesters[0].metrics, "Ingester 0 has the wrong metrics") -// assert.Equal(t, []*metricpb.Metric{inputs[1], inputs[2], inputs[3]}, -// ingesters[1].metrics, "Ingester 1 has the wrong metrics") -// -// for _, ingester := range ingesters { -// ingester.clear() -// } -// } -//} -// -//func TestSendMetrics_Empty(t *testing.T) { -// ingester := &testMetricIngester{} -// s := New([]MetricIngester{ingester}) -// s.SendMetrics(context.Background(), &forwardrpc.MetricList{}) -// -// assert.Empty(t, ingester.metrics, "The server shouldn't have submitted "+ -// "any metrics") -//} -// //func TestOptions_WithTraceClient(t *testing.T) { // c, err := trace.NewClient(trace.DefaultVeneurAddress) // if err != nil { @@ -82,38 +241,6 @@ package importsrv // "set the trace client") //} // -//type noopChannelMetricIngester struct { -// in chan []*metricpb.Metric -// quit chan struct{} -//} -// -//func newNoopChannelMetricIngester() *noopChannelMetricIngester { -// return &noopChannelMetricIngester{ -// in: make(chan []*metricpb.Metric), -// quit: make(chan struct{}), -// } -//} -// -//func (mi *noopChannelMetricIngester) start() { -// go func() { -// for { -// select { -// case <-mi.in: -// case <-mi.quit: -// return -// } -// } -// }() -//} -// -//func (mi *noopChannelMetricIngester) stop() { -// mi.quit <- struct{}{} -//} -// -//func (mi *noopChannelMetricIngester) IngestMetrics(ms []*metricpb.Metric) { -// mi.in <- ms -//} -// //func BenchmarkImportServerSendMetrics(b *testing.B) { // rand.Seed(time.Now().Unix()) // diff --git a/importsrv/testtools_test.go b/importsrv/testtools_test.go new file mode 100644 index 000000000..084c6e0d6 --- /dev/null +++ b/importsrv/testtools_test.go @@ -0,0 +1,75 @@ +package importsrv_test + +import ( + "github.com/stripe/veneur/samplers/metricpb" +) + +// +// PROTOBUF +// + +func pbmetrics(ms ...*metricpb.Metric) []*metricpb.Metric { + return ms +} + +// options + +func pbtags(tags ...string) func(m *metricpb.Metric) { + return func(m *metricpb.Metric) { + m.Tags = tags + } +} + +func pbhostname(hostname string) func(m *metricpb.Metric) { + return func(m *metricpb.Metric) { + m.Hostname = hostname + } +} + +func pbscope(scope metricpb.Scope) func(m *metricpb.Metric) { + return func(m *metricpb.Metric) { + m.Scope = scope + } +} + +// metric types + +func pbcounter(name string, value int64, opts ...func(m *metricpb.Metric)) *metricpb.Metric { + c := &metricpb.Metric{ + Name: name, + Value: &metricpb.Metric_Counter{Counter: &metricpb.CounterValue{Value: value}}, + Type: metricpb.Type_Counter, + } + + for _, opt := range opts { + opt(c) + } + return c +} + +func pbgauge(name string, value float64, opts ...func(m *metricpb.Metric)) *metricpb.Metric { + c := &metricpb.Metric{ + Name: name, + Value: &metricpb.Metric_Gauge{Gauge: &metricpb.GaugeValue{Value: value}}, + Type: metricpb.Type_Gauge, + } + + for _, opt := range opts { + opt(c) + } + return c +} + +func pbhisto(name string, values []float64, opts ...func(m *metricpb.Metric)) *metricpb.Metric { + return nil +} + +func pbset(name string, values []string, opts ...func(m *metricpb.Metric)) *metricpb.Metric { + return nil +} + +// +// RESULTS +// + +// opts diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go index 3e636520f..4b683c6a1 100644 --- a/metricingester/aggingestor.go +++ b/metricingester/aggingestor.go @@ -1,12 +1,56 @@ package metricingester -import "time" +import ( + "context" + "time" +) type AggregatingIngestor struct { - workers []aggWorker - flusher func(samplerEnvelope) error - interval time.Duration - quit chan struct{} + workers []aggWorker + flusher flusher + ticker *time.Ticker + tickerC <-chan time.Time + quit chan struct{} +} + +type flusher interface { + Flush(ctx context.Context, envelope samplerEnvelope) +} + +type ingesterOption func(AggregatingIngestor) AggregatingIngestor + +// Override the ticker channel that triggers flushing. Useful for testing. +func FlushChan(tckr <-chan time.Time) ingesterOption { + return func(option AggregatingIngestor) AggregatingIngestor { + option.tickerC = tckr + return option + } +} + +// NewFlushingIngester creates an ingester that flushes metrics to the specified sinks. +func NewFlushingIngester( + workers int, + interval time.Duration, + sinks []Sink, + options ...ingesterOption, +) AggregatingIngestor { + var aggW []aggWorker + for i := 0; i < workers; i++ { + aggW = append(aggW, newAggWorker()) + } + + t := time.NewTicker(interval) + ing := AggregatingIngestor{ + workers: aggW, + flusher: newSinkFlusher(sinks), + ticker: t, + tickerC: t.C, + quit: make(chan struct{}), + } + for _, opt := range options { + ing = opt(ing) + } + return ing } // TODO(clin): This needs to take ctx. @@ -23,11 +67,14 @@ func (a AggregatingIngestor) Merge(d Digest) error { } func (a AggregatingIngestor) Start() { + for _, w := range a.workers { + w.Start() + } + go func() { - ticker := time.NewTicker(a.interval) for { select { - case <-ticker.C: + case <-a.tickerC: a.flush() case <-a.quit: return @@ -37,11 +84,17 @@ func (a AggregatingIngestor) Start() { } func (a AggregatingIngestor) Stop() { + a.ticker.Stop() close(a.quit) + for _, w := range a.workers { + w.Stop() + } } func (a AggregatingIngestor) flush() { for _, w := range a.workers { - go a.flusher(w.Flush()) + go func(worker aggWorker) { + a.flusher.Flush(context.Background(), worker.Flush()) + }(w) } } diff --git a/metricingester/aggworker.go b/metricingester/aggworker.go index 338cea0da..523458508 100644 --- a/metricingester/aggworker.go +++ b/metricingester/aggworker.go @@ -12,6 +12,15 @@ type aggWorker struct { flush chan chan<- samplerEnvelope } +func newAggWorker() aggWorker { + return aggWorker{ + samplers: newSamplerEnvelope(), + inC: make(chan Metric), + mergeC: make(chan Digest), + flush: make(chan chan<- samplerEnvelope), + } +} + func (a aggWorker) Start() { go func() { for { @@ -44,7 +53,7 @@ func (a aggWorker) Merge(d Digest) { func (a aggWorker) Stop() { close(a.inC) - close(a.flush) + close(a.mergeC) } func (a aggWorker) Flush() samplerEnvelope { diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go index efe73eebe..a501540cf 100644 --- a/metricingester/sinkflusher.go +++ b/metricingester/sinkflusher.go @@ -5,26 +5,45 @@ import ( "sync" "time" - "github.com/sirupsen/logrus" "github.com/stripe/veneur/samplers" - "github.com/stripe/veneur/trace" ) type sinkFlusher struct { aggregates samplers.HistogramAggregates percentiles []float64 - sinks []sink + sinks []Sink +} + +type sinkFlusherOpt func(sinkFlusher) sinkFlusher + +func optPercentiles(ps []float64) sinkFlusherOpt { + return func(f sinkFlusher) sinkFlusher { + f.percentiles = ps + return f + } +} - log *logrus.Logger - trace trace.Client +func optAggregates(as samplers.HistogramAggregates) sinkFlusherOpt { + return func(f sinkFlusher) sinkFlusher { + f.aggregates = as + return f + } +} + +func newSinkFlusher(sinks []Sink, opts ...sinkFlusherOpt) sinkFlusher { + sf := sinkFlusher{sinks: sinks} + for _, opt := range opts { + sf = opt(sf) + } + return sf } -type sink interface { +type Sink interface { Name() string Flush(context.Context, []samplers.InterMetric) error } -func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) error { +func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) { metrics := make([]samplers.InterMetric, 0, countMetrics(envelope)) // get metrics from envelope for _, sampler := range envelope.counters { @@ -44,23 +63,23 @@ func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) error } if len(metrics) == 0 { - return nil + return } // TODO(clin): Add back metrics once we finalize the metrics client pull request. wg := sync.WaitGroup{} for _, sinkInstance := range s.sinks { wg.Add(1) - go func(ms sink) { + go func(ms Sink) { err := ms.Flush(ctx, metrics) if err != nil { - s.log.WithError(err).WithField("sink", ms.Name()).Warn("Error flushing sink") + //s.log.WithError(err).WithField("Sink", ms.Name()).Warn("Error flushing Sink") } wg.Done() }(sinkInstance) } wg.Wait() - return nil + return } func countMetrics(samplers samplerEnvelope) (count int) { diff --git a/metricingester/types.go b/metricingester/types.go index 9ab9fe4ea..88eb32c4e 100644 --- a/metricingester/types.go +++ b/metricingester/types.go @@ -1,6 +1,7 @@ package metricingester import ( + "sort" "strings" "github.com/stripe/veneur/samplers" @@ -60,6 +61,7 @@ func (m Metric) Hash() metricHash { // // tags are assumed to be inC sorted order. func NewCounter(name string, v int64, tags []string, samplerate float32, hostname string) Metric { + sort.Sort(sort.StringSlice(tags)) return Metric{ name: name, countervalue: v, @@ -74,6 +76,7 @@ func NewCounter(name string, v int64, tags []string, samplerate float32, hostnam // // tags are assumed to be inC sorted order. func NewGauge(name string, v float64, tags []string, samplerate float32, hostname string) Metric { + sort.Sort(sort.StringSlice(tags)) return Metric{ name: name, gaugevalue: v, @@ -130,6 +133,7 @@ func NewHistogramDigest( tags []string, hostname string, ) Digest { + sort.Sort(sort.StringSlice(tags)) return Digest{ name: name, tags: tags, @@ -145,6 +149,7 @@ func NewMixedHistogramDigest( tags []string, hostname string, ) Digest { + sort.Sort(sort.StringSlice(tags)) return Digest{ name: name, tags: tags, @@ -162,6 +167,7 @@ func NewSetDigest( tags []string, hostname string, ) Digest { + sort.Sort(sort.StringSlice(tags)) return Digest{ name: name, tags: tags, diff --git a/samplers/test_tools.go b/samplers/test_tools.go new file mode 100644 index 000000000..9924730ed --- /dev/null +++ b/samplers/test_tools.go @@ -0,0 +1,81 @@ +package samplers + +import "strings" + +type TestMetric struct { + Name string + Tags string + Value float64 + Hostname string + Type MetricType + Message string + Sinks RouteInformation +} + +func TMetrics(rs ...TestMetric) []TestMetric { + return rs +} + +func TCounter(name string, value float64, opts ...func(r TestMetric) TestMetric) TestMetric { + r := TestMetric{ + Name: name, + Value: value, + Type: CounterMetric, + } + for _, opt := range opts { + r = opt(r) + } + return r +} + +func TGauge(name string, value float64, opts ...func(r TestMetric) TestMetric) TestMetric { + r := TestMetric{ + Name: name, + Value: value, + Type: GaugeMetric, + } + for _, opt := range opts { + r = opt(r) + } + return r +} + +func TStatus(name string, value float64, opts ...func(r TestMetric) TestMetric) TestMetric { + r := TestMetric{ + Name: name, + Value: value, + Type: StatusMetric, + } + for _, opt := range opts { + r = opt(r) + } + return r +} + +func OptTags(ts ...string) func(r TestMetric) TestMetric { + return func(r TestMetric) TestMetric { + r.Tags = strings.Join(ts, ",") + return r + } +} + +func OptHostname(hn string) func(r TestMetric) TestMetric { + return func(r TestMetric) TestMetric { + r.Hostname = hn + return r + } +} + +func OptMessage(msg string) func(r TestMetric) TestMetric { + return func(r TestMetric) TestMetric { + r.Message = msg + return r + } +} + +func OptSinks(ri RouteInformation) func(r TestMetric) TestMetric { + return func(r TestMetric) TestMetric { + r.Sinks = ri + return r + } +} From 481c946b2d38f44bf36aced71a89e851ddc7cfae Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Wed, 14 Nov 2018 16:18:52 -0800 Subject: [PATCH 06/17] Add a mixedhistogram sampler and tests. --- importsrv/server_test.go | 18 +----- importsrv/testtools_test.go | 6 -- samplers/mixedhisto.go | 97 ++++++++++++++++++++++++++++++ samplers/mixedhisto_test.go | 115 ++++++++++++++++++++++++++++++++++++ samplers/test_tools.go | 19 +++++- 5 files changed, 231 insertions(+), 24 deletions(-) create mode 100644 samplers/mixedhisto.go create mode 100644 samplers/mixedhisto_test.go diff --git a/importsrv/server_test.go b/importsrv/server_test.go index 43eca6cb9..e300b6afd 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -2,7 +2,6 @@ package importsrv_test import ( "context" - "strings" "testing" "time" @@ -159,21 +158,6 @@ var testE2EFlushingCases = []struct { // crazy random "real world" tests } -func toResults(ms []samplers.InterMetric) (outs []samplers.TestMetric) { - for _, inm := range ms { - outs = append(outs, samplers.TestMetric{ - Name: inm.Name, - Tags: strings.Join(inm.Tags, ","), - Value: inm.Value, - Type: inm.Type, - Message: inm.Message, - Hostname: inm.HostName, - Sinks: inm.Sinks, - }) - } - return outs -} - // TestE2EFlushingIngester tests the integration of the import endpoint with // the flushing ingester. func TestE2EFlushingIngester(t *testing.T) { @@ -226,7 +210,7 @@ func test(in []*metricpb.Metric, out []samplers.TestMetric, msg string) func(*te // ASSERT - assert.ElementsMatch(t, out, toResults(results), "MSG: %v // VALUES : %v", msg, results) + assert.ElementsMatch(t, out, samplers.ToTestMetrics(results), "MSG: %v // VALUES : %v", msg, results) } } diff --git a/importsrv/testtools_test.go b/importsrv/testtools_test.go index 084c6e0d6..da9867248 100644 --- a/importsrv/testtools_test.go +++ b/importsrv/testtools_test.go @@ -67,9 +67,3 @@ func pbhisto(name string, values []float64, opts ...func(m *metricpb.Metric)) *m func pbset(name string, values []string, opts ...func(m *metricpb.Metric)) *metricpb.Metric { return nil } - -// -// RESULTS -// - -// opts diff --git a/samplers/mixedhisto.go b/samplers/mixedhisto.go new file mode 100644 index 000000000..805679194 --- /dev/null +++ b/samplers/mixedhisto.go @@ -0,0 +1,97 @@ +package samplers + +import ( + "fmt" + "math" + "time" +) + +// NewMixedHisto creates a new mixed histogram. +func NewMixedHisto(name string, tags []string) MixedHisto { + return MixedHisto{ + histo: NewHist(name, tags), + min: make(map[string]float64), + max: make(map[string]float64), + weight: make(map[string]float64), + sum: make(map[string]float64), + reciprocalSum: make(map[string]float64), + } +} + +// MixedHisto is a sampler for the MixedScope histogram case. +// +// In other words, it tracks min, max, count, sum, and harmonic mean for each host +// separately but aggregates all percentiles globally. +// +// In the mixed scope case, histogram behavior is to emit summary statistics that +// can be aggregated without bias at the host level, but emit all other metrics +// (namely, percentiles) at the global level. +// +// Note that we don't support the "median" aggregate for mixed histograms. +type MixedHisto struct { + histo *Histo + min map[string]float64 + max map[string]float64 + weight map[string]float64 + sum map[string]float64 + reciprocalSum map[string]float64 +} + +// Sample accepts a new data point for the histogram. +func (m MixedHisto) Sample(sample float64, sampleRate float32, host string) { + m.histo.Sample(sample, sampleRate) + + weight := float64(1 / sampleRate) + m.min[host] = math.Min(sample, getDefault(m.min, host, math.Inf(+1))) + m.max[host] = math.Max(sample, getDefault(m.max, host, math.Inf(-1))) + m.weight[host] += weight + m.sum[host] += sample * weight + m.reciprocalSum[host] += (1 / sample) * weight +} + +// Flush returns metrics for the specified percentiles and aggregates. +func (m MixedHisto) Flush(percentiles []float64, aggregates HistogramAggregates) []InterMetric { + ms := m.histo.Flush(0, percentiles, HistogramAggregates{}, false) + + // doesn't support median! Would require implementing separated digests. + now := time.Now().Unix() + metric := func(suffix string, val float64, host string) InterMetric { + return InterMetric{ + Name: fmt.Sprintf("%s.%s", m.histo.Name, suffix), + Timestamp: now, + Value: val, + Tags: m.histo.Tags, + HostName: host, + Type: GaugeMetric, + Sinks: routeInfo(m.histo.Tags), + } + } + for host, _ := range m.max { + if (aggregates.Value & AggregateMax) != 0 { + ms = append(ms, metric("max", m.max[host], host)) + } + if (aggregates.Value & AggregateMin) != 0 { + ms = append(ms, metric("min", m.min[host], host)) + } + if (aggregates.Value & AggregateSum) != 0 { + ms = append(ms, metric("sum", m.sum[host], host)) + } + if (aggregates.Value & AggregateAverage) != 0 { + ms = append(ms, metric("avg", m.sum[host]/m.weight[host], host)) + } + if (aggregates.Value & AggregateCount) != 0 { + ms = append(ms, metric("count", m.weight[host], host)) + } + if (aggregates.Value & AggregateHarmonicMean) != 0 { + ms = append(ms, metric("hmean", m.weight[host]/m.reciprocalSum[host], host)) + } + } + return ms +} + +func getDefault(m map[string]float64, key string, def float64) float64 { + if v, ok := m[key]; ok { + return v + } + return def +} diff --git a/samplers/mixedhisto_test.go b/samplers/mixedhisto_test.go new file mode 100644 index 000000000..43e675ae7 --- /dev/null +++ b/samplers/mixedhisto_test.go @@ -0,0 +1,115 @@ +package samplers + +import ( + "testing" + + "github.com/stripe/veneur/samplers/metricpb" + "github.com/stripe/veneur/tdigest" + + "github.com/stretchr/testify/assert" +) + +type sampleCase struct { + host string + val float64 +} + +var sampleCases = []struct { + aggregates Aggregate + percentiles []float64 + in []sampleCase + out []TestMetric + msg string +}{ + { + AggregateMin, + []float64{}, + []sampleCase{{"", -2}, {"", 4}, {"", 2}}, + TMetrics( + TGauge("test.min", -2), + ), + "wrong min reported", + }, + { + AggregateSum, + []float64{}, + []sampleCase{{"", 1}, {"", 2}, {"", 3}}, + TMetrics(TGauge("test.sum", 6)), + "wrong sum reported", + }, + { + AggregateMax, + []float64{}, + []sampleCase{{"", -2}, {"", -4}, {"", -1}}, + TMetrics(TGauge("test.max", -1)), + "wrong max reported", + }, + { + AggregateMin | AggregateMax | AggregateAverage | AggregateCount | AggregateSum, + []float64{}, + []sampleCase{{"", 1}, {"", 2}, {"", 3}}, + TMetrics( + TGauge("test.min", 1), + TGauge("test.max", 3), + TGauge("test.avg", 2), + TGauge("test.count", 3), + TGauge("test.sum", 6), + ), + "an aggregate value is incorrect", + }, + { + AggregateHarmonicMean, + []float64{}, + []sampleCase{{"", 2}, {"", 4}, {"", 2}}, + TMetrics( + TGauge("test.hmean", 2.4), + ), + "harmonic mean is incorrect", + }, + { + 0, + []float64{.5, .99}, + []sampleCase{{"", 1}, {"", 2}, {"", 3}, {"", 4}, {"", 5}}, + TMetrics( + TGauge("test.50percentile", 3), + TGauge("test.99percentile", 4.975), + ), + "percentiles are incorrect", + }, + { + AggregateMin, + []float64{.5}, + []sampleCase{{"a", 1}, {"b", 2}, {"c", 3}}, + TMetrics( + TGauge("test.50percentile", 2), + TGauge("test.min", 1, OptHostname("a")), + TGauge("test.min", 2, OptHostname("b")), + TGauge("test.min", 3, OptHostname("c")), + ), + "aggregates reported separately", + }, +} + +func TestMixedHistoSample(t *testing.T) { + for _, c := range sampleCases { + t.Run(c.msg, testSample(c.percentiles, c.aggregates, c.in, c.out)) + } +} + +func testSample(ps []float64, aggs Aggregate, inms []sampleCase, ts []TestMetric) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + mh := NewMixedHisto("test", nil) + for _, inm := range inms { + mh.Sample(inm.val, 1, inm.host) + } + + results := ToTestMetrics(mh.Flush(ps, HistogramAggregates{aggs, 0})) + assert.ElementsMatch( + t, + results, + ts, + "EXPECTED: %v\nACTUAL:%v", ts, results, + ) + } +} diff --git a/samplers/test_tools.go b/samplers/test_tools.go index 9924730ed..7eab41d43 100644 --- a/samplers/test_tools.go +++ b/samplers/test_tools.go @@ -1,6 +1,8 @@ package samplers -import "strings" +import ( + "strings" +) type TestMetric struct { Name string @@ -79,3 +81,18 @@ func OptSinks(ri RouteInformation) func(r TestMetric) TestMetric { return r } } + +func ToTestMetrics(ms []InterMetric) (outs []TestMetric) { + for _, inm := range ms { + outs = append(outs, TestMetric{ + Name: inm.Name, + Tags: strings.Join(inm.Tags, ","), + Value: inm.Value, + Type: inm.Type, + Message: inm.Message, + Hostname: inm.HostName, + Sinks: inm.Sinks, + }) + } + return outs +} From 3af1f3e26f45fcf2184a92a6dd6360ef1707e58b Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Thu, 15 Nov 2018 11:29:34 -0800 Subject: [PATCH 07/17] Add supporting for mixed histogram merging + tests. --- importsrv/server_test.go | 23 ++-- importsrv/testtools_test.go | 16 ++- metricingester/aggingestor.go | 10 +- metricingester/sinkflusher.go | 30 +----- samplers/mixedhisto.go | 15 +++ samplers/mixedhisto_test.go | 191 ++++++++++++++++++++++++++++++++++ samplers/test_tools.go | 9 ++ 7 files changed, 257 insertions(+), 37 deletions(-) diff --git a/importsrv/server_test.go b/importsrv/server_test.go index e300b6afd..c3d55c717 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -134,7 +134,7 @@ var testE2EFlushingCases = []struct { samplers.TGauge("test.count", 3, samplers.OptHostname("a")), samplers.TGauge("test.p99", 3), ), - "mixed histos not reporting host level aggregates", + "mixed histos not reporting host level aggregates for one host", }, { pbmetrics( @@ -148,9 +148,10 @@ var testE2EFlushingCases = []struct { samplers.TGauge("test.min", 4, samplers.OptHostname("b")), samplers.TGauge("test.max", 6, samplers.OptHostname("b")), samplers.TGauge("test.count", 3, samplers.OptHostname("b")), - samplers.TGauge("test.p99", 6), + samplers.TGauge("test.50percentile", 3.5), + samplers.TGauge("test.99percentile", 5.984999999999999), ), - "mixed histos not reporting host level aggregates", + "mixed histos not reporting host level aggregates for two hosts", }, // sink tests @@ -182,6 +183,8 @@ func test(in []*metricpb.Metric, out []samplers.TestMetric, msg string) func(*te workers, // we want to test the parallel case 1, // this field is practically meaningless since we override the flush channel []metricingester.Sink{cms}, + []float64{0.5, 0.95}, + samplers.AggregateMin|samplers.AggregateMax|samplers.AggregateCount, metricingester.FlushChan(flushc), // override the flush ticker channel so we control when flush ) s := importsrv.New(ing) @@ -201,16 +204,22 @@ func test(in []*metricpb.Metric, out []samplers.TestMetric, msg string) func(*te for i := 0; i < workers; i++ { select { case result := <-rc: - //t.Logf("result: %v", result) results = append(results, result...) - case <-time.After(10 * time.Second): - t.Fatal("took too long to flush metric results") + case <-time.After(1 * time.Second): + t.Fatal("took too long to flush metric results or no metrics to flush!") } } // ASSERT - assert.ElementsMatch(t, out, samplers.ToTestMetrics(results), "MSG: %v // VALUES : %v", msg, results) + assert.ElementsMatch( + t, + out, + samplers.ToTestMetrics(results), + "EXPECTED: %+v\nACTUAL: %+v", + out, + samplers.ToTestMetrics(results), + ) } } diff --git a/importsrv/testtools_test.go b/importsrv/testtools_test.go index da9867248..9e4b33e42 100644 --- a/importsrv/testtools_test.go +++ b/importsrv/testtools_test.go @@ -2,6 +2,7 @@ package importsrv_test import ( "github.com/stripe/veneur/samplers/metricpb" + "github.com/stripe/veneur/tdigest" ) // @@ -61,7 +62,20 @@ func pbgauge(name string, value float64, opts ...func(m *metricpb.Metric)) *metr } func pbhisto(name string, values []float64, opts ...func(m *metricpb.Metric)) *metricpb.Metric { - return nil + td := tdigest.NewMerging(1000, true) + for _, s := range values { + td.Add(s, 1) + } + m := &metricpb.Metric{ + Name: name, + Value: &metricpb.Metric_Histogram{Histogram: &metricpb.HistogramValue{TDigest: td.Data()}}, + Type: metricpb.Type_Histogram, + } + + for _, opt := range opts { + opt(m) + } + return m } func pbset(name string, values []string, opts ...func(m *metricpb.Metric)) *metricpb.Metric { diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go index 4b683c6a1..9d846fb90 100644 --- a/metricingester/aggingestor.go +++ b/metricingester/aggingestor.go @@ -3,6 +3,8 @@ package metricingester import ( "context" "time" + + "github.com/stripe/veneur/samplers" ) type AggregatingIngestor struct { @@ -32,6 +34,8 @@ func NewFlushingIngester( workers int, interval time.Duration, sinks []Sink, + percentiles []float64, + aggregates samplers.Aggregate, options ...ingesterOption, ) AggregatingIngestor { var aggW []aggWorker @@ -42,7 +46,11 @@ func NewFlushingIngester( t := time.NewTicker(interval) ing := AggregatingIngestor{ workers: aggW, - flusher: newSinkFlusher(sinks), + flusher: sinkFlusher{ + sinks: sinks, + percentiles: percentiles, + aggregates: samplers.HistogramAggregates{aggregates, 4}, + }, ticker: t, tickerC: t.C, quit: make(chan struct{}), diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go index a501540cf..77bc997fa 100644 --- a/metricingester/sinkflusher.go +++ b/metricingester/sinkflusher.go @@ -16,28 +16,6 @@ type sinkFlusher struct { type sinkFlusherOpt func(sinkFlusher) sinkFlusher -func optPercentiles(ps []float64) sinkFlusherOpt { - return func(f sinkFlusher) sinkFlusher { - f.percentiles = ps - return f - } -} - -func optAggregates(as samplers.HistogramAggregates) sinkFlusherOpt { - return func(f sinkFlusher) sinkFlusher { - f.aggregates = as - return f - } -} - -func newSinkFlusher(sinks []Sink, opts ...sinkFlusherOpt) sinkFlusher { - sf := sinkFlusher{sinks: sinks} - for _, opt := range opts { - sf = opt(sf) - } - return sf -} - type Sink interface { Name() string Flush(context.Context, []samplers.InterMetric) error @@ -59,26 +37,22 @@ func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) { metrics = append(metrics, sampler.Flush(time.Second, s.percentiles, s.aggregates, true)...) } for _, sampler := range envelope.mixedHistograms { - metrics = append(metrics, sampler.Flush(time.Second, s.percentiles, s.aggregates, true)...) + metrics = append(metrics, sampler.Flush(s.percentiles, s.aggregates)...) } if len(metrics) == 0 { return } - // TODO(clin): Add back metrics once we finalize the metrics client pull request. - wg := sync.WaitGroup{} for _, sinkInstance := range s.sinks { - wg.Add(1) + // TODO(clin): Add back metrics once we finalize the metrics client pull request. go func(ms Sink) { err := ms.Flush(ctx, metrics) if err != nil { //s.log.WithError(err).WithField("Sink", ms.Name()).Warn("Error flushing Sink") } - wg.Done() }(sinkInstance) } - wg.Wait() return } diff --git a/samplers/mixedhisto.go b/samplers/mixedhisto.go index 805679194..6d6948205 100644 --- a/samplers/mixedhisto.go +++ b/samplers/mixedhisto.go @@ -4,6 +4,9 @@ import ( "fmt" "math" "time" + + "github.com/stripe/veneur/samplers/metricpb" + "github.com/stripe/veneur/tdigest" ) // NewMixedHisto creates a new mixed histogram. @@ -95,3 +98,15 @@ func getDefault(m map[string]float64, key string, def float64) float64 { } return def } + +// Merge merges in a histogram. +func (m MixedHisto) Merge(host string, v *metricpb.HistogramValue) { + merging := tdigest.NewMergingFromData(v.GetTDigest()) + m.histo.Value.Merge(merging) + + m.min[host] = math.Min(merging.Min(), getDefault(m.min, host, math.Inf(+1))) + m.max[host] = math.Max(merging.Max(), getDefault(m.max, host, math.Inf(-1))) + m.weight[host] += merging.Count() + m.sum[host] += merging.Sum() + m.reciprocalSum[host] += merging.ReciprocalSum() +} diff --git a/samplers/mixedhisto_test.go b/samplers/mixedhisto_test.go index 43e675ae7..3894fd9fd 100644 --- a/samplers/mixedhisto_test.go +++ b/samplers/mixedhisto_test.go @@ -113,3 +113,194 @@ func testSample(ps []float64, aggs Aggregate, inms []sampleCase, ts []TestMetric ) } } + +type mergeCase struct { + samples []float64 + host string +} + +var mergeCases = []struct { + aggregates Aggregate + percentiles []float64 + cases []mergeCase + out []TestMetric + msg string +}{ + { + AggregateMin, + nil, + []mergeCase{ + { + []float64{2, 2, 2}, + "", + }, + { + []float64{1, 2, 3}, + "", + }, + }, + TMetrics(TGauge("test.min", 1)), + "positive min is incorrect", + }, + { + AggregateMin, + nil, + []mergeCase{ + { + []float64{2, 2, 2}, + "", + }, + { + []float64{1, 2, -3}, + "", + }, + }, + TMetrics(TGauge("test.min", -3)), + "negative min is incorrect", + }, + { + AggregateMax, + nil, + []mergeCase{ + { + []float64{2, 2, 2}, + "", + }, + { + []float64{1, 2, -3}, + "", + }, + }, + TMetrics(TGauge("test.max", 2)), + "positive max is incorrect", + }, + { + AggregateMax, + nil, + []mergeCase{ + { + []float64{-2, -2, -2}, + "", + }, + { + []float64{-1, -2, -3}, + "", + }, + }, + TMetrics(TGauge("test.max", -1)), + "negative max is incorrect", + }, + { + AggregateCount | AggregateAverage, + nil, + []mergeCase{ + { + []float64{1, 2, 3}, + "", + }, + { + []float64{4, 5}, + "", + }, + }, + TMetrics( + TGauge("test.avg", 3), + TGauge("test.count", 5), + ), + "count or average is incorrect", + }, + { + AggregateHarmonicMean, + nil, + []mergeCase{ + { + []float64{2}, + "", + }, + { + []float64{4, 2}, + "", + }, + }, + TMetrics( + TGauge("test.hmean", 2.4), + ), + "harmonic mean is incorrect", + }, + { + 0, + []float64{.5, .99}, + []mergeCase{ + { + []float64{3, 5}, + "", + }, + { + []float64{1, 2, 4}, + "", + }, + }, + TMetrics( + TGauge("test.50percentile", 3), + TGauge("test.99percentile", 4.975), + ), + "percentiles are incorrect", + }, + { + AggregateMin, + []float64{.5}, + []mergeCase{ + { + []float64{3, 5}, + "", + }, + { + []float64{1, 2}, + "a", + }, + { + []float64{4}, + "b", + }, + }, + TMetrics( + TGauge("test.50percentile", 3), + TGauge("test.min", 3), + TGauge("test.min", 1, OptHostname("a")), + TGauge("test.min", 4, OptHostname("b")), + ), + "aggregates not aggregated separately for each hostname", + }, +} + +func TestMixedHistoMerge(t *testing.T) { + for _, c := range mergeCases { + t.Run(c.msg, testMerge(c.percentiles, c.aggregates, c.cases, c.out)) + } +} + +func testMerge(ps []float64, aggs Aggregate, mergeCase []mergeCase, ts []TestMetric) func(*testing.T) { + return func(t *testing.T) { + t.Parallel() + mh := NewMixedHisto("test", nil) + for _, c := range mergeCase { + mh.Merge(c.host, histvalue(c.samples)) + } + + results := ToTestMetrics(mh.Flush(ps, HistogramAggregates{aggs, 0})) + assert.ElementsMatch( + t, + results, + ts, + "EXPECTED: %v\nACTUAL:%v", ts, results, + ) + } +} + +func histvalue(samples []float64) *metricpb.HistogramValue { + td := tdigest.NewMerging(1000, true) + for _, s := range samples { + td.Add(s, 1.0) + } + return &metricpb.HistogramValue{td.Data()} +} diff --git a/samplers/test_tools.go b/samplers/test_tools.go index 7eab41d43..edab9b18c 100644 --- a/samplers/test_tools.go +++ b/samplers/test_tools.go @@ -1,6 +1,7 @@ package samplers import ( + "encoding/json" "strings" ) @@ -14,6 +15,14 @@ type TestMetric struct { Sinks RouteInformation } +func (t TestMetric) String() string { + m, err := json.MarshalIndent(t, "", " ") + if err != nil { + return "" + } + return string(m) +} + func TMetrics(rs ...TestMetric) []TestMetric { return rs } From 9fd6d5bc496015412a1107589f28222a58d7038b Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Fri, 16 Nov 2018 18:38:57 -0800 Subject: [PATCH 08/17] Integrate the mixed histogram sampler. --- importsrv/server_test.go | 13 ++++++++++--- metricingester/aggingestor.go | 7 ++++++- metricingester/aggworker.go | 9 ++++++++- metricingester/sinkflusher.go | 1 - metricingester/types.go | 20 ++++++++++++++++++-- 5 files changed, 42 insertions(+), 8 deletions(-) diff --git a/importsrv/server_test.go b/importsrv/server_test.go index c3d55c717..77ca0dd08 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -36,7 +36,13 @@ var testE2EFlushingCases = []struct { }, { pbmetrics(pbhisto("test", []float64{1, 2, 3})), - samplers.TMetrics(samplers.TGauge("test.min", 1), samplers.TGauge("test.max", 3), samplers.TCounter("test.count", 3)), + samplers.TMetrics( + samplers.TGauge("test.min", 1), + samplers.TGauge("test.max", 3), + samplers.TGauge("test.count", 3), + samplers.TGauge("test.50percentile", 2), + samplers.TGauge("test.95percentile", 2.925), + ), "histo not present", }, { @@ -132,7 +138,8 @@ var testE2EFlushingCases = []struct { samplers.TGauge("test.min", 1, samplers.OptHostname("a")), samplers.TGauge("test.max", 3, samplers.OptHostname("a")), samplers.TGauge("test.count", 3, samplers.OptHostname("a")), - samplers.TGauge("test.p99", 3), + samplers.TGauge("test.50percentile", 2), + samplers.TGauge("test.95percentile", 2.925), ), "mixed histos not reporting host level aggregates for one host", }, @@ -149,7 +156,7 @@ var testE2EFlushingCases = []struct { samplers.TGauge("test.max", 6, samplers.OptHostname("b")), samplers.TGauge("test.count", 3, samplers.OptHostname("b")), samplers.TGauge("test.50percentile", 3.5), - samplers.TGauge("test.99percentile", 5.984999999999999), + samplers.TGauge("test.95percentile", 5.85), ), "mixed histos not reporting host level aggregates for two hosts", }, diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go index 9d846fb90..09378051b 100644 --- a/metricingester/aggingestor.go +++ b/metricingester/aggingestor.go @@ -69,7 +69,12 @@ func (a AggregatingIngestor) Ingest(m Metric) error { } func (a AggregatingIngestor) Merge(d Digest) error { - workerid := d.Hash() % metricHash(len(a.workers)) + var workerid metricHash + if d.digestType == mixedHistoDigest { + workerid = d.MixedHash() % metricHash(len(a.workers)) + } else { + workerid = d.Hash() % metricHash(len(a.workers)) + } a.workers[workerid].Merge(d) return nil } diff --git a/metricingester/aggworker.go b/metricingester/aggworker.go index 523458508..6da9bba49 100644 --- a/metricingester/aggworker.go +++ b/metricingester/aggworker.go @@ -89,14 +89,21 @@ func (a aggWorker) ingest(m Metric) { } func (a aggWorker) merge(d Digest) { - key := d.Key() switch d.digestType { + case mixedHistoDigest: + key := d.MixedKey() + if _, present := a.samplers.mixedHistograms[key]; !present { + a.samplers.mixedHistograms[key] = samplers.NewMixedHisto(d.name, d.tags) + } + a.samplers.mixedHistograms[key].Merge(d.hostname, d.histodigest) case histoDigest: + key := d.Key() if _, present := a.samplers.histograms[key]; !present { a.samplers.histograms[key] = samplers.NewHist(d.name, d.tags) } a.samplers.histograms[key].Merge(d.histodigest) case setDigest: + key := d.Key() if _, present := a.samplers.sets[key]; !present { a.samplers.sets[key] = samplers.NewSet(d.name, d.tags) } diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go index 77bc997fa..52e614f14 100644 --- a/metricingester/sinkflusher.go +++ b/metricingester/sinkflusher.go @@ -2,7 +2,6 @@ package metricingester import ( "context" - "sync" "time" "github.com/stripe/veneur/samplers" diff --git a/metricingester/types.go b/metricingester/types.go index 88eb32c4e..ca4009b77 100644 --- a/metricingester/types.go +++ b/metricingester/types.go @@ -121,10 +121,25 @@ func (d Digest) Key() metricKey { } } +// MixedKey is the key used to identify mixed histograms, which need to be aggregated +// without consideration of host. +func (d Digest) MixedKey() metricKey { + return metricKey{ + name: d.name, + tags: strings.Join(d.tags, ","), + } +} + func (d Digest) Hash() metricHash { return metricHash(hash(d.name, d.hostname, d.tags)) } +// MixedHash is the hash used to shard mixed histograms, which need to be aggregated +// without consideration of host. +func (d Digest) MixedHash() metricHash { + return metricHash(hash(d.name, "", d.tags)) +} + // // tags are assumed to be inC sorted order. func NewHistogramDigest( @@ -133,6 +148,7 @@ func NewHistogramDigest( tags []string, hostname string, ) Digest { + // TODO(clin): Ensure tags are always sorted. sort.Sort(sort.StringSlice(tags)) return Digest{ name: name, @@ -196,7 +212,7 @@ type samplerEnvelope struct { counters map[metricKey]*samplers.Counter gauges map[metricKey]*samplers.Gauge histograms map[metricKey]*samplers.Histo - mixedHistograms map[metricKey]*samplers.Histo + mixedHistograms map[metricKey]samplers.MixedHisto sets map[metricKey]*samplers.Set } @@ -205,7 +221,7 @@ func newSamplerEnvelope() samplerEnvelope { counters: make(map[metricKey]*samplers.Counter), gauges: make(map[metricKey]*samplers.Gauge), histograms: make(map[metricKey]*samplers.Histo), - mixedHistograms: make(map[metricKey]*samplers.Histo), + mixedHistograms: make(map[metricKey]samplers.MixedHisto), sets: make(map[metricKey]*samplers.Set), } } From 325187212c1570bf012fb81d25230aa5f1809c14 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Mon, 19 Nov 2018 11:39:23 -0800 Subject: [PATCH 09/17] Integrate sets and thread hostname through everything. --- importsrv/server_test.go | 5 +-- importsrv/testtools_test.go | 21 ++++++++++- metricingester/aggingestor.go | 5 +++ metricingester/aggworker.go | 22 +++++++---- samplers/mixedhisto.go | 20 ++++++++-- samplers/samplers.go | 70 +++++++++++++++++++++++++++-------- 6 files changed, 113 insertions(+), 30 deletions(-) diff --git a/importsrv/server_test.go b/importsrv/server_test.go index 77ca0dd08..828dd8c50 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -11,9 +11,8 @@ import ( "github.com/stripe/veneur/importsrv" "github.com/stripe/veneur/metricingester" "github.com/stripe/veneur/samplers" - "github.com/stripe/veneur/sinks/channel" - "github.com/stripe/veneur/samplers/metricpb" + "github.com/stripe/veneur/sinks/channel" ) var testE2EFlushingCases = []struct { @@ -46,7 +45,7 @@ var testE2EFlushingCases = []struct { "histo not present", }, { - pbmetrics(pbset("test", []string{"asf"})), + pbmetrics(pbset("test", []string{"asf", "clin", "aditya"})), samplers.TMetrics(samplers.TGauge("test", 3)), "set not present", }, diff --git a/importsrv/testtools_test.go b/importsrv/testtools_test.go index 9e4b33e42..442c155d1 100644 --- a/importsrv/testtools_test.go +++ b/importsrv/testtools_test.go @@ -1,6 +1,7 @@ package importsrv_test import ( + "github.com/axiomhq/hyperloglog" "github.com/stripe/veneur/samplers/metricpb" "github.com/stripe/veneur/tdigest" ) @@ -79,5 +80,23 @@ func pbhisto(name string, values []float64, opts ...func(m *metricpb.Metric)) *m } func pbset(name string, values []string, opts ...func(m *metricpb.Metric)) *metricpb.Metric { - return nil + hll := hyperloglog.New() + for _, s := range values { + hll.Insert([]byte(s)) + } + + v, err := hll.MarshalBinary() + if err != nil { + panic(err) + } + m := &metricpb.Metric{ + Name: name, + Value: &metricpb.Metric_Set{Set: &metricpb.SetValue{HyperLogLog: v}}, + Type: metricpb.Type_Set, + } + + for _, opt := range opts { + opt(m) + } + return m } diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go index 09378051b..1e8aa1d38 100644 --- a/metricingester/aggingestor.go +++ b/metricingester/aggingestor.go @@ -38,6 +38,10 @@ func NewFlushingIngester( aggregates samplers.Aggregate, options ...ingesterOption, ) AggregatingIngestor { + if workers < 1 { + panic("more than one worker required") + } + var aggW []aggWorker for i := 0; i < workers; i++ { aggW = append(aggW, newAggWorker()) @@ -97,6 +101,7 @@ func (a AggregatingIngestor) Start() { } func (a AggregatingIngestor) Stop() { + // nb: tickers must be explicitly stopped to be GCed. a.ticker.Stop() close(a.quit) for _, w := range a.workers { diff --git a/metricingester/aggworker.go b/metricingester/aggworker.go index 6da9bba49..c65e820c2 100644 --- a/metricingester/aggworker.go +++ b/metricingester/aggworker.go @@ -67,22 +67,30 @@ func (a aggWorker) ingest(m Metric) { switch m.metricType { case counter: if _, present := a.samplers.counters[key]; !present { - a.samplers.counters[key] = samplers.NewCounter(m.name, m.tags) + a.samplers.counters[key] = &samplers.Counter{ + Name: m.name, + Hostname: m.hostname, + Tags: m.tags, + } } a.samplers.counters[key].Sample(float64(m.countervalue), m.samplerate) case gauge: if _, present := a.samplers.gauges[key]; !present { - a.samplers.gauges[key] = samplers.NewGauge(m.name, m.tags) + a.samplers.gauges[key] = &samplers.Gauge{ + Name: m.name, + Hostname: m.hostname, + Tags: m.tags, + } } a.samplers.gauges[key].Sample(m.gaugevalue, m.samplerate) case set: if _, present := a.samplers.sets[key]; !present { - a.samplers.sets[key] = samplers.NewSet(m.name, m.tags) + a.samplers.sets[key] = samplers.NewSet(m.name, m.tags, samplers.OptSetHostname(m.hostname)) } a.samplers.sets[key].Sample(m.setvalue, m.samplerate) case histogram: if _, present := a.samplers.histograms[key]; !present { - a.samplers.histograms[key] = samplers.NewHist(m.name, m.tags) + a.samplers.histograms[key] = samplers.NewHist(m.name, m.tags, samplers.OptHistHostname(m.hostname)) } a.samplers.histograms[key].Sample(m.histovalue, m.samplerate) } @@ -93,19 +101,19 @@ func (a aggWorker) merge(d Digest) { case mixedHistoDigest: key := d.MixedKey() if _, present := a.samplers.mixedHistograms[key]; !present { - a.samplers.mixedHistograms[key] = samplers.NewMixedHisto(d.name, d.tags) + a.samplers.mixedHistograms[key] = samplers.NewMixedHisto(d.name, d.tags, samplers.OptMixedHistoHostname(d.hostname)) } a.samplers.mixedHistograms[key].Merge(d.hostname, d.histodigest) case histoDigest: key := d.Key() if _, present := a.samplers.histograms[key]; !present { - a.samplers.histograms[key] = samplers.NewHist(d.name, d.tags) + a.samplers.histograms[key] = samplers.NewHist(d.name, d.tags, samplers.OptHistHostname(d.hostname)) } a.samplers.histograms[key].Merge(d.histodigest) case setDigest: key := d.Key() if _, present := a.samplers.sets[key]; !present { - a.samplers.sets[key] = samplers.NewSet(d.name, d.tags) + a.samplers.sets[key] = samplers.NewSet(d.name, d.tags, samplers.OptSetHostname(d.hostname)) } a.samplers.sets[key].Merge(d.setdigest) } diff --git a/samplers/mixedhisto.go b/samplers/mixedhisto.go index 6d6948205..a887ddf79 100644 --- a/samplers/mixedhisto.go +++ b/samplers/mixedhisto.go @@ -9,16 +9,29 @@ import ( "github.com/stripe/veneur/tdigest" ) +type optMixedHisto func(MixedHisto) MixedHisto + +func OptMixedHistoHostname(hn string) optMixedHisto { + return func(histo MixedHisto) MixedHisto { + histo.hostname = hn + return histo + } +} + // NewMixedHisto creates a new mixed histogram. -func NewMixedHisto(name string, tags []string) MixedHisto { - return MixedHisto{ - histo: NewHist(name, tags), +func NewMixedHisto(name string, tags []string, opts ...optMixedHisto) MixedHisto { + m := MixedHisto{ min: make(map[string]float64), max: make(map[string]float64), weight: make(map[string]float64), sum: make(map[string]float64), reciprocalSum: make(map[string]float64), } + for _, opt := range opts { + m = opt(m) + } + m.histo = NewHist(name, tags) + return m } // MixedHisto is a sampler for the MixedScope histogram case. @@ -33,6 +46,7 @@ func NewMixedHisto(name string, tags []string) MixedHisto { // Note that we don't support the "median" aggregate for mixed histograms. type MixedHisto struct { histo *Histo + hostname string min map[string]float64 max map[string]float64 weight map[string]float64 diff --git a/samplers/samplers.go b/samplers/samplers.go index 4c753911a..d10f5c4b5 100644 --- a/samplers/samplers.go +++ b/samplers/samplers.go @@ -128,9 +128,10 @@ func routeInfo(tags []string) RouteInformation { // Counter is an accumulator type Counter struct { - Name string - Tags []string - value int64 + Name string + Tags []string + Hostname string + value int64 } // GetName returns the name of the counter. @@ -154,6 +155,7 @@ func (c *Counter) Flush(interval time.Duration) []InterMetric { Tags: tags, Type: CounterMetric, Sinks: routeInfo(tags), + HostName: c.Hostname, }} } @@ -216,9 +218,10 @@ func NewCounter(Name string, Tags []string) *Counter { // Gauge retains whatever the last value was. type Gauge struct { - Name string - Tags []string - value float64 + Name string + Tags []string + Hostname string + value float64 } // Sample takes on whatever value is passed in as a sample. @@ -234,6 +237,7 @@ func (g *Gauge) Flush() []InterMetric { Name: g.Name, Timestamp: time.Now().Unix(), Value: float64(g.value), + HostName: g.Hostname, Tags: tags, Type: GaugeMetric, Sinks: routeInfo(tags), @@ -365,9 +369,10 @@ func NewStatusCheck(Name string, Tags []string) *StatusCheck { // Set is a list of unique values seen. type Set struct { - Name string - Tags []string - Hll *hyperloglog.Sketch + Name string + Tags []string + Hostname string + Hll *hyperloglog.Sketch } // Sample checks if the supplied value has is already in the filter. If not, it increments @@ -376,16 +381,28 @@ func (s *Set) Sample(sample string, sampleRate float32) { s.Hll.Insert([]byte(sample)) } +type setOpt func(*Set) + +func OptSetHostname(hn string) setOpt { + return func(s *Set) { + s.Hostname = hn + } +} + // NewSet generates a new Set and returns it -func NewSet(Name string, Tags []string) *Set { +func NewSet(Name string, Tags []string, opts ...setOpt) *Set { // error is only returned if precision is outside the 4-18 range // TODO: this is the maximum precision, should it be configurable? Hll := hyperloglog.New() - return &Set{ + s := &Set{ Name: Name, Tags: Tags, Hll: Hll, } + for _, opt := range opts { + opt(s) + } + return s } // Flush generates an InterMetric for the state of this Set. @@ -465,9 +482,10 @@ func (s *Set) Merge(v *metricpb.SetValue) error { // Histo is a collection of values that generates max, min, count, and // percentiles over time. type Histo struct { - Name string - Tags []string - Value *tdigest.MergingDigest + Name string + Tags []string + Value *tdigest.MergingDigest + Hostname string // these values are computed from only the samples that came through this // veneur instance, ignoring any histograms merged from elsewhere // we separate them because they're easy to aggregate on the backend without @@ -493,9 +511,17 @@ func (h *Histo) Sample(sample float64, sampleRate float32) { h.LocalReciprocalSum += (1 / sample) * weight } +type histOpt func(*Histo) + +func OptHistHostname(hn string) histOpt { + return func(h *Histo) { + h.Hostname = hn + } +} + // NewHist generates a new Histo and returns it. -func NewHist(Name string, Tags []string) *Histo { - return &Histo{ +func NewHist(Name string, Tags []string, opts ...histOpt) *Histo { + h := &Histo{ Name: Name, Tags: Tags, // we're going to allocate a lot of these, so we don't want them to be huge @@ -504,6 +530,10 @@ func NewHist(Name string, Tags []string) *Histo { LocalMax: math.Inf(-1), LocalSum: 0, } + for _, opt := range opts { + opt(h) + } + return h } // Flush generates InterMetrics for the current state of the Histo. percentiles @@ -541,6 +571,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.max", h.Name), Timestamp: now, Value: val, + HostName: h.Hostname, Tags: tags, Type: GaugeMetric, Sinks: sinks, @@ -557,6 +588,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.min", h.Name), Timestamp: now, Value: val, + HostName: h.Hostname, Tags: tags, Type: GaugeMetric, Sinks: sinks, @@ -574,6 +606,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.sum", h.Name), Timestamp: now, Value: val, + HostName: h.Hostname, Tags: tags, Type: GaugeMetric, Sinks: sinks, @@ -593,6 +626,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.avg", h.Name), Timestamp: now, Value: val, + HostName: h.Hostname, Tags: tags, Type: GaugeMetric, Sinks: sinks, @@ -613,6 +647,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.count", h.Name), Timestamp: now, Value: val, + HostName: h.Hostname, Tags: tags, Type: CounterMetric, Sinks: sinks, @@ -628,6 +663,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.median", h.Name), Timestamp: now, Value: float64(h.Value.Quantile(0.5)), + HostName: h.Hostname, Tags: tags, Type: GaugeMetric, Sinks: sinks, @@ -648,6 +684,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.hmean", h.Name), Timestamp: now, Value: val, + HostName: h.Hostname, Tags: tags, Type: GaugeMetric, Sinks: sinks, @@ -664,6 +701,7 @@ func (h *Histo) Flush(interval time.Duration, percentiles []float64, aggregates Name: fmt.Sprintf("%s.%dpercentile", h.Name, int(p*100)), Timestamp: now, Value: float64(h.Value.Quantile(p)), + HostName: h.Hostname, Tags: tags, Type: GaugeMetric, Sinks: sinks, From 8d6d34412852b0ce619a3002d2f15f47a7099414 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Mon, 19 Nov 2018 16:08:09 -0800 Subject: [PATCH 10/17] Add support for a mixedglobal scope to facilitate migrating leaf veneurs. --- config.go | 4 ++ forward_grpc_test.go | 2 +- importsrv/server.go | 19 +++++++-- importsrv/server_test.go | 39 +++++++++++++++--- metricingester/aggingestor.go | 6 +-- metricingester/aggworker.go | 3 ++ metricingester/sinkflusher.go | 2 +- metricingester/types.go | 25 ++++++++++++ samplers/metricpb/metric.pb.go | 73 ++++++++++++++++++---------------- samplers/metricpb/metric.proto | 1 + samplers/mixedhisto.go | 5 ++- samplers/mixedhisto_test.go | 8 +++- server.go | 24 ++++++++++- server_test.go | 11 ++--- 14 files changed, 161 insertions(+), 61 deletions(-) diff --git a/config.go b/config.go index 506425899..d74e6e83b 100644 --- a/config.go +++ b/config.go @@ -1,5 +1,7 @@ package veneur +import "github.com/stripe/veneur/metricingester" + type Config struct { Aggregates []string `yaml:"aggregates"` AwsAccessKeyID string `yaml:"aws_access_key_id"` @@ -95,4 +97,6 @@ type Config struct { TraceLightstepNumClients int `yaml:"trace_lightstep_num_clients"` TraceLightstepReconnectPeriod string `yaml:"trace_lightstep_reconnect_period"` TraceMaxLengthBytes int `yaml:"trace_max_length_bytes"` + + AdditionalMetricSinks []metricingester.Sink } diff --git a/forward_grpc_test.go b/forward_grpc_test.go index 4603fc1c7..872c70959 100644 --- a/forward_grpc_test.go +++ b/forward_grpc_test.go @@ -226,7 +226,7 @@ func TestE2EForwardingGRPCMetrics(t *testing.T) { } assert.ElementsMatch(t, expectedNames, actualNames, - "The global Veneur didn't flush the right metrics") + "The global Veneur didn't flush the right metrics.\nEXPECTED: %v\nACTUAL: %v", expectedNames, actualNames) close(done) }() ff.local.Flush(context.TODO()) diff --git a/importsrv/server.go b/importsrv/server.go index 2d64cc9b6..bffb2c8be 100644 --- a/importsrv/server.go +++ b/importsrv/server.go @@ -122,14 +122,25 @@ func (s *Server) SendMetrics(ctx context.Context, mlist *forwardrpc.MetricList) // min/max/count values locally. // // We need a special metric type to represent this: the MixedHistogram. + // + // We also need to distinguish between the behavior before all metrics were forwarded + // to the behavior after. The before case is represented by Scope_Mixed, which makes that host's + // "min/max/count" not get emitted by the central veneur, since the local veneur is emitting it + // still. Scoped_MixedGlobal makes the central veneur emit the min/max/count. + // + // TODO(clin): After we completely migrate to the new veneur, delete support for this latter distinction! switch m.GetScope() { - case metricpb.Scope_Local, metricpb.Scope_Global: + case metricpb.Scope_Mixed: s.ingester.Merge( - metricingester.NewHistogramDigest(m.Name, v.Histogram, tags, hostname), + metricingester.NewMixedHistogramDigest(m.Name, v.Histogram, tags, hostname, false), ) - case metricpb.Scope_Mixed: + case metricpb.Scope_MixedGlobal: s.ingester.Merge( - metricingester.NewMixedHistogramDigest(m.Name, v.Histogram, tags, hostname), + metricingester.NewMixedHistogramDigest(m.Name, v.Histogram, tags, hostname, true), + ) + case metricpb.Scope_Local, metricpb.Scope_Global: + s.ingester.Merge( + metricingester.NewHistogramDigest(m.Name, v.Histogram, tags, hostname), ) } case nil: diff --git a/importsrv/server_test.go b/importsrv/server_test.go index 828dd8c50..762db0ead 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -34,7 +34,7 @@ var testE2EFlushingCases = []struct { "gauge not present", }, { - pbmetrics(pbhisto("test", []float64{1, 2, 3})), + pbmetrics(pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_MixedGlobal))), samplers.TMetrics( samplers.TGauge("test.min", 1), samplers.TGauge("test.max", 3), @@ -42,7 +42,15 @@ var testE2EFlushingCases = []struct { samplers.TGauge("test.50percentile", 2), samplers.TGauge("test.95percentile", 2.925), ), - "histo not present", + "mixed global histo not present", + }, + { + pbmetrics(pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_Mixed))), + samplers.TMetrics( + samplers.TGauge("test.50percentile", 2), + samplers.TGauge("test.95percentile", 2.925), + ), + "mixed histo not correct", }, { pbmetrics(pbset("test", []string{"asf", "clin", "aditya"})), @@ -131,7 +139,7 @@ var testE2EFlushingCases = []struct { { pbmetrics( - pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_Mixed), pbhostname("a")), + pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_MixedGlobal), pbhostname("a")), ), samplers.TMetrics( samplers.TGauge("test.min", 1, samplers.OptHostname("a")), @@ -140,12 +148,22 @@ var testE2EFlushingCases = []struct { samplers.TGauge("test.50percentile", 2), samplers.TGauge("test.95percentile", 2.925), ), - "mixed histos not reporting host level aggregates for one host", + "global mixed histos not reporting host level aggregates for one host", }, { pbmetrics( pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_Mixed), pbhostname("a")), - pbhisto("test", []float64{4, 5, 6}, pbscope(metricpb.Scope_Mixed), pbhostname("b")), + ), + samplers.TMetrics( + samplers.TGauge("test.50percentile", 2), + samplers.TGauge("test.95percentile", 2.925), + ), + "mixed histos should not report host metrics", + }, + { + pbmetrics( + pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_MixedGlobal), pbhostname("a")), + pbhisto("test", []float64{4, 5, 6}, pbscope(metricpb.Scope_MixedGlobal), pbhostname("b")), ), samplers.TMetrics( samplers.TGauge("test.min", 1, samplers.OptHostname("a")), @@ -157,6 +175,17 @@ var testE2EFlushingCases = []struct { samplers.TGauge("test.50percentile", 3.5), samplers.TGauge("test.95percentile", 5.85), ), + "global mixed histos not reporting host level aggregates for two hosts", + }, + { + pbmetrics( + pbhisto("test", []float64{1, 2, 3}, pbscope(metricpb.Scope_Mixed), pbhostname("a")), + pbhisto("test", []float64{4, 5, 6}, pbscope(metricpb.Scope_Mixed), pbhostname("b")), + ), + samplers.TMetrics( + samplers.TGauge("test.50percentile", 3.5), + samplers.TGauge("test.95percentile", 5.85), + ), "mixed histos not reporting host level aggregates for two hosts", }, diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go index 1e8aa1d38..8e66796d5 100644 --- a/metricingester/aggingestor.go +++ b/metricingester/aggingestor.go @@ -38,10 +38,6 @@ func NewFlushingIngester( aggregates samplers.Aggregate, options ...ingesterOption, ) AggregatingIngestor { - if workers < 1 { - panic("more than one worker required") - } - var aggW []aggWorker for i := 0; i < workers; i++ { aggW = append(aggW, newAggWorker()) @@ -74,7 +70,7 @@ func (a AggregatingIngestor) Ingest(m Metric) error { func (a AggregatingIngestor) Merge(d Digest) error { var workerid metricHash - if d.digestType == mixedHistoDigest { + if d.digestType == mixedHistoDigest || d.digestType == mixedGlobalHistoDigest { workerid = d.MixedHash() % metricHash(len(a.workers)) } else { workerid = d.Hash() % metricHash(len(a.workers)) diff --git a/metricingester/aggworker.go b/metricingester/aggworker.go index c65e820c2..a669eff47 100644 --- a/metricingester/aggworker.go +++ b/metricingester/aggworker.go @@ -104,6 +104,9 @@ func (a aggWorker) merge(d Digest) { a.samplers.mixedHistograms[key] = samplers.NewMixedHisto(d.name, d.tags, samplers.OptMixedHistoHostname(d.hostname)) } a.samplers.mixedHistograms[key].Merge(d.hostname, d.histodigest) + if d.flushMixed { + a.samplers.mixedHosts[d.hostname] = struct{}{} + } case histoDigest: key := d.Key() if _, present := a.samplers.histograms[key]; !present { diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go index 52e614f14..afe89104f 100644 --- a/metricingester/sinkflusher.go +++ b/metricingester/sinkflusher.go @@ -36,7 +36,7 @@ func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) { metrics = append(metrics, sampler.Flush(time.Second, s.percentiles, s.aggregates, true)...) } for _, sampler := range envelope.mixedHistograms { - metrics = append(metrics, sampler.Flush(s.percentiles, s.aggregates)...) + metrics = append(metrics, sampler.Flush(s.percentiles, s.aggregates, envelope.mixedHosts)...) } if len(metrics) == 0 { diff --git a/metricingester/types.go b/metricingester/types.go index ca4009b77..69d1d369e 100644 --- a/metricingester/types.go +++ b/metricingester/types.go @@ -96,6 +96,7 @@ type digestType int const ( histoDigest digestType = iota + 1 mixedHistoDigest + mixedGlobalHistoDigest setDigest ) @@ -109,6 +110,8 @@ type Digest struct { histodigest *metricpb.HistogramValue setdigest *metricpb.SetValue + + flushMixed bool } // TODO(clin): Ideally merge this with Hash when we choose an algorithm with sufficiently @@ -164,6 +167,7 @@ func NewMixedHistogramDigest( digest *metricpb.HistogramValue, tags []string, hostname string, + flushMixed bool, ) Digest { sort.Sort(sort.StringSlice(tags)) return Digest{ @@ -172,6 +176,23 @@ func NewMixedHistogramDigest( digestType: mixedHistoDigest, hostname: hostname, histodigest: digest, + flushMixed: flushMixed, + } +} + +func NewMixedGlobalHistogramDigest( + name string, + digest *metricpb.HistogramValue, + tags []string, + hostname string, +) Digest { + sort.Sort(sort.StringSlice(tags)) + return Digest{ + name: name, + tags: tags, + digestType: mixedGlobalHistoDigest, + hostname: hostname, + histodigest: digest, } } @@ -214,6 +235,8 @@ type samplerEnvelope struct { histograms map[metricKey]*samplers.Histo mixedHistograms map[metricKey]samplers.MixedHisto sets map[metricKey]*samplers.Set + + mixedHosts map[string]struct{} } func newSamplerEnvelope() samplerEnvelope { @@ -223,5 +246,7 @@ func newSamplerEnvelope() samplerEnvelope { histograms: make(map[metricKey]*samplers.Histo), mixedHistograms: make(map[metricKey]samplers.MixedHisto), sets: make(map[metricKey]*samplers.Set), + + mixedHosts: make(map[string]struct{}), } } diff --git a/samplers/metricpb/metric.pb.go b/samplers/metricpb/metric.pb.go index 737be3e99..0d1b288ae 100644 --- a/samplers/metricpb/metric.pb.go +++ b/samplers/metricpb/metric.pb.go @@ -40,20 +40,23 @@ const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package type Scope int32 const ( - Scope_Mixed Scope = 0 - Scope_Local Scope = 1 - Scope_Global Scope = 2 + Scope_Mixed Scope = 0 + Scope_Local Scope = 1 + Scope_Global Scope = 2 + Scope_MixedGlobal Scope = 3 ) var Scope_name = map[int32]string{ 0: "Mixed", 1: "Local", 2: "Global", + 3: "MixedGlobal", } var Scope_value = map[string]int32{ - "Mixed": 0, - "Local": 1, - "Global": 2, + "Mixed": 0, + "Local": 1, + "Global": 2, + "MixedGlobal": 3, } func (x Scope) String() string { @@ -1445,34 +1448,34 @@ var ( func init() { proto.RegisterFile("samplers/metricpb/metric.proto", fileDescriptorMetric) } var fileDescriptorMetric = []byte{ - // 451 bytes of a gzipped FileDescriptorProto + // 459 bytes of a gzipped FileDescriptorProto 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x54, 0x92, 0xd1, 0x6a, 0xdb, 0x30, - 0x14, 0x86, 0xa3, 0x38, 0x8e, 0xed, 0x93, 0x36, 0x33, 0x87, 0x6e, 0x88, 0x5c, 0x98, 0x60, 0xb6, - 0x91, 0x95, 0xe1, 0x42, 0xc6, 0x60, 0xb7, 0xeb, 0x0a, 0xe9, 0x45, 0x72, 0xe3, 0x96, 0xdd, 0x16, - 0x25, 0x15, 0x8a, 0xc1, 0x8e, 0x8c, 0xad, 0x8c, 0xe5, 0x2d, 0xf6, 0x58, 0xbb, 0xdc, 0x23, 0x8c, - 0x6c, 0x0f, 0x52, 0x24, 0x5b, 0x75, 0x73, 0x11, 0x72, 0xce, 0x7f, 0xbe, 0x5f, 0xe6, 0x3f, 0x12, - 0x44, 0x35, 0x2b, 0xca, 0x9c, 0x57, 0xf5, 0x55, 0xc1, 0x55, 0x95, 0x6d, 0xca, 0x75, 0x5b, 0x24, - 0x65, 0x25, 0x95, 0x44, 0xdf, 0xca, 0x93, 0xd7, 0xea, 0x31, 0x13, 0xbc, 0x56, 0x57, 0xed, 0x7f, - 0x03, 0xc4, 0xff, 0xfb, 0x30, 0x5c, 0x19, 0x06, 0x11, 0x06, 0x3b, 0x56, 0x70, 0x4a, 0xa6, 0x64, - 0x16, 0xa4, 0xa6, 0xd6, 0x9a, 0x62, 0xa2, 0xa6, 0xfd, 0xa9, 0xa3, 0x35, 0x5d, 0x63, 0x0c, 0x03, - 0x75, 0x28, 0x39, 0x75, 0xa6, 0x64, 0x36, 0x9e, 0x8f, 0x13, 0xfb, 0x89, 0xe4, 0xfe, 0x50, 0xf2, - 0xd4, 0xcc, 0x70, 0x0e, 0xde, 0x46, 0xee, 0x77, 0x8a, 0x57, 0xd4, 0x9d, 0x92, 0xd9, 0x68, 0xfe, - 0xa6, 0xc3, 0xbe, 0x35, 0x83, 0xef, 0x2c, 0xdf, 0xf3, 0xdb, 0x5e, 0x6a, 0x41, 0xfc, 0x08, 0xae, - 0x60, 0x7b, 0xc1, 0xe9, 0xd0, 0x38, 0x2e, 0x3a, 0xc7, 0x42, 0xcb, 0x96, 0x6f, 0x20, 0xfc, 0x02, - 0xc1, 0x36, 0xab, 0x95, 0x14, 0x15, 0x2b, 0xa8, 0x67, 0x1c, 0xb4, 0x73, 0xdc, 0xda, 0x91, 0x75, - 0x75, 0x30, 0xbe, 0x07, 0xa7, 0xe6, 0x8a, 0xfa, 0xc6, 0x83, 0x9d, 0xe7, 0x8e, 0x2b, 0x4b, 0x6b, - 0x00, 0xdf, 0x81, 0x5b, 0x6f, 0x64, 0xc9, 0x69, 0x60, 0x82, 0xbe, 0x7a, 0x41, 0x6a, 0x39, 0x6d, - 0xa6, 0x38, 0x01, 0x7f, 0x2b, 0x6b, 0x65, 0x56, 0x07, 0x66, 0x75, 0xcf, 0xfd, 0xb5, 0x07, 0xee, - 0x0f, 0x7d, 0x64, 0xfc, 0x16, 0xce, 0x5e, 0xc6, 0xc6, 0x8b, 0x76, 0x60, 0x96, 0xed, 0xa4, 0x2d, - 0x15, 0x03, 0x74, 0x51, 0x4f, 0x19, 0x62, 0x99, 0x05, 0x8c, 0x4f, 0xc3, 0xe1, 0x67, 0xf0, 0xd5, - 0x43, 0x73, 0xa9, 0x06, 0x1d, 0xcd, 0x27, 0x89, 0xbd, 0xe4, 0x15, 0xaf, 0x44, 0xb6, 0x13, 0x37, - 0xa6, 0xbb, 0x61, 0x8a, 0xa5, 0x9e, 0x6a, 0x9a, 0x38, 0x01, 0xdf, 0x26, 0xc6, 0x18, 0xce, 0xb7, - 0x87, 0x92, 0x57, 0x0f, 0xb9, 0x14, 0xfa, 0x67, 0xce, 0x39, 0x4b, 0x47, 0x46, 0x5c, 0x4a, 0xb1, - 0x94, 0xe2, 0xf2, 0x03, 0xb8, 0x26, 0x37, 0x06, 0xe0, 0xae, 0xb2, 0x9f, 0xfc, 0x31, 0xec, 0xe9, - 0x72, 0x29, 0x37, 0x2c, 0x0f, 0x09, 0x02, 0x0c, 0x17, 0xb9, 0x5c, 0xb3, 0x3c, 0xec, 0x5f, 0x7e, - 0x85, 0x81, 0x7e, 0x0b, 0x38, 0x02, 0xaf, 0x4d, 0xdd, 0xb0, 0x26, 0x5c, 0x48, 0xf0, 0x1c, 0x82, - 0xe7, 0x0c, 0x61, 0x1f, 0x3d, 0x70, 0xee, 0xb8, 0x0a, 0x1d, 0x8d, 0xdc, 0x67, 0x05, 0xaf, 0xc2, - 0xc1, 0x75, 0xf8, 0xfb, 0x18, 0x91, 0x3f, 0xc7, 0x88, 0xfc, 0x3d, 0x46, 0xe4, 0xd7, 0xbf, 0xa8, - 0xb7, 0x1e, 0x9a, 0x07, 0xfb, 0xe9, 0x29, 0x00, 0x00, 0xff, 0xff, 0x7b, 0x6d, 0x88, 0x2f, 0xf3, - 0x02, 0x00, 0x00, + 0x14, 0x86, 0xab, 0x38, 0x8e, 0xed, 0xe3, 0x36, 0x35, 0x87, 0x6e, 0x88, 0x5c, 0x98, 0x60, 0xb6, + 0x11, 0xca, 0x70, 0x21, 0x63, 0x30, 0x76, 0xb7, 0xae, 0x90, 0x5e, 0x24, 0x37, 0x6e, 0xd9, 0x6d, + 0x51, 0x52, 0xa1, 0x18, 0xec, 0xc8, 0xd8, 0xca, 0x58, 0xde, 0x62, 0x8f, 0xb5, 0xcb, 0x3d, 0xc2, + 0xc8, 0xf6, 0x20, 0x43, 0xb2, 0x55, 0xb7, 0x17, 0x21, 0xe7, 0xfc, 0xe7, 0xfb, 0x65, 0xfe, 0x23, + 0x41, 0xdc, 0xb0, 0xb2, 0x2a, 0x78, 0xdd, 0x5c, 0x95, 0x5c, 0xd5, 0xf9, 0xa6, 0x5a, 0x77, 0x45, + 0x5a, 0xd5, 0x52, 0x49, 0xf4, 0xad, 0x3c, 0x79, 0xa5, 0x1e, 0x73, 0xc1, 0x1b, 0x75, 0xd5, 0xfd, + 0xb7, 0x40, 0xf2, 0x6f, 0x00, 0xa3, 0x95, 0x61, 0x10, 0x61, 0xb8, 0x63, 0x25, 0xa7, 0x64, 0x4a, + 0x66, 0x41, 0x66, 0x6a, 0xad, 0x29, 0x26, 0x1a, 0x3a, 0x98, 0x3a, 0x5a, 0xd3, 0x35, 0x26, 0x30, + 0x54, 0x87, 0x8a, 0x53, 0x67, 0x4a, 0x66, 0xe3, 0xf9, 0x38, 0xb5, 0x9f, 0x48, 0xef, 0x0f, 0x15, + 0xcf, 0xcc, 0x0c, 0xe7, 0xe0, 0x6d, 0xe4, 0x7e, 0xa7, 0x78, 0x4d, 0xdd, 0x29, 0x99, 0x85, 0xf3, + 0xd7, 0x3d, 0xf6, 0xb5, 0x1d, 0x7c, 0x63, 0xc5, 0x9e, 0xdf, 0x9e, 0x64, 0x16, 0xc4, 0xf7, 0xe0, + 0x0a, 0xb6, 0x17, 0x9c, 0x8e, 0x8c, 0xe3, 0xa2, 0x77, 0x2c, 0xb4, 0x6c, 0xf9, 0x16, 0xc2, 0x4f, + 0x10, 0x6c, 0xf3, 0x46, 0x49, 0x51, 0xb3, 0x92, 0x7a, 0xc6, 0x41, 0x7b, 0xc7, 0xad, 0x1d, 0x59, + 0x57, 0x0f, 0xe3, 0x3b, 0x70, 0x1a, 0xae, 0xa8, 0x6f, 0x3c, 0xd8, 0x7b, 0xee, 0xb8, 0xb2, 0xb4, + 0x06, 0xf0, 0x2d, 0xb8, 0xcd, 0x46, 0x56, 0x9c, 0x06, 0x26, 0xe8, 0xf9, 0x33, 0x52, 0xcb, 0x59, + 0x3b, 0xc5, 0x09, 0xf8, 0x5b, 0xd9, 0x28, 0xb3, 0x3a, 0x30, 0xab, 0x7b, 0xea, 0xaf, 0x3d, 0x70, + 0xbf, 0xeb, 0x23, 0x93, 0x37, 0x70, 0xfa, 0x3c, 0x36, 0x5e, 0x74, 0x03, 0xb3, 0x6c, 0x27, 0xeb, + 0xa8, 0x04, 0xa0, 0x8f, 0xfa, 0x92, 0x21, 0x96, 0x59, 0xc0, 0xf8, 0x65, 0x38, 0xfc, 0x08, 0xbe, + 0x7a, 0x68, 0x2f, 0xd5, 0xa0, 0xe1, 0x7c, 0x92, 0xda, 0x4b, 0x5e, 0xf1, 0x5a, 0xe4, 0x3b, 0x71, + 0x63, 0xba, 0x1b, 0xa6, 0x58, 0xe6, 0xa9, 0xb6, 0x49, 0x52, 0xf0, 0x6d, 0x62, 0x4c, 0xe0, 0x6c, + 0x7b, 0xa8, 0x78, 0xfd, 0x50, 0x48, 0xa1, 0x7f, 0xe6, 0x9c, 0xd3, 0x2c, 0x34, 0xe2, 0x52, 0x8a, + 0xa5, 0x14, 0x97, 0x9f, 0xc1, 0x35, 0xb9, 0x31, 0x00, 0x77, 0x95, 0xff, 0xe0, 0x8f, 0xd1, 0x89, + 0x2e, 0x97, 0x72, 0xc3, 0x8a, 0x88, 0x20, 0xc0, 0x68, 0x51, 0xc8, 0x35, 0x2b, 0xa2, 0x01, 0x9e, + 0x43, 0x68, 0x88, 0x4e, 0x70, 0x2e, 0xbf, 0xc0, 0x50, 0x3f, 0x0e, 0x0c, 0xc1, 0xeb, 0xd6, 0xd0, + 0x9a, 0x4d, 0xda, 0x88, 0xe0, 0x19, 0x04, 0x4f, 0xa1, 0xa2, 0x01, 0x7a, 0xe0, 0xdc, 0x71, 0x15, + 0x39, 0x1a, 0xb9, 0xcf, 0x4b, 0x5e, 0x47, 0xc3, 0xeb, 0xe8, 0xd7, 0x31, 0x26, 0xbf, 0x8f, 0x31, + 0xf9, 0x73, 0x8c, 0xc9, 0xcf, 0xbf, 0xf1, 0xc9, 0x7a, 0x64, 0x5e, 0xf0, 0x87, 0xff, 0x01, 0x00, + 0x00, 0xff, 0xff, 0xd3, 0x80, 0x0a, 0x59, 0x04, 0x03, 0x00, 0x00, } diff --git a/samplers/metricpb/metric.proto b/samplers/metricpb/metric.proto index dd56c918c..90e4b9820 100644 --- a/samplers/metricpb/metric.proto +++ b/samplers/metricpb/metric.proto @@ -27,6 +27,7 @@ enum Scope { Mixed = 0; Local = 1; Global = 2; + MixedGlobal = 3; } // Type can be any of the valid metric types recognized by Veneur. diff --git a/samplers/mixedhisto.go b/samplers/mixedhisto.go index a887ddf79..593933fa2 100644 --- a/samplers/mixedhisto.go +++ b/samplers/mixedhisto.go @@ -67,7 +67,7 @@ func (m MixedHisto) Sample(sample float64, sampleRate float32, host string) { } // Flush returns metrics for the specified percentiles and aggregates. -func (m MixedHisto) Flush(percentiles []float64, aggregates HistogramAggregates) []InterMetric { +func (m MixedHisto) Flush(percentiles []float64, aggregates HistogramAggregates, mixedHosts map[string]struct{}) []InterMetric { ms := m.histo.Flush(0, percentiles, HistogramAggregates{}, false) // doesn't support median! Would require implementing separated digests. @@ -84,6 +84,9 @@ func (m MixedHisto) Flush(percentiles []float64, aggregates HistogramAggregates) } } for host, _ := range m.max { + if _, ok := mixedHosts[host]; !ok { + continue + } if (aggregates.Value & AggregateMax) != 0 { ms = append(ms, metric("max", m.max[host], host)) } diff --git a/samplers/mixedhisto_test.go b/samplers/mixedhisto_test.go index 3894fd9fd..4cd2accbd 100644 --- a/samplers/mixedhisto_test.go +++ b/samplers/mixedhisto_test.go @@ -100,11 +100,13 @@ func testSample(ps []float64, aggs Aggregate, inms []sampleCase, ts []TestMetric return func(t *testing.T) { t.Parallel() mh := NewMixedHisto("test", nil) + mixedServers := make(map[string]struct{}) for _, inm := range inms { mh.Sample(inm.val, 1, inm.host) + mixedServers[inm.host] = struct{}{} } - results := ToTestMetrics(mh.Flush(ps, HistogramAggregates{aggs, 0})) + results := ToTestMetrics(mh.Flush(ps, HistogramAggregates{aggs, 0}, mixedServers)) assert.ElementsMatch( t, results, @@ -283,11 +285,13 @@ func testMerge(ps []float64, aggs Aggregate, mergeCase []mergeCase, ts []TestMet return func(t *testing.T) { t.Parallel() mh := NewMixedHisto("test", nil) + mixedServers := make(map[string]struct{}) for _, c := range mergeCase { mh.Merge(c.host, histvalue(c.samples)) + mixedServers[c.host] = struct{}{} } - results := ToTestMetrics(mh.Flush(ps, HistogramAggregates{aggs, 0})) + results := ToTestMetrics(mh.Flush(ps, HistogramAggregates{aggs, 0}, mixedServers)) assert.ElementsMatch( t, results, diff --git a/server.go b/server.go index 89d67de10..c349774e9 100644 --- a/server.go +++ b/server.go @@ -585,9 +585,29 @@ func NewFromConfig(logger *logrus.Logger, conf Config) (*Server, error) { ret.grpcListenAddress = conf.GrpcAddress if ret.grpcListenAddress != "" { // convert all the workers to the proper interface + var sinks []metricingester.Sink - ret.grpcServer = importsrv.New(metricingester.AggregatingIngestor{}, - importsrv.WithTraceClient(ret.TraceClient)) + sinks = append(sinks, conf.AdditionalMetricSinks...) + + for _, s := range ret.metricSinks { + sinks = append(sinks, s) + } + for _, s := range ret.plugins { + sinks = append(sinks, s) + } + + ing := metricingester.NewFlushingIngester( + conf.NumWorkers, + ret.interval, + sinks, + conf.Percentiles, + ret.HistogramAggregates.Value, + ) + ing.Start() + ret.grpcServer = importsrv.New( + ing, + importsrv.WithTraceClient(ret.TraceClient), + ) } logger.WithField("config", conf).Debug("Initialized server") diff --git a/server_test.go b/server_test.go index 3a438178e..f24b3555b 100644 --- a/server_test.go +++ b/server_test.go @@ -134,6 +134,12 @@ func generateMetrics() (metricValues []float64, expectedMetrics map[string]float // so that flushes to these sinks do "nothing". func setupVeneurServer(t testing.TB, config Config, transport http.RoundTripper, mSink sinks.MetricSink, sSink sinks.SpanSink) *Server { logger := logrus.New() + if mSink == nil { + // Install a blackhole sink if we have no other sinks + bhs, _ := blackhole.NewBlackholeMetricSink() + mSink = bhs + } + config.AdditionalMetricSinks = append(config.AdditionalMetricSinks, mSink) server, err := NewFromConfig(logger, config) if err != nil { t.Fatal(err) @@ -150,11 +156,6 @@ func setupVeneurServer(t testing.TB, config Config, transport http.RoundTripper, trace.NeutralizeClient(server.TraceClient) server.TraceClient = nil - if mSink == nil { - // Install a blackhole sink if we have no other sinks - bhs, _ := blackhole.NewBlackholeMetricSink() - mSink = bhs - } server.metricSinks = append(server.metricSinks, mSink) if sSink == nil { From a20128c289b76872d32c6828b9469581372ceccf Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Mon, 19 Nov 2018 16:26:16 -0800 Subject: [PATCH 11/17] Allow host tag override in signalfx. --- sinks/signalfx/signalfx.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sinks/signalfx/signalfx.go b/sinks/signalfx/signalfx.go index 7be9e4109..6a033365e 100644 --- a/sinks/signalfx/signalfx.go +++ b/sinks/signalfx/signalfx.go @@ -193,6 +193,9 @@ METRICLOOP: // Convenience label so that inner nested loops and `continue` easil dims := map[string]string{} // Set the hostname as a tag, since SFx doesn't have a first-class hostname field dims[sfx.hostnameTag] = sfx.hostname + if metric.HostName != "" { + dims[sfx.hostnameTag] = metric.HostName + } for _, tag := range metric.Tags { kv := strings.SplitN(tag, ":", 2) key := kv[0] From 591a5f9fb8072a386457ce9699ffbe4473daaba4 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Mon, 19 Nov 2018 17:51:42 -0800 Subject: [PATCH 12/17] Add back instrumentation. --- importsrv/server_test.go | 2 +- metricingester/aggingestor.go | 32 +++++++++++++++++++++++++------- metricingester/obs.go | 32 ++++++++++++++++++++++++++++++++ metricingester/sinkflusher.go | 25 ++++++++++++++++++++++--- 4 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 metricingester/obs.go diff --git a/importsrv/server_test.go b/importsrv/server_test.go index 762db0ead..037da92ca 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -220,7 +220,7 @@ func test(in []*metricpb.Metric, out []samplers.TestMetric, msg string) func(*te []metricingester.Sink{cms}, []float64{0.5, 0.95}, samplers.AggregateMin|samplers.AggregateMax|samplers.AggregateCount, - metricingester.FlushChan(flushc), // override the flush ticker channel so we control when flush + metricingester.OptFlushChan(flushc), // override the flush ticker channel so we control when flush ) s := importsrv.New(ing) defer s.Stop() diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go index 8e66796d5..1ab1a79ad 100644 --- a/metricingester/aggingestor.go +++ b/metricingester/aggingestor.go @@ -4,6 +4,10 @@ import ( "context" "time" + "github.com/sirupsen/logrus" + + "github.com/stripe/veneur/trace" + "github.com/stripe/veneur/samplers" ) @@ -13,6 +17,7 @@ type AggregatingIngestor struct { ticker *time.Ticker tickerC <-chan time.Time quit chan struct{} + logger *logrus.Logger } type flusher interface { @@ -22,13 +27,20 @@ type flusher interface { type ingesterOption func(AggregatingIngestor) AggregatingIngestor // Override the ticker channel that triggers flushing. Useful for testing. -func FlushChan(tckr <-chan time.Time) ingesterOption { +func OptFlushChan(tckr <-chan time.Time) ingesterOption { return func(option AggregatingIngestor) AggregatingIngestor { option.tickerC = tckr return option } } +func OptLogger(logger *logrus.Logger) ingesterOption { + return func(option AggregatingIngestor) AggregatingIngestor { + option.logger = logger + return option + } +} + // NewFlushingIngester creates an ingester that flushes metrics to the specified sinks. func NewFlushingIngester( workers int, @@ -46,18 +58,22 @@ func NewFlushingIngester( t := time.NewTicker(interval) ing := AggregatingIngestor{ workers: aggW, - flusher: sinkFlusher{ - sinks: sinks, - percentiles: percentiles, - aggregates: samplers.HistogramAggregates{aggregates, 4}, - }, ticker: t, tickerC: t.C, quit: make(chan struct{}), + logger: logrus.StandardLogger(), } for _, opt := range options { ing = opt(ing) } + + flusher := sinkFlusher{ + sinks: sinks, + percentiles: percentiles, + aggregates: samplers.HistogramAggregates{aggregates, 4}, + log: ing.logger, + } + ing.flusher = flusher return ing } @@ -108,7 +124,9 @@ func (a AggregatingIngestor) Stop() { func (a AggregatingIngestor) flush() { for _, w := range a.workers { go func(worker aggWorker) { - a.flusher.Flush(context.Background(), worker.Flush()) + span, ctx := trace.StartSpanFromContext(context.Background(), "flush") + defer span.Finish() + a.flusher.Flush(ctx, worker.Flush()) }(w) } } diff --git a/metricingester/obs.go b/metricingester/obs.go new file mode 100644 index 000000000..df2dc1bf8 --- /dev/null +++ b/metricingester/obs.go @@ -0,0 +1,32 @@ +package metricingester + +import ( + "context" + + "github.com/opentracing/opentracing-go" + "github.com/sirupsen/logrus" + "github.com/stripe/veneur/ssf" + "github.com/stripe/veneur/trace" +) + +func traceLogger(log *logrus.Logger, ctx context.Context) *logrus.Entry { + if span, ok := opentracing.SpanFromContext(ctx).(*trace.Span); ok { + return log. + WithField("trace_id", span.TraceID). + WithField("span_id", span.SpanID) + } + return log.WithField("trace_id", "") +} + +type mclient interface { + Add(...*ssf.SSFSample) +} + +func traceMetrics(ctx context.Context) mclient { + if span, ok := opentracing.SpanFromContext(ctx).(*trace.Span); ok { + return span + } + // this is essentially a noop client right now--we must make sure the context has a span object + // to report metrics. + return &ssf.Samples{} +} diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go index afe89104f..462491fc8 100644 --- a/metricingester/sinkflusher.go +++ b/metricingester/sinkflusher.go @@ -2,25 +2,30 @@ package metricingester import ( "context" + "fmt" "time" + "github.com/sirupsen/logrus" "github.com/stripe/veneur/samplers" + "github.com/stripe/veneur/ssf" ) type sinkFlusher struct { aggregates samplers.HistogramAggregates percentiles []float64 sinks []Sink + log *logrus.Logger } -type sinkFlusherOpt func(sinkFlusher) sinkFlusher - type Sink interface { Name() string Flush(context.Context, []samplers.InterMetric) error } func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) { + logger := traceLogger(s.log, ctx) + mc := traceMetrics(ctx) + metrics := make([]samplers.InterMetric, 0, countMetrics(envelope)) // get metrics from envelope for _, sampler := range envelope.counters { @@ -43,13 +48,27 @@ func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) { return } + tags := map[string]string{"part": "post"} for _, sinkInstance := range s.sinks { // TODO(clin): Add back metrics once we finalize the metrics client pull request. go func(ms Sink) { + start := time.Now() err := ms.Flush(ctx, metrics) if err != nil { - //s.log.WithError(err).WithField("Sink", ms.Name()).Warn("Error flushing Sink") + mc.Add( + ssf.Count(fmt.Sprintf("flush.plugins.%s.error_total", ms.Name()), 1, nil), + ) + logger.WithError(err).WithField("Sink", ms.Name()).Warn("Error flushing Sink") } + mc.Add( + ssf.Timing( + fmt.Sprintf("flush.plugins.%s.total_duration_ns", ms.Name()), + time.Since(start), + time.Nanosecond, + tags, + ), + ssf.Gauge(fmt.Sprintf("flush.plugins.%s.post_metrics_total", ms.Name()), float32(len(metrics)), nil), + ) }(sinkInstance) } return From 5b7bcb810029028c9cbd0c5c70ea28397181d520 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Mon, 19 Nov 2018 18:15:12 -0800 Subject: [PATCH 13/17] Minor cleanups: add arbitrary sinks to server initialization and fix imports. --- config.go | 4 ---- server.go | 4 ++-- server_test.go | 3 +-- tdigest/merging_digest.go | 3 ++- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/config.go b/config.go index d74e6e83b..506425899 100644 --- a/config.go +++ b/config.go @@ -1,7 +1,5 @@ package veneur -import "github.com/stripe/veneur/metricingester" - type Config struct { Aggregates []string `yaml:"aggregates"` AwsAccessKeyID string `yaml:"aws_access_key_id"` @@ -97,6 +95,4 @@ type Config struct { TraceLightstepNumClients int `yaml:"trace_lightstep_num_clients"` TraceLightstepReconnectPeriod string `yaml:"trace_lightstep_reconnect_period"` TraceMaxLengthBytes int `yaml:"trace_max_length_bytes"` - - AdditionalMetricSinks []metricingester.Sink } diff --git a/server.go b/server.go index c349774e9..95a8145d3 100644 --- a/server.go +++ b/server.go @@ -156,7 +156,7 @@ func SetLogger(logger *logrus.Logger) { // NewFromConfig creates a new veneur server from a configuration // specification and sets up the passed logger according to the // configuration. -func NewFromConfig(logger *logrus.Logger, conf Config) (*Server, error) { +func NewFromConfig(logger *logrus.Logger, conf Config, moreSinks ...metricingester.Sink) (*Server, error) { ret := &Server{} ret.Hostname = conf.Hostname @@ -587,7 +587,7 @@ func NewFromConfig(logger *logrus.Logger, conf Config) (*Server, error) { // convert all the workers to the proper interface var sinks []metricingester.Sink - sinks = append(sinks, conf.AdditionalMetricSinks...) + sinks = append(sinks, moreSinks...) for _, s := range ret.metricSinks { sinks = append(sinks, s) diff --git a/server_test.go b/server_test.go index f24b3555b..6a85ff4a9 100644 --- a/server_test.go +++ b/server_test.go @@ -139,8 +139,7 @@ func setupVeneurServer(t testing.TB, config Config, transport http.RoundTripper, bhs, _ := blackhole.NewBlackholeMetricSink() mSink = bhs } - config.AdditionalMetricSinks = append(config.AdditionalMetricSinks, mSink) - server, err := NewFromConfig(logger, config) + server, err := NewFromConfig(logger, config, mSink) if err != nil { t.Fatal(err) } diff --git a/tdigest/merging_digest.go b/tdigest/merging_digest.go index 1272b83f1..0d9fe5f64 100644 --- a/tdigest/merging_digest.go +++ b/tdigest/merging_digest.go @@ -10,11 +10,12 @@ package tdigest import ( "bytes" "encoding/gob" - "github.com/pkg/errors" "io" "math" "math/rand" "sort" + + "github.com/pkg/errors" ) // A t-digest using the merging implementation. MergingDigest is not safe for From 52258130c135cf64b4c22566ed5cdbc1e89e5ab2 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Mon, 19 Nov 2018 18:29:54 -0800 Subject: [PATCH 14/17] Fix race condition due to unthreadsafe trace client. --- metricingester/aggingestor.go | 10 +++++++++ metricingester/obs.go | 14 ------------ metricingester/sinkflusher.go | 42 +++++++++++++++++++---------------- server.go | 1 + 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/metricingester/aggingestor.go b/metricingester/aggingestor.go index 1ab1a79ad..d9e1c82e3 100644 --- a/metricingester/aggingestor.go +++ b/metricingester/aggingestor.go @@ -18,6 +18,7 @@ type AggregatingIngestor struct { tickerC <-chan time.Time quit chan struct{} logger *logrus.Logger + tc *trace.Client } type flusher interface { @@ -41,6 +42,13 @@ func OptLogger(logger *logrus.Logger) ingesterOption { } } +func OptTraceClient(tc *trace.Client) ingesterOption { + return func(option AggregatingIngestor) AggregatingIngestor { + option.tc = tc + return option + } +} + // NewFlushingIngester creates an ingester that flushes metrics to the specified sinks. func NewFlushingIngester( workers int, @@ -62,6 +70,7 @@ func NewFlushingIngester( tickerC: t.C, quit: make(chan struct{}), logger: logrus.StandardLogger(), + tc: trace.DefaultClient, } for _, opt := range options { ing = opt(ing) @@ -72,6 +81,7 @@ func NewFlushingIngester( percentiles: percentiles, aggregates: samplers.HistogramAggregates{aggregates, 4}, log: ing.logger, + tc: ing.tc, } ing.flusher = flusher return ing diff --git a/metricingester/obs.go b/metricingester/obs.go index df2dc1bf8..b4347d061 100644 --- a/metricingester/obs.go +++ b/metricingester/obs.go @@ -5,7 +5,6 @@ import ( "github.com/opentracing/opentracing-go" "github.com/sirupsen/logrus" - "github.com/stripe/veneur/ssf" "github.com/stripe/veneur/trace" ) @@ -17,16 +16,3 @@ func traceLogger(log *logrus.Logger, ctx context.Context) *logrus.Entry { } return log.WithField("trace_id", "") } - -type mclient interface { - Add(...*ssf.SSFSample) -} - -func traceMetrics(ctx context.Context) mclient { - if span, ok := opentracing.SpanFromContext(ctx).(*trace.Span); ok { - return span - } - // this is essentially a noop client right now--we must make sure the context has a span object - // to report metrics. - return &ssf.Samples{} -} diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go index 462491fc8..d4daa90a7 100644 --- a/metricingester/sinkflusher.go +++ b/metricingester/sinkflusher.go @@ -5,9 +5,12 @@ import ( "fmt" "time" + "github.com/stripe/veneur/trace/metrics" + "github.com/sirupsen/logrus" "github.com/stripe/veneur/samplers" "github.com/stripe/veneur/ssf" + "github.com/stripe/veneur/trace" ) type sinkFlusher struct { @@ -15,6 +18,7 @@ type sinkFlusher struct { percentiles []float64 sinks []Sink log *logrus.Logger + tc *trace.Client } type Sink interface { @@ -24,50 +28,50 @@ type Sink interface { func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) { logger := traceLogger(s.log, ctx) - mc := traceMetrics(ctx) - - metrics := make([]samplers.InterMetric, 0, countMetrics(envelope)) - // get metrics from envelope + ms := make([]samplers.InterMetric, 0, countMetrics(envelope)) + // get ms from envelope for _, sampler := range envelope.counters { - metrics = append(metrics, sampler.Flush(time.Second)...) + ms = append(ms, sampler.Flush(time.Second)...) } for _, sampler := range envelope.sets { - metrics = append(metrics, sampler.Flush()...) + ms = append(ms, sampler.Flush()...) } for _, sampler := range envelope.gauges { - metrics = append(metrics, sampler.Flush()...) + ms = append(ms, sampler.Flush()...) } for _, sampler := range envelope.histograms { - metrics = append(metrics, sampler.Flush(time.Second, s.percentiles, s.aggregates, true)...) + ms = append(ms, sampler.Flush(time.Second, s.percentiles, s.aggregates, true)...) } for _, sampler := range envelope.mixedHistograms { - metrics = append(metrics, sampler.Flush(s.percentiles, s.aggregates, envelope.mixedHosts)...) + ms = append(ms, sampler.Flush(s.percentiles, s.aggregates, envelope.mixedHosts)...) } - if len(metrics) == 0 { + if len(ms) == 0 { return } tags := map[string]string{"part": "post"} for _, sinkInstance := range s.sinks { - // TODO(clin): Add back metrics once we finalize the metrics client pull request. - go func(ms Sink) { + // TODO(clin): Add back ms once we finalize the ms client pull request. + go func(sink Sink) { + samples := &ssf.Samples{} + defer metrics.Report(s.tc, samples) start := time.Now() - err := ms.Flush(ctx, metrics) + err := sink.Flush(ctx, ms) if err != nil { - mc.Add( - ssf.Count(fmt.Sprintf("flush.plugins.%s.error_total", ms.Name()), 1, nil), + samples.Add( + ssf.Count(fmt.Sprintf("flush.plugins.%s.error_total", sink.Name()), 1, nil), ) - logger.WithError(err).WithField("Sink", ms.Name()).Warn("Error flushing Sink") + logger.WithError(err).WithField("Sink", sink.Name()).Warn("Error flushing Sink") } - mc.Add( + samples.Add( ssf.Timing( - fmt.Sprintf("flush.plugins.%s.total_duration_ns", ms.Name()), + fmt.Sprintf("flush.plugins.%s.total_duration_ns", sink.Name()), time.Since(start), time.Nanosecond, tags, ), - ssf.Gauge(fmt.Sprintf("flush.plugins.%s.post_metrics_total", ms.Name()), float32(len(metrics)), nil), + ssf.Gauge(fmt.Sprintf("flush.plugins.%s.post_metrics_total", sink.Name()), float32(len(ms)), nil), ) }(sinkInstance) } diff --git a/server.go b/server.go index 95a8145d3..24cb3136d 100644 --- a/server.go +++ b/server.go @@ -602,6 +602,7 @@ func NewFromConfig(logger *logrus.Logger, conf Config, moreSinks ...metricingest sinks, conf.Percentiles, ret.HistogramAggregates.Value, + metricingester.OptTraceClient(ret.TraceClient), ) ing.Start() ret.grpcServer = importsrv.New( From 7571656eeaa97ca42289a630fdc320ac215ecbe2 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Thu, 22 Nov 2018 11:01:33 -0800 Subject: [PATCH 15/17] Remove unnecessary hostname passing to mixedhisto. --- metricingester/aggworker.go | 2 +- samplers/mixedhisto.go | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/metricingester/aggworker.go b/metricingester/aggworker.go index a669eff47..41c033a8d 100644 --- a/metricingester/aggworker.go +++ b/metricingester/aggworker.go @@ -101,7 +101,7 @@ func (a aggWorker) merge(d Digest) { case mixedHistoDigest: key := d.MixedKey() if _, present := a.samplers.mixedHistograms[key]; !present { - a.samplers.mixedHistograms[key] = samplers.NewMixedHisto(d.name, d.tags, samplers.OptMixedHistoHostname(d.hostname)) + a.samplers.mixedHistograms[key] = samplers.NewMixedHisto(d.name, d.tags) } a.samplers.mixedHistograms[key].Merge(d.hostname, d.histodigest) if d.flushMixed { diff --git a/samplers/mixedhisto.go b/samplers/mixedhisto.go index 593933fa2..d3cf364d1 100644 --- a/samplers/mixedhisto.go +++ b/samplers/mixedhisto.go @@ -11,13 +11,6 @@ import ( type optMixedHisto func(MixedHisto) MixedHisto -func OptMixedHistoHostname(hn string) optMixedHisto { - return func(histo MixedHisto) MixedHisto { - histo.hostname = hn - return histo - } -} - // NewMixedHisto creates a new mixed histogram. func NewMixedHisto(name string, tags []string, opts ...optMixedHisto) MixedHisto { m := MixedHisto{ @@ -46,7 +39,6 @@ func NewMixedHisto(name string, tags []string, opts ...optMixedHisto) MixedHisto // Note that we don't support the "median" aggregate for mixed histograms. type MixedHisto struct { histo *Histo - hostname string min map[string]float64 max map[string]float64 weight map[string]float64 From a9ee9de80c603923d0e90edd1307088e7846bc8f Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Thu, 22 Nov 2018 11:21:06 -0800 Subject: [PATCH 16/17] Code review feedback. --- importsrv/server_test.go | 35 ----------------------------------- metricingester/obs.go | 5 +++-- metricingester/sinkflusher.go | 1 - 3 files changed, 3 insertions(+), 38 deletions(-) diff --git a/importsrv/server_test.go b/importsrv/server_test.go index 037da92ca..5050d9a11 100644 --- a/importsrv/server_test.go +++ b/importsrv/server_test.go @@ -257,38 +257,3 @@ func test(in []*metricpb.Metric, out []samplers.TestMetric, msg string) func(*te ) } } - -//func TestOptions_WithTraceClient(t *testing.T) { -// c, err := trace.NewClient(trace.DefaultVeneurAddress) -// if err != nil { -// t.Fatalf("failed to initialize a trace client: %v", err) -// } -// -// s := New([]MetricIngester{}, WithTraceClient(c)) -// assert.Equal(t, c, s.opts.traceClient, "WithTraceClient didn't correctly "+ -// "set the trace client") -//} -// -//func BenchmarkImportServerSendMetrics(b *testing.B) { -// rand.Seed(time.Now().Unix()) -// -// metrics := metrictest.RandomForwardMetrics(10000) -// for _, inputSize := range []int{10, 100, 1000, 10000} { -// ingesters := make([]MetricIngester, 100) -// for i := range ingesters { -// ingester := newNoopChannelMetricIngester() -// ingester.start() -// defer ingester.stop() -// ingesters[i] = ingester -// } -// s := New(ingesters) -// ctx := context.Background() -// input := &forwardrpc.MetricList{Metrics: metrics[:inputSize]} -// -// b.Run(fmt.Sprintf("InputSize=%d", inputSize), func(b *testing.B) { -// for i := 0; i < b.N; i++ { -// s.SendMetrics(ctx, input) -// } -// }) -// } -//} diff --git a/metricingester/obs.go b/metricingester/obs.go index b4347d061..0ccff519b 100644 --- a/metricingester/obs.go +++ b/metricingester/obs.go @@ -2,6 +2,7 @@ package metricingester import ( "context" + "strconv" "github.com/opentracing/opentracing-go" "github.com/sirupsen/logrus" @@ -11,8 +12,8 @@ import ( func traceLogger(log *logrus.Logger, ctx context.Context) *logrus.Entry { if span, ok := opentracing.SpanFromContext(ctx).(*trace.Span); ok { return log. - WithField("trace_id", span.TraceID). - WithField("span_id", span.SpanID) + WithField("trace_id", strconv.FormatInt(span.TraceID, 16)). + WithField("span_id", strconv.FormatInt(span.SpanID, 16)) } return log.WithField("trace_id", "") } diff --git a/metricingester/sinkflusher.go b/metricingester/sinkflusher.go index d4daa90a7..956349fb6 100644 --- a/metricingester/sinkflusher.go +++ b/metricingester/sinkflusher.go @@ -52,7 +52,6 @@ func (s sinkFlusher) Flush(ctx context.Context, envelope samplerEnvelope) { tags := map[string]string{"part": "post"} for _, sinkInstance := range s.sinks { - // TODO(clin): Add back ms once we finalize the ms client pull request. go func(sink Sink) { samples := &ssf.Samples{} defer metrics.Report(s.tc, samples) From b5404ab63c8627afab25edf9af90dbf3a676e0d4 Mon Sep 17 00:00:00 2001 From: Chen Lin Date: Thu, 22 Nov 2018 11:22:51 -0800 Subject: [PATCH 17/17] Add benchmark. --- metricingester/aggworker_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 metricingester/aggworker_test.go diff --git a/metricingester/aggworker_test.go b/metricingester/aggworker_test.go new file mode 100644 index 000000000..e02418b0b --- /dev/null +++ b/metricingester/aggworker_test.go @@ -0,0 +1,15 @@ +package metricingester + +import "testing" + +func BenchmarkWorkerIngest(b *testing.B) { + w := newAggWorker() + w.Start() + // we don't want slice allocation to be part of the benchmark since this is normally done + // outside of creating a metric. + tags := []string{"c:d", "f:g", "a:b"} + b.ResetTimer() + for i := 0; i < b.N; i++ { + w.Ingest(NewCounter("mycounter", 100, tags, 1.0, "myhost")) + } +}