# config.example.yaml
# Top level server configuration. Mainly used for configuring enabled flow
# protocols and ports
server:
  # At least one flow protocol should be enabled.
  sflow:
    # Enable listeners with `enable: true`
    enable: true
    # Each protocol has a default port associated with it, but it's overridable
    # by setting `port`
    port: 6343
    # goflow spins up workers for each flow protocol. By default it is 1 worker
    # but it might be useful to add more on multi-core systems
    workers: 2
    # enables port reuse via SO_REUSEADDR and SO_REUSEPORT
    reuse_port: false
  netflowv5:
    # By default listeners are disabled, but they can also be disabled
    # explicitly here.
    enable: false
  netflowv9:
    enable: true
    # Each server listens on 0.0.0.0 (all interfaces) by default but that's
    # tweakable
    addr: 127.0.0.1
  # Embedded HTTP server is optional, but necessary if you want Prometheus
  # metrics or profiling information.
  http:
    enable: true
    # All options of other protocols are accepted here except for `workers` and
    # `reuse_port`
    address: 0.0.0.0
    port: 9269
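    # Illustrative only: a minimal Prometheus scrape config for the embedded
    # HTTP server above, assuming Prometheus can reach this host as
    # `netflow-collector` and that metrics are served on Prometheus's default
    # /metrics path. The job name and target are placeholders, not part of
    # this project.
    #
    # scrape_configs:
    #   - job_name: netflow
    #     static_configs:
    #       - targets: ['netflow-collector:9269']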
# Transport/Dispatch settings
transport:
  # There are three different dispatch methods: `linear`, `worker_pool`, and
  # `goroutine`. `linear` is essentially single threaded and processes each
  # message one at a time in the order received. `linear` is the slowest but
  # least resource intensive. `worker_pool` starts a worker pool and doles out
  # flow messages to workers. `worker_pool` is the best balance of performance
  # and resource utilization. `goroutine` spins up a new Goroutine for each flow
  # message received. It is the most performant but can cause cascading failures
  # during periods of resource contention.
  dispatch_method: worker_pool
  # When used with `dispatch_method: worker_pool` this sets the number of
  # workers. This value can be given as a number (for a static value) or as a
  # string which is parsed as a Go template. Currently the only variable
  # provided is `NumCPU`, which is the number of CPUs the current system has.
  # Sprig template functions are also available.
  worker_count: '{{ .NumCPU }}'
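  # Illustrative only: since sprig functions are available in the template, a
  # derived value like "half the CPUs, but at least two workers" could be
  # expressed as below. This is just a sketch of template usage, not a
  # recommendation.
  #
  # worker_count: '{{ max 2 (div .NumCPU 2) }}'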
  # When used with `dispatch_method: worker_pool` this sets the buffer length
  # of the channel used to send flow messages to workers. Like `worker_count`,
  # it can be a static number or a template string.
  message_buffer: '{{ mul .NumCPU 4 }}'
  # When used with `dispatch_method: goroutine` this sets the maximum number
  # of concurrent Goroutines doing flow message processing. Flow messages
  # received after hitting this ceiling will be dropped. Setting this value to 0
  # disables the ceiling and allows as many Goroutines as the system can
  # handle until it crashes. Like `worker_count`, it can be a static number or
  # a template string.
  max_goroutines: 1000
  # Executes all pushes to destinations concurrently. A nice performance bump
  # if your system has the CPUs to spare.
  parallelize_destinations: true
# Enricher config
enrichers:
  # Setting any option, even a nonsensical one, is enough to enable an enricher
  addr_type:
    not_enabled: false
  rdns:
    # Enables the RDNS LRU lookup cache. This is _very_ highly recommended as
    # otherwise every single flow makes a network request to do a lookup.
    enable_cache: true
    # Cache size. Default is 128 but increasing it can bring performance
    # benefits at the cost of memory
    cache_size: 2048
    # Will only append hostname information to flows where the address is
    # already in the cache. This is only necessary if you need to squeeze the
    # most performance out at the cost of accuracy on flows with addresses not
    # in the cache.
    cache_only: false
  maxmind_db:
    # Enables the MaxMind DB LRU lookup cache. This isn't strictly necessary,
    # especially on machines backed by an SSD since disk access is so fast. It
    # is most useful on machines where disk access is slow, like NFS or, god
    # forbid, a 5400RPM HDD.
    enable_cache: true
    # Cache size. Default is 128 but increasing it can bring marginal
    # performance benefits at the cost of memory.
    cache_size: 128
    # Sets the preferred language used for names. If that language is not
    # available it will fall back to "en" (English).
    locale: en
    # Paths to GeoIP2 or GeoLite2 mmdb databases. When given multiple DBs the
    # enricher will merge the results together in the order given, with the
    # last one taking precedence.
    database_paths:
      - /opt/MaxmindDB/GeoLite2-ASN.mmdb
      - /opt/MaxmindDB/GeoLite2-City.mmdb
    # Enable groups of fields to populate each flow. It's usually a good idea
    # to match these up with the databases you have.
    enabled_field_groups:
      - asn
      - city
    # You can also append any extra fields you want, or leave out
    # `enabled_field_groups` and set all of the enabled fields here.
    enabled_fields:
      - asn_org
      - city_name
      - country_name
  netdb:
    ethertypes:
      # Enable the built-in EtherType database.
      built_in: true
      # You can also load /etc/ethertypes from the local file system.
      source_files:
        - /etc/ethertypes
      # ... or define your own in the same format as /etc/ethertypes.
      source_inline:
        - PROFINET 8892
      # Set aliases to use in the final flow value instead of the
      # defined name. Since the /etc/ethertypes format doesn't allow
      # for spaces in the name, you can set your own name _with_
      # spaces here if you want. The key is the protocol name (or one
      # of its built-in aliases) and the value is the name you want used.
      name_aliases:
        IPv4: Internet Protocol version 4 (IPv4)
    protocols:
      # `protocols` supports the same parameters as `ethertypes`, but
      # applies to /etc/protocols instead.
      built_in: true
      source_files:
        - /etc/protocols
    services:
      # The service database depends on the protocol database also being
      # populated, otherwise lookups won't work correctly. Make sure
      # all of the associated protocols are defined for all of the
      # services you want defined.
      built_in: true
      source_files:
        - /etc/services
  field_mapper:
    # YAML anchors are supported and are very useful in the
    # `field_mapper` config.
    fields:
      - source_field: sampler_address
        target_field: sampler_name
        mapping:
          10.0.0.1: router1
          10.0.0.2: router2
      - source_field: out_interface
        target_field: out_interface_name
        template: &interface_template >
          {{ if eq .Msg.sampler_name "router1" }}
          igb{{ .SourceField }}
          {{ else if eq .Msg.sampler_name "router2" }}
          eth{{ .SourceField }}
          {{ end }}
      - source_field: in_interface
        target_field: in_interface_name
        template: *interface_template
# Destination config
destinations:
  # The stdout destination is good for debugging or if you want to redirect
  # stdout to a file for storage. If you are using Loki or Elasticsearch for
  # log collection it's considered more efficient to use the dedicated
  # destinations for those.
  stdout:
    # `json` and `logfmt` are supported
    format: json
  # Elasticsearch destination config
  elasticsearch:
    # The ES index to use
    index: netflow
    # The timestamp field is added based on the time the flow was processed.
    # The default is `@timestamp`.
    timestamp_field: '@timestamp'
    # By default the ES client will use a bulk indexer in conjunction with the
    # _bulk endpoint. If you want to instead use regular indexing endpoints and
    # index each flow one at a time, set `synchronous_indexing: true`. This is
    # usually not a good idea and puts undue stress on the cluster, but hey,
    # it's your infra!
    synchronous_indexing: false
    # Elasticsearch endpoints. By default the client will read from the
    # ELASTICSEARCH_URL environment variable.
    addresses:
      - http://elasticsearch:9200
  # Loki destination config
  loki:
    # Push URL for Loki. This example assumes Loki is running in Kubernetes,
    # but by default the client will try to connect to localhost:3100.
    push_url: http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push
    # Static/external labels are labels applied to every log entry. Things like
    # the `job` label or what cluster this is running in are good candidates
    # to put here.
    static_labels:
      job: netflow
      cluster: letolab-us-east-1
    # Dynamic labels are labels extracted from flow messages and added to the
    # corresponding log entries. It's better to be conservative since Loki
    # really hates high cardinality labels, but query performance can be really
    # atrocious without a handful of labels indexed. Experiment and find the
    # right balance for your workload.
    dynamic_labels:
      - dst_addr
      - src_addr
    # The Loki client is based on Promtail and uses batch pushes to write to
    # Loki. `batch_wait` is the max amount of time to wait before sending a
    # batch and `batch_size` is the max batch size (in bytes) to accumulate
    # before forcefully sending the batch. The defaults are the same as
    # Promtail's defaults.
    batch_wait: 1s
    batch_size: 1048576
  # Prometheus destination config
  prometheus:
    # Metrics namespace. Default is `netflow`
    namespace: netflow
    # Enables aggregate byte counting
    count_bytes: true
    # Enables aggregate packet counting
    count_packets: true
    # Enables aggregate flow counting
    count_flows: true
    # Labels to use for aggregate metrics. Prometheus does not like lots of
    # high cardinality metrics, so unless you have shitloads of RAM it's a good
    # idea to be conservative here and join on the `_ip_info` metric described
    # below.
    metric_labels:
      - dst_addr
      - src_addr
    # Enables generating an `_ip_info` info metric containing all of the labels
    # in the `ip_info_labels` config option. This is useful for memory
    # constrained Prometheus servers that can't handle very high cardinalities
    # in the aggregate metrics. These info metrics can be joined on to query
    # address information instead of duplicating and storing it in metric
    # labels. The downside is joins in PromQL are a pain in the ass, so query
    # complexity goes way up. See the illustrative join sketch after
    # `ip_info_labels` below.
    export_ip_info: true
    ip_info_labels:
      - addr
      - hostname
      - asn_org
      - asn
      - city_name
      - continent_name
      - country_name
      - loc_lat
      - loc_long
      - organization
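    # Illustrative only: one way such a join can look in PromQL, assuming an
    # aggregate counter named `netflow_bytes_total` with a `src_addr` label and
    # an info metric named `netflow_ip_info` with an `addr` label. Both metric
    # names are assumptions made for the sake of the example; check the
    # exporter's actual metric names before using anything like this.
    #
    #   sum by (src_addr) (rate(netflow_bytes_total[5m]))
    #     * on(src_addr) group_left(asn_org)
    #   label_replace(netflow_ip_info, "src_addr", "$1", "addr", "(.*)")
    #
    # The `label_replace` copies the info metric's `addr` label into `src_addr`
    # so the two sides share a label to join on, and `group_left` pulls
    # `asn_org` onto the result.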
    # Enables generating histogram metrics based on the flow start and end
    # timestamps in the flow. The resolution of the flow timestamps is 1
    # second, so this is not very useful in most circumstances as most flows
    # are shorter than 1 second.
    observe_flow_duration: false
    # If `observe_flow_duration` is enabled, `flow_duration_buckets` sets the
    # histogram buckets. Defaults to prometheus.DefBuckets
    flow_duration_buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
    # The Prometheus destination implements its own metrics persistence in
    # order to conserve memory. One of the benefits of this is a "GC" process
    # that runs on a certain interval (`gc_interval`) and clears away metrics
    # which haven't been touched within a certain time period
    # (`visibility_timeout`). After a metric has been GC'd, if it gets hit
    # again it will register to Prometheus as a reset to zero, so `rate`
    # queries will be handled gracefully.
    gc_interval: 15s
    visibility_timeout: 5m