diff --git a/monitoring/datadog-agent/README.md b/monitoring/datadog-agent/README.md index b16c578..695010c 100644 --- a/monitoring/datadog-agent/README.md +++ b/monitoring/datadog-agent/README.md @@ -1,9 +1,11 @@ -# Datadog Openmetrics Configuration Example +# Datadog Openmetrics Configuration and Dashboard Example Datadog provides an agent that will scrape metrics across various sources including an Openmetrics endpoint like the one provided by Rockset. You can configure this agent to scrape metrics from your Rockset metrics endpoint using the Openmetrics integration. +You can also import a sample dashboard that visualizes these metrics to get you started quickly. ## Dependencies 1. Datadog Agent version 6.6.0 and above 2. Enable Rockset metrics endpoint +3. Access to a Datadog account ## Instructions Copy the [example](./openmetrics.d/conf.yaml) from this repository to your agent's host. This configuration file will generally be located on your agent host at: @@ -13,6 +15,10 @@ Copy the [example](./openmetrics.d/conf.yaml) from this repository to your agent The syntax is very specific and please note any differences between your file and the example provided. For example, the password must be an empty string in order to be encoded properly. 
+You can import the sample dashboard ([RocksetMetricsDashboard.json](./openmetrics.d/RocksetMetricsDashboard.json)) into your Datadog account by following the instructions here: https://docs.datadoghq.com/dashboards/#copy-import-or-export-dashboard-json + +![Rockset Metrics Dashboard in Datadog](https://github.com/lukalovosevic/community/assets/62242783/850dd6a2-f9d8-444a-9faa-7ecea01b6268) + ## References - https://rockset.com/docs/monitoring-and-alerting/ -- https://docs.datadoghq.com/integrations/openmetrics/ \ No newline at end of file +- https://docs.datadoghq.com/integrations/openmetrics/ diff --git a/monitoring/datadog-agent/openmetrics.d/RocksetMetricsDashboard.json b/monitoring/datadog-agent/openmetrics.d/RocksetMetricsDashboard.json new file mode 100644 index 0000000..565241e --- /dev/null +++ b/monitoring/datadog-agent/openmetrics.d/RocksetMetricsDashboard.json @@ -0,0 +1 @@ +{"title":"Rockset Metrics Dashboard","description":"","widgets":[{"id":1246456925018978,"definition":{"type":"image","url":"https://rockset-demo-ecommerce.s3.us-west-2.amazonaws.com/rockset.svg","sizing":"scale-down","has_background":false,"has_border":false,"vertical_align":"center","horizontal_align":"center"}},{"id":7186493897479482,"definition":{"title":"Collection Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6058642202006128,"definition":{"title":"Number of Collections per Workspace","title_size":"16","title_align":"left","type":"query_table","requests":[{"response_format":"scalar","queries":[{"name":"query1","data_source":"metrics","query":"sum:rockset.rockset_collections{*} by {workspace_name}","aggregator":"avg"}],"formulas":[{"cell_display_mode":"number","alias":"# of coll.","formula":"query1","limit":{"count":500,"order":"desc"},"number_format":{"unit":{"type":"canonical_unit","unit_name":"item"}}}]}],"has_search_bar":"auto"}},{"id":11,"definition":{"title":"Number of Documents per 
Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_documents{$scope} by {collection_name}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":8,"definition":{"title":"Collection Size in Bytes","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_size_bytes{$scope} by {collection_name}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":8709473059158392,"definition":{"title":"Collection Size in Bytes","type":"query_table","requests":[{"response_format":"scalar","queries":[{"data_source":"metrics","name":"query1","query":"max:rockset.rockset_collection_size_bytes{*} by {collection_name}","aggregator":"avg"}],"formulas":[{"cell_display_mode":"bar","alias":"Coll. 
size","formula":"query1","limit":{"count":500,"order":"desc"},"number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}]}],"has_search_bar":"auto"}}]}},{"id":4564612793581622,"definition":{"title":"Virtual Instance Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":4,"definition":{"title":"VI CPU Utilization","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"query1"},{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_leaf_cpu_utilization_percentage{$scope} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","min":"0","max":"100"}}},{"id":2168985221699616,"definition":{"title":"VI Memory Utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"query1"},{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:rockset.rockset_leaf_memory_utilization_percentage{*} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"min":"0","max":"100"}}}]}},{"id":5479815899399802,"definition":{"title":"Ingestion 
Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":10,"definition":{"title":"Total Ingested Bytes per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_total_ingest_bytes.count{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":22,"definition":{"title":"Parse Errors per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_parse_errors.count{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":18,"definition":{"title":"Ingest Transformation Errors per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_ingest_transformation_errors.count{$scope}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":29,"definition":{"title":"Data Discovery Latency per Collection 
(ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"millisecond"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_collection_data_discovery_latency.bucket{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":14,"definition":{"title":"Data Processing Latency per Collection (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"millisecond"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_collection_data_process_latency.bucket{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}}]}},{"id":8525032580543298,"definition":{"title":"Query Metrics","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":1,"definition":{"title":"Requested Queries","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_queries.count{virtual_instance_id:*} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":17,"definition":{"title":"Query 
Queue","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"item"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_queue_size{$scope} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":6,"definition":{"title":"Query Errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_errors.count{$scope} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":28,"definition":{"title":"Query Admission Control (seconds)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_admission_latency_seconds.sum{$scope} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":0,"definition":{"title":"Query Lambda Latency (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"custom_unit_label","label":"ms"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_latency_seconds.bucket{*} by 
{query_lambda}.as_rate()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"include_zero":false}}},{"id":12,"definition":{"title":"Query Lambda Errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_errors.count{$scope} by {query_lambda}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":27,"definition":{"title":"Query Lambda Admission Control (seconds)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_admission_latency_seconds.sum{$scope} by {query_lambda}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}}]}}],"template_variables":[{"name":"scope","available_values":[],"default":"*"}],"layout_type":"ordered","notify_list":[],"reflow_type":"auto"} \ No newline at end of file diff --git a/monitoring/datadog-agent/openmetrics.d/conf.yaml b/monitoring/datadog-agent/openmetrics.d/conf.yaml index bf352c6..1e0adae 100644 --- a/monitoring/datadog-agent/openmetrics.d/conf.yaml +++ b/monitoring/datadog-agent/openmetrics.d/conf.yaml @@ -2,10 +2,32 @@ init_config: service: rockset instances: - - openmetrics_endpoint: https://api.rs2.usw2.rockset.com/v1/orgs/self/metrics + - openmetrics_endpoint: https://api.usw2a1.rockset.com/v1/orgs/self/metrics ### make sure the api endpoint matches your region's endpoint url namespace: rockset 
extra_headers: - authorization: Apikey + authorization: Apikey + max_returned_metrics: 50000 metrics: - - .+ + - rockset_leaf_cpu_utilization_percentage + - rockset_leaf_memory_utilization_percentage + - rockset_agg_cpu_utilization_percentage + - rockset_agg_memory_utilization_percentage + - rockset_collections + - rockset_collection_size_bytes + - rockset_collection_documents + - rockset_collection_total_ingest_bytes + - rockset_collection_parse_errors + - rockset_collection_data_discovery_latency + - rockset_collection_data_process_latency + - rockset_data_discovery_latency + - rockset_data_process_latency + - rockset_queries + - rockset_query_latency_seconds + - rockset_query_admission_latency_seconds + - rockset_query_queue_size + - rockset_query_errors + - rockset_query_lambda_queries + - rockset_query_lambda_latency_seconds + - rockset_query_lambda_admission_latency_seconds + - rockset_query_lambda_errors \ No newline at end of file