From f67f16f6b6c16b195b8321c2952d2f7624026f7b Mon Sep 17 00:00:00 2001 From: Luka Lovosevic <62242783+lukalovosevic@users.noreply.github.com> Date: Tue, 5 Mar 2024 08:37:16 +0100 Subject: [PATCH 1/3] Update README.md Add reference to a sample JSON dashboard --- monitoring/datadog-agent/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/monitoring/datadog-agent/README.md b/monitoring/datadog-agent/README.md index b16c578..af1c402 100644 --- a/monitoring/datadog-agent/README.md +++ b/monitoring/datadog-agent/README.md @@ -1,9 +1,11 @@ -# Datadog Openmetrics Configuration Example +# Datadog Openmetrics Configuration and Dashboard Example Datadog provides an agent that will scrape metrics across various sources including an Openmetrics endpoint like the one provided by Rockset. You can configure this agent to scrape metrics from your Rockset metrics endpoint using the Openmetrics integration. +You can also import a sample dashboard that visualizes these metrics to get you started quickly. ## Dependencies 1. Datadog Agent version 6.6.0 and above 2. Enable Rockset metrics endpoint +3. Access to a Datadog account ## Instructions Copy the [example](./openmetrics.d/conf.yaml) from this repository to your agent's host. The location of this configuration file on your agent host will generally be located at: @@ -13,6 +15,8 @@ Copy the [example](./openmetrics.d/conf.yaml) from this repository to your agent The syntax is very specific and please note any differences between your file and the example provided. For exampe, the password must be an empty string in order to be encoded properly. 
+You can import the [sample dashboard](./openmetrics.d/RocksetMetricsDashboard.json) into your Datadog account by following the instructions here: https://docs.datadoghq.com/dashboards/#copy-import-or-export-dashboard-json + ## References - https://rockset.com/docs/monitoring-and-alerting/ -- https://docs.datadoghq.com/integrations/openmetrics/ \ No newline at end of file +- https://docs.datadoghq.com/integrations/openmetrics/ From 6a35656cf2ad3ae2a1f4962195f4eeb01b890df2 Mon Sep 17 00:00:00 2001 From: Luka Lovosevic <62242783+lukalovosevic@users.noreply.github.com> Date: Tue, 5 Mar 2024 08:39:48 +0100 Subject: [PATCH 2/3] Update README.md Added screenshot --- monitoring/datadog-agent/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/monitoring/datadog-agent/README.md b/monitoring/datadog-agent/README.md index af1c402..695010c 100644 --- a/monitoring/datadog-agent/README.md +++ b/monitoring/datadog-agent/README.md @@ -17,6 +17,8 @@ The syntax is very specific and please note any differences between your file an You can import the [sample dashboard](./openmetrics.d/RocksetMetricsDashboard.json) into your Datadog account by following the instructions here: https://docs.datadoghq.com/dashboards/#copy-import-or-export-dashboard-json +![Sample Rockset metrics dashboard screenshot](https://github.com/lukalovosevic/community/assets/62242783/850dd6a2-f9d8-444a-9faa-7ecea01b6268) + ## References - https://rockset.com/docs/monitoring-and-alerting/ - https://docs.datadoghq.com/integrations/openmetrics/ From cb0d64662db32201a6bd82edf42dcf02a6f14430 Mon Sep 17 00:00:00 2001 From: Luka Lovosevic <62242783+lukalovosevic@users.noreply.github.com> Date: Tue, 5 Mar 2024 08:42:18 +0100 Subject: [PATCH 3/3] Updated Datadog agent configuration and sample dashboard --- .../RocksetMetricsDashboard.json | 1 + .../datadog-agent/openmetrics.d/conf.yaml | 28 +++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 monitoring/datadog-agent/openmetrics.d/RocksetMetricsDashboard.json diff --git a/monitoring/datadog-agent/openmetrics.d/RocksetMetricsDashboard.json 
b/monitoring/datadog-agent/openmetrics.d/RocksetMetricsDashboard.json new file mode 100644 index 0000000..565241e --- /dev/null +++ b/monitoring/datadog-agent/openmetrics.d/RocksetMetricsDashboard.json @@ -0,0 +1 @@ +{"title":"Rockset Metrics Dashboard","description":"","widgets":[{"id":1246456925018978,"definition":{"type":"image","url":"https://rockset-demo-ecommerce.s3.us-west-2.amazonaws.com/rockset.svg","sizing":"scale-down","has_background":false,"has_border":false,"vertical_align":"center","horizontal_align":"center"}},{"id":7186493897479482,"definition":{"title":"Collection Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":6058642202006128,"definition":{"title":"Number of Collections per Workspace","title_size":"16","title_align":"left","type":"query_table","requests":[{"response_format":"scalar","queries":[{"name":"query1","data_source":"metrics","query":"sum:rockset.rockset_collections{*} by {workspace_name}","aggregator":"avg"}],"formulas":[{"cell_display_mode":"number","alias":"# of coll.","formula":"query1","limit":{"count":500,"order":"desc"},"number_format":{"unit":{"type":"canonical_unit","unit_name":"item"}}}]}],"has_search_bar":"auto"}},{"id":11,"definition":{"title":"Number of Documents per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_documents{$scope} by {collection_name}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":8,"definition":{"title":"Collection Size in 
Bytes","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_size_bytes{$scope} by {collection_name}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":8709473059158392,"definition":{"title":"Collection Size in Bytes","type":"query_table","requests":[{"response_format":"scalar","queries":[{"data_source":"metrics","name":"query1","query":"max:rockset.rockset_collection_size_bytes{*} by {collection_name}","aggregator":"avg"}],"formulas":[{"cell_display_mode":"bar","alias":"Coll. size","formula":"query1","limit":{"count":500,"order":"desc"},"number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}]}],"has_search_bar":"auto"}}]}},{"id":4564612793581622,"definition":{"title":"Virtual Instance Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":4,"definition":{"title":"VI CPU Utilization","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"query1"},{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_leaf_cpu_utilization_percentage{$scope} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","order_reverse":false,"line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"scale":"linear","min":"0","max":"100"}}},{"id":2168985221699616,"definition":{"title":"VI Memory 
Utilization","title_size":"16","title_align":"left","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"query1"},{"number_format":{"unit":{"type":"canonical_unit","unit_name":"percent"}},"formula":"100 * query1"}],"queries":[{"name":"query1","data_source":"metrics","query":"avg:rockset.rockset_leaf_memory_utilization_percentage{*} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}],"yaxis":{"include_zero":true,"min":"0","max":"100"}}}]}},{"id":5479815899399802,"definition":{"title":"Ingestion Details","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":10,"definition":{"title":"Total Ingested Bytes per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"byte"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_total_ingest_bytes.count{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"area"}]}},{"id":22,"definition":{"title":"Parse Errors per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_parse_errors.count{$scope} by 
{collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":18,"definition":{"title":"Ingest Transformation Errors per Collection","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_collection_ingest_transformation_errors.count{$scope}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"line"}]}},{"id":29,"definition":{"title":"Data Discovery Latency per Collection (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"millisecond"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_collection_data_discovery_latency.bucket{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":14,"definition":{"title":"Data Processing Latency per Collection (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"number_format":{"unit":{"type":"canonical_unit","unit_name":"millisecond"}},"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"avg:rockset.rockset_collection_data_process_latency.bucket{$scope} by {collection_name}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}}]}},{"id":8525032580543298,"definition":{"title":"Query 
Metrics","background_color":"purple","show_title":true,"type":"group","layout_type":"ordered","widgets":[{"id":1,"definition":{"title":"Requested Queries","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_queries.count{virtual_instance_id:*} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":17,"definition":{"title":"Query Queue","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"item"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_queue_size{$scope} by {virtual_instance_id}"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":6,"definition":{"title":"Query Errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_errors.count{$scope} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":28,"definition":{"title":"Query Admission Control 
(seconds)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_admission_latency_seconds.sum{$scope} by {virtual_instance_id}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":0,"definition":{"title":"Query Lambda Latency (ms)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"custom_unit_label","label":"ms"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_latency_seconds.bucket{*} by {query_lambda}.as_rate()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}],"yaxis":{"include_zero":false}}},{"id":12,"definition":{"title":"Query Lambda Errors","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1"}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_errors.count{$scope} by {query_lambda}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}},{"id":27,"definition":{"title":"Query Lambda Admission Control 
(seconds)","show_legend":true,"legend_layout":"auto","legend_columns":["avg","min","max","value","sum"],"type":"timeseries","requests":[{"formulas":[{"formula":"query1","number_format":{"unit":{"type":"canonical_unit","unit_name":"second"}}}],"queries":[{"data_source":"metrics","name":"query1","query":"sum:rockset.rockset_query_lambda_admission_latency_seconds.sum{$scope} by {query_lambda}.as_count()"}],"response_format":"timeseries","style":{"palette":"purple","line_type":"solid","line_width":"normal"},"display_type":"bars"}]}}]}}],"template_variables":[{"name":"scope","available_values":[],"default":"*"}],"layout_type":"ordered","notify_list":[],"reflow_type":"auto"} \ No newline at end of file diff --git a/monitoring/datadog-agent/openmetrics.d/conf.yaml b/monitoring/datadog-agent/openmetrics.d/conf.yaml index bf352c6..1e0adae 100644 --- a/monitoring/datadog-agent/openmetrics.d/conf.yaml +++ b/monitoring/datadog-agent/openmetrics.d/conf.yaml @@ -2,10 +2,32 @@ init_config: service: rockset instances: - - openmetrics_endpoint: https://api.rs2.usw2.rockset.com/v1/orgs/self/metrics + - openmetrics_endpoint: https://api.usw2a1.rockset.com/v1/orgs/self/metrics ### make sure the api endpoint matches your region's endpoint url namespace: rockset extra_headers: - authorization: Apikey + authorization: Apikey + max_returned_metrics: 50000 metrics: - - .+ + - rockset_leaf_cpu_utilization_percentage + - rockset_leaf_memory_utilization_percentage + - rockset_agg_cpu_utilization_percentage + - rockset_agg_memory_utilization_percentage + - rockset_collections + - rockset_collection_size_bytes + - rockset_collection_documents + - rockset_collection_total_ingest_bytes + - rockset_collection_parse_errors + - rockset_collection_data_discovery_latency + - rockset_collection_data_process_latency + - rockset_data_discovery_latency + - rockset_data_process_latency + - rockset_queries + - rockset_query_latency_seconds + - rockset_query_admission_latency_seconds + - 
rockset_query_queue_size + - rockset_query_errors + - rockset_query_lambda_queries + - rockset_query_lambda_latency_seconds + - rockset_query_lambda_admission_latency_seconds + - rockset_query_lambda_errors \ No newline at end of file