From 0fff495def01766226fc6412bf683f0a38761dc9 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Tue, 11 Mar 2025 10:37:05 -0400 Subject: [PATCH 1/5] Generate Metrics Tables --- lib/manifold/api/schema_manager.rb | 30 +++++++++++++++ spec/manifold/api/workspace_spec.rb | 37 +++++++++++++++++++ .../terraform/workspace_configuration_spec.rb | 22 +++++++++++ 3 files changed, 89 insertions(+) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index 6aa5797..20b8ca9 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -17,6 +17,7 @@ def write_schemas(tables_directory) tables_directory.mkpath write_dimensions_schema(tables_directory) write_manifold_schema(tables_directory) + write_metrics_schemas(tables_directory) end # Returns the dimensions schema structure @@ -52,6 +53,35 @@ def write_manifold_schema(tables_directory) manifold_path.write(manifold_schema_json.concat("\n")) end + def write_metrics_schemas(tables_directory) + return unless @manifold_yaml["metrics"] + + @manifold_yaml["metrics"].each do |group_name, group_config| + metrics_table_path = tables_directory.join("metrics_#{group_name}.json") + metrics_table_schema = metrics_table_schema(group_name, group_config) + metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) + @logger.info("Generated metrics table schema for '#{group_name}'.") + end + end + + def metrics_table_schema(group_name, group_config) + [ + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, + { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" }, + { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED", + "fields" => [metrics_group_field(group_name, group_config)] } + ] + end + + def metrics_group_field(group_name, group_config) + { + "name" => group_name, + "type" => "RECORD", + "mode" => "NULLABLE", + "fields" => group_metrics_fields(group_config) + } + end + def dimensions_fields @dimensions_fields ||= @vectors.filter_map do |vector| @logger.info("Loading vector schema for '#{vector}'.") diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 0642cb3..0d7c308 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -178,6 +178,39 @@ expect(schema_fields[:metrics]["mode"]).to eq("REQUIRED") end + it "generates a metrics table schema file for each metrics group" do + expect(workspace.tables_directory.join("metrics_taps.json")).to be_file + end + + it "includes required id field in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + expect(metrics_schema).to include( + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + ) + end + + it "includes required timestamp field in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + expect(metrics_schema).to include( + { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" } + ) + end + + it "includes required metrics field in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } + expect(metrics_field["type"]).to eq("RECORD") + expect(metrics_field["mode"]).to eq("REQUIRED") + end + + it "includes the correct metrics group in the metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } + group_field = metrics_field["fields"].first + expect(group_field["name"]).to eq("taps") + expect(group_field["type"]).to eq("RECORD") + end + shared_examples "breakout metrics" do |breakout_name| let(:breakout) do schema_fields[:metrics]["fields"] @@ -244,6 +277,10 @@ def get_dimension(field) dimensions = parse_dimensions_schema.find { |f| f["name"] == "dimensions" } dimensions["fields"].find { |f| f["name"] == field } end + + def parse_metrics_schema(group_name) + JSON.parse(workspace.tables_directory.join("metrics_#{group_name}.json").read) + end end context "when the manifold configuration is missing" do diff --git a/spec/manifold/terraform/workspace_configuration_spec.rb b/spec/manifold/terraform/workspace_configuration_spec.rb index c6e3d3e..7004039 100644 --- a/spec/manifold/terraform/workspace_configuration_spec.rb +++ b/spec/manifold/terraform/workspace_configuration_spec.rb @@ -103,6 +103,18 @@ expect(dimensions_routine_details[:sql_content]).to include(source_sql) end end + + context "when metrics configuration is present" do + before do + config.manifold_config = manifold_config + end + + it "includes metrics table configurations" do + expect(json["resource"]["google_bigquery_table"]).to include( + "metrics_taps" => expected_metrics_table("taps") + ) + end + end end context "when manifold configuration is present" do @@ -208,6 +220,16 @@ def expected_routine_config } end + def expected_metrics_table(group_name) + { + "dataset_id" => name, + "project" => "${var.project_id}", + "table_id" => "Metrics_#{group_name}", + "schema" => "${file(\"${path.module}/tables/metrics_#{group_name}.json\")}", + "depends_on" => ["google_bigquery_dataset.#{name}"] + } + end + def setup_merge_vector_config Pathname.pwd.join("lib/routines").mkpath Pathname.pwd.join("lib/routines/select_pages.sql").write(source_sql) From abb1f549fb988df9d90d0df495038ec71e6e6b69 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Tue, 11 Mar 2025 10:51:27 -0400 Subject: [PATCH 2/5] silly rule --- lib/manifold/api/schema_manager.rb | 2 ++ .../terraform/workspace_configuration.rb | 19 ++++++++++++++++--- lib/manifold/version.rb | 2 +- spec/manifold/api/workspace_spec.rb | 15 +++++++++++++-- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index 20b8ca9..de16c56 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -3,6 +3,7 @@ module Manifold module API # Handles schema generation and writing for Manifold tables + # rubocop:disable Metrics/ClassLength class SchemaManager def initialize(name, vectors, vector_service, manifold_yaml, logger) @name = name @@ -152,5 +153,6 @@ def sumif_fields(group_config) end end end + # rubocop:enable Metrics/ClassLength end end diff --git a/lib/manifold/terraform/workspace_configuration.rb b/lib/manifold/terraform/workspace_configuration.rb index 3ed3b37..41ac270 100644 --- a/lib/manifold/terraform/workspace_configuration.rb +++ b/lib/manifold/terraform/workspace_configuration.rb @@ -107,15 +107,24 @@ def build_metric_joins # Handles building table configurations class TableConfigBuilder - def initialize(name) + def initialize(name, manifold_config = nil) @name = name + @manifold_config = manifold_config end def build_table_configs - { + configs = { "dimensions" => dimensions_table_config, "manifold" => manifold_table_config } + + if @manifold_config&.dig("metrics") + @manifold_config["metrics"].each_key do |group_name| + configs["metrics_#{group_name}"] = metrics_table_config(group_name) + end + end + + configs end private @@ -128,6 +137,10 @@ def manifold_table_config build_table_config("Manifold") end + def metrics_table_config(group_name) + build_table_config("Metrics_#{group_name}") + end + def build_table_config(table_id) { "dataset_id" => @name, @@ -160,7 +173,7 @@ def as_json "variable" => variables_block, "resource" => { "google_bigquery_dataset" => dataset_config, - "google_bigquery_table" => TableConfigBuilder.new(name).build_table_configs, + "google_bigquery_table" => TableConfigBuilder.new(name, @manifold_config).build_table_configs, "google_bigquery_routine" => routine_config }.compact } diff --git a/lib/manifold/version.rb b/lib/manifold/version.rb index d1318ca..1dd715f 100644 --- a/lib/manifold/version.rb +++ b/lib/manifold/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Manifold - VERSION = "0.0.18" + VERSION = "0.1.0" end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 0d7c308..791fa24 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -196,18 +196,29 @@ ) end - it "includes required metrics field in metrics table schema" do + it "includes metrics field of type RECORD in metrics table schema" do metrics_schema = parse_metrics_schema("taps") metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } expect(metrics_field["type"]).to eq("RECORD") + end + + it "includes metrics field with REQUIRED mode in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } expect(metrics_field["mode"]).to eq("REQUIRED") end - it "includes the correct metrics group in the metrics table schema" do + it "includes metrics group with correct name in metrics table schema" do metrics_schema = parse_metrics_schema("taps") metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } group_field = metrics_field["fields"].first expect(group_field["name"]).to eq("taps") + end + + it "includes metrics group with RECORD type in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } + group_field = metrics_field["fields"].first expect(group_field["type"]).to eq("RECORD") end From 82a358952d1c1c8b9e47aaa48a3c260f1a386563 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Tue, 11 Mar 2025 11:02:39 -0400 Subject: [PATCH 3/5] Titlecase tables --- lib/manifold/api/schema_manager.rb | 4 +++- lib/manifold/terraform/workspace_configuration.rb | 9 +++++++-- spec/manifold/api/workspace_spec.rb | 4 ++-- spec/manifold/terraform/workspace_configuration_spec.rb | 7 ++++--- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index de16c56..375ce18 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -58,7 +58,9 @@ def write_metrics_schemas(tables_directory) return unless @manifold_yaml["metrics"] @manifold_yaml["metrics"].each do |group_name, group_config| - metrics_table_path = tables_directory.join("metrics_#{group_name}.json") + # Convert group_name to titlecase and append "Metrics" + titlecased_name = "#{group_name.capitalize}Metrics" + metrics_table_path = tables_directory.join("#{titlecased_name.downcase}.json") metrics_table_schema = metrics_table_schema(group_name, group_config) metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) @logger.info("Generated metrics table schema for '#{group_name}'.") diff --git a/lib/manifold/terraform/workspace_configuration.rb b/lib/manifold/terraform/workspace_configuration.rb index 41ac270..a2457a2 100644 --- a/lib/manifold/terraform/workspace_configuration.rb +++ b/lib/manifold/terraform/workspace_configuration.rb @@ -120,7 +120,7 @@ def build_table_configs if @manifold_config&.dig("metrics") @manifold_config["metrics"].each_key do |group_name| - configs["metrics_#{group_name}"] = metrics_table_config(group_name) + configs[metrics_table_name(group_name).downcase] = metrics_table_config(group_name) end end @@ -129,6 +129,10 @@ def build_table_configs private + def metrics_table_name(group_name) + "#{group_name.capitalize}Metrics" + end + def dimensions_table_config build_table_config("Dimensions") end @@ -138,7 +142,8 @@ def manifold_table_config end def metrics_table_config(group_name) - build_table_config("Metrics_#{group_name}") + titlecased_name = "#{group_name.capitalize}Metrics" + build_table_config(titlecased_name) end def build_table_config(table_id) diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 791fa24..82aa7e0 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -179,7 +179,7 @@ end it "generates a metrics table schema file for each metrics group" do - expect(workspace.tables_directory.join("metrics_taps.json")).to be_file + expect(workspace.tables_directory.join("tapsmetrics.json")).to be_file end it "includes required id field in metrics table schema" do @@ -290,7 +290,7 @@ def get_dimension(field) end def parse_metrics_schema(group_name) - JSON.parse(workspace.tables_directory.join("metrics_#{group_name}.json").read) + JSON.parse(workspace.tables_directory.join("#{"#{group_name.capitalize}Metrics".downcase}.json").read) end end diff --git a/spec/manifold/terraform/workspace_configuration_spec.rb b/spec/manifold/terraform/workspace_configuration_spec.rb index 7004039..2997f42 100644 --- a/spec/manifold/terraform/workspace_configuration_spec.rb +++ b/spec/manifold/terraform/workspace_configuration_spec.rb @@ -111,7 +111,7 @@ it "includes metrics table configurations" do expect(json["resource"]["google_bigquery_table"]).to include( - "metrics_taps" => expected_metrics_table("taps") + "tapsmetrics" => expected_metrics_table("taps") ) end end @@ -221,11 +221,12 @@ def expected_routine_config end def expected_metrics_table(group_name) + titlecased_name = "#{group_name.capitalize}Metrics" { "dataset_id" => name, "project" => "${var.project_id}", - "table_id" => "Metrics_#{group_name}", - "schema" => "${file(\"${path.module}/tables/metrics_#{group_name}.json\")}", + "table_id" => titlecased_name, + "schema" => "${file(\"${path.module}/tables/#{titlecased_name.downcase}.json\")}", "depends_on" => ["google_bigquery_dataset.#{name}"] } end From b178f2fdb8a2c693b6b8a9f0123e12f0c060d9fc Mon Sep 17 00:00:00 2001 From: claytongentry Date: Tue, 11 Mar 2025 11:06:48 -0400 Subject: [PATCH 4/5] write metrics to metrics dir --- lib/manifold/api/schema_manager.rb | 8 +++++--- lib/manifold/terraform/workspace_configuration.rb | 9 +++++---- spec/manifold/api/workspace_spec.rb | 4 ++-- spec/manifold/terraform/workspace_configuration_spec.rb | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index 375ce18..dc7dcb6 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -57,10 +57,12 @@ def write_manifold_schema(tables_directory) def write_metrics_schemas(tables_directory) return unless @manifold_yaml["metrics"] + # Create metrics subdirectory + metrics_directory = tables_directory.join("metrics") + metrics_directory.mkpath + @manifold_yaml["metrics"].each do |group_name, group_config| - # Convert group_name to titlecase and append "Metrics" - titlecased_name = "#{group_name.capitalize}Metrics" - metrics_table_path = tables_directory.join("#{titlecased_name.downcase}.json") + metrics_table_path = metrics_directory.join("#{group_name}.json") metrics_table_schema = metrics_table_schema(group_name, group_config) metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) @logger.info("Generated metrics table schema for '#{group_name}'.") diff --git a/lib/manifold/terraform/workspace_configuration.rb b/lib/manifold/terraform/workspace_configuration.rb index a2457a2..0cb2ab6 100644 --- a/lib/manifold/terraform/workspace_configuration.rb +++ b/lib/manifold/terraform/workspace_configuration.rb @@ -142,16 +142,17 @@ def manifold_table_config end def metrics_table_config(group_name) - titlecased_name = "#{group_name.capitalize}Metrics" - build_table_config(titlecased_name) + titlecased_name = metrics_table_name(group_name) + build_table_config(titlecased_name, "metrics/#{group_name}.json") end - def build_table_config(table_id) + def build_table_config(table_id, schema_path = nil) + schema_path ||= "#{table_id.downcase}.json" { "dataset_id" => @name, "project" => "${var.project_id}", "table_id" => table_id, - "schema" => "${file(\"${path.module}/tables/#{table_id.downcase}.json\")}", + "schema" => "${file(\"${path.module}/tables/#{schema_path}\")}", "depends_on" => ["google_bigquery_dataset.#{@name}"] } end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 82aa7e0..5822fda 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -179,7 +179,7 @@ end it "generates a metrics table schema file for each metrics group" do - expect(workspace.tables_directory.join("tapsmetrics.json")).to be_file + expect(workspace.tables_directory.join("metrics/taps.json")).to be_file end it "includes required id field in metrics table schema" do @@ -290,7 +290,7 @@ def get_dimension(field) end def parse_metrics_schema(group_name) - JSON.parse(workspace.tables_directory.join("#{"#{group_name.capitalize}Metrics".downcase}.json").read) + JSON.parse(workspace.tables_directory.join("metrics/#{group_name}.json").read) end end diff --git a/spec/manifold/terraform/workspace_configuration_spec.rb b/spec/manifold/terraform/workspace_configuration_spec.rb index 2997f42..92e6ec0 100644 --- a/spec/manifold/terraform/workspace_configuration_spec.rb +++ b/spec/manifold/terraform/workspace_configuration_spec.rb @@ -226,7 +226,7 @@ def expected_metrics_table(group_name) "dataset_id" => name, "project" => "${var.project_id}", "table_id" => titlecased_name, - "schema" => "${file(\"${path.module}/tables/#{titlecased_name.downcase}.json\")}", + "schema" => "${file(\"${path.module}/tables/metrics/#{group_name}.json\")}", "depends_on" => ["google_bigquery_dataset.#{name}"] } end From 56560f9e9f7b1c189801e380f66a54ce4ef36bc6 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Tue, 11 Mar 2025 11:22:48 -0400 Subject: [PATCH 5/5] Factor out metrics sources --- lib/manifold/templates/workspace_template.yml | 1 - .../terraform/workspace_configuration.rb | 17 ++++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/manifold/templates/workspace_template.yml b/lib/manifold/templates/workspace_template.yml index 2e489da..d2a54ce 100644 --- a/lib/manifold/templates/workspace_template.yml +++ b/lib/manifold/templates/workspace_template.yml @@ -24,5 +24,4 @@ metrics: sequenceSum: field: context.sequence - source: my_project.render_metrics filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY) diff --git a/lib/manifold/terraform/workspace_configuration.rb b/lib/manifold/terraform/workspace_configuration.rb index 0cb2ab6..5cd7aa1 100644 --- a/lib/manifold/terraform/workspace_configuration.rb +++ b/lib/manifold/terraform/workspace_configuration.rb @@ -36,18 +36,17 @@ def build_dimensions_merge_sql(source_sql) private def valid_config? - source_table && timestamp_field && @manifold_config["metrics"] - end - - def source_table - first_group = @manifold_config["metrics"]&.values&.first - first_group&.dig("source") + timestamp_field && @manifold_config["metrics"] && !@manifold_config["metrics"].empty? end def timestamp_field @manifold_config&.dig("timestamp", "field") end + def metrics_table_name(group_name) + "#{group_name.capitalize}Metrics" + end + def build_source_query <<~SQL WITH Metrics AS ( @@ -97,7 +96,11 @@ def build_metrics_struct def build_metric_joins metric_groups = @manifold_config["metrics"] - joins = metric_groups.map { |group, config| "#{config["source"]} AS #{group}" } + joins = metric_groups.map do |group, config| + table = "#{@name}.#{metrics_table_name(group)}" + filter = config["filter"] ? " WHERE #{config["filter"]}" : "" + "(SELECT * FROM #{table}#{filter}) AS #{group}" + end first = joins.shift return first if joins.empty?