diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index 6aa5797..dc7dcb6 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -3,6 +3,7 @@ module Manifold module API # Handles schema generation and writing for Manifold tables + # rubocop:disable Metrics/ClassLength class SchemaManager def initialize(name, vectors, vector_service, manifold_yaml, logger) @name = name @@ -17,6 +18,7 @@ def write_schemas(tables_directory) tables_directory.mkpath write_dimensions_schema(tables_directory) write_manifold_schema(tables_directory) + write_metrics_schemas(tables_directory) end # Returns the dimensions schema structure @@ -52,6 +54,39 @@ def write_manifold_schema(tables_directory) manifold_path.write(manifold_schema_json.concat("\n")) end + def write_metrics_schemas(tables_directory) + return unless @manifold_yaml["metrics"] + + # Create metrics subdirectory + metrics_directory = tables_directory.join("metrics") + metrics_directory.mkpath + + @manifold_yaml["metrics"].each do |group_name, group_config| + metrics_table_path = metrics_directory.join("#{group_name}.json") + metrics_table_schema = metrics_table_schema(group_name, group_config) + metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) + @logger.info("Generated metrics table schema for '#{group_name}'.") + end + end + + def metrics_table_schema(group_name, group_config) + [ + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, + { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" }, + { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED", + "fields" => [metrics_group_field(group_name, group_config)] } + ] + end + + def metrics_group_field(group_name, group_config) + { + "name" => group_name, + "type" => "RECORD", + "mode" => "NULLABLE", + "fields" => group_metrics_fields(group_config) + } + end + def dimensions_fields @dimensions_fields ||= @vectors.filter_map do |vector| @logger.info("Loading vector schema for '#{vector}'.") @@ -122,5 +157,6 @@ def sumif_fields(group_config) end end end + # rubocop:enable Metrics/ClassLength end end diff --git a/lib/manifold/templates/workspace_template.yml b/lib/manifold/templates/workspace_template.yml index 2e489da..d2a54ce 100644 --- a/lib/manifold/templates/workspace_template.yml +++ b/lib/manifold/templates/workspace_template.yml @@ -24,5 +24,4 @@ metrics: sequenceSum: field: context.sequence - source: my_project.render_metrics filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY) diff --git a/lib/manifold/terraform/workspace_configuration.rb b/lib/manifold/terraform/workspace_configuration.rb index 3ed3b37..5cd7aa1 100644 --- a/lib/manifold/terraform/workspace_configuration.rb +++ b/lib/manifold/terraform/workspace_configuration.rb @@ -36,18 +36,17 @@ def build_dimensions_merge_sql(source_sql) private def valid_config? - source_table && timestamp_field && @manifold_config["metrics"] - end - - def source_table - first_group = @manifold_config["metrics"]&.values&.first - first_group&.dig("source") + timestamp_field && @manifold_config["metrics"] && !@manifold_config["metrics"].empty? end def timestamp_field @manifold_config&.dig("timestamp", "field") end + def metrics_table_name(group_name) + "#{group_name.capitalize}Metrics" + end + def build_source_query <<~SQL WITH Metrics AS ( @@ -97,7 +96,11 @@ def build_metrics_struct def build_metric_joins metric_groups = @manifold_config["metrics"] - joins = metric_groups.map { |group, config| "#{config["source"]} AS #{group}" } + joins = metric_groups.map do |group, config| + table = "#{@name}.#{metrics_table_name(group)}" + filter = config["filter"] ? " WHERE #{config["filter"]}" : "" + "(SELECT * FROM #{table}#{filter}) AS #{group}" + end first = joins.shift return first if joins.empty? @@ -107,19 +110,32 @@ def build_metric_joins # Handles building table configurations class TableConfigBuilder - def initialize(name) + def initialize(name, manifold_config = nil) @name = name + @manifold_config = manifold_config end def build_table_configs - { + configs = { "dimensions" => dimensions_table_config, "manifold" => manifold_table_config } + + if @manifold_config&.dig("metrics") + @manifold_config["metrics"].each_key do |group_name| + configs[metrics_table_name(group_name).downcase] = metrics_table_config(group_name) + end + end + + configs end private + def metrics_table_name(group_name) + "#{group_name.capitalize}Metrics" + end + def dimensions_table_config build_table_config("Dimensions") end @@ -128,12 +144,18 @@ def manifold_table_config build_table_config("Manifold") end - def build_table_config(table_id) + def metrics_table_config(group_name) + titlecased_name = metrics_table_name(group_name) + build_table_config(titlecased_name, "metrics/#{group_name}.json") + end + + def build_table_config(table_id, schema_path = nil) + schema_path ||= "#{table_id.downcase}.json" { "dataset_id" => @name, "project" => "${var.project_id}", "table_id" => table_id, - "schema" => "${file(\"${path.module}/tables/#{table_id.downcase}.json\")}", + "schema" => "${file(\"${path.module}/tables/#{schema_path}\")}", "depends_on" => ["google_bigquery_dataset.#{@name}"] } end @@ -160,7 +182,7 @@ def as_json "variable" => variables_block, "resource" => { "google_bigquery_dataset" => dataset_config, - "google_bigquery_table" => TableConfigBuilder.new(name).build_table_configs, + "google_bigquery_table" => TableConfigBuilder.new(name, @manifold_config).build_table_configs, "google_bigquery_routine" => routine_config }.compact } diff --git a/lib/manifold/version.rb b/lib/manifold/version.rb index d1318ca..1dd715f 100644 --- a/lib/manifold/version.rb +++ b/lib/manifold/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Manifold - VERSION = "0.0.18" + VERSION = "0.1.0" end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 0642cb3..5822fda 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -178,6 +178,50 @@ expect(schema_fields[:metrics]["mode"]).to eq("REQUIRED") end + it "generates a metrics table schema file for each metrics group" do + expect(workspace.tables_directory.join("metrics/taps.json")).to be_file + end + + it "includes required id field in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + expect(metrics_schema).to include( + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + ) + end + + it "includes required timestamp field in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + expect(metrics_schema).to include( + { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" } + ) + end + + it "includes metrics field of type RECORD in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } + expect(metrics_field["type"]).to eq("RECORD") + end + + it "includes metrics field with REQUIRED mode in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } + expect(metrics_field["mode"]).to eq("REQUIRED") + end + + it "includes metrics group with correct name in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } + group_field = metrics_field["fields"].first + expect(group_field["name"]).to eq("taps") + end + + it "includes metrics group with RECORD type in metrics table schema" do + metrics_schema = parse_metrics_schema("taps") + metrics_field = metrics_schema.find { |f| f["name"] == "metrics" } + group_field = metrics_field["fields"].first + expect(group_field["type"]).to eq("RECORD") + end + shared_examples "breakout metrics" do |breakout_name| let(:breakout) do schema_fields[:metrics]["fields"] @@ -244,6 +288,10 @@ def get_dimension(field) dimensions = parse_dimensions_schema.find { |f| f["name"] == "dimensions" } dimensions["fields"].find { |f| f["name"] == field } end + + def parse_metrics_schema(group_name) + JSON.parse(workspace.tables_directory.join("metrics/#{group_name}.json").read) + end end context "when the manifold configuration is missing" do diff --git a/spec/manifold/terraform/workspace_configuration_spec.rb b/spec/manifold/terraform/workspace_configuration_spec.rb index c6e3d3e..92e6ec0 100644 --- a/spec/manifold/terraform/workspace_configuration_spec.rb +++ b/spec/manifold/terraform/workspace_configuration_spec.rb @@ -103,6 +103,18 @@ expect(dimensions_routine_details[:sql_content]).to include(source_sql) end end + + context "when metrics configuration is present" do + before do + config.manifold_config = manifold_config + end + + it "includes metrics table configurations" do + expect(json["resource"]["google_bigquery_table"]).to include( + "tapsmetrics" => expected_metrics_table("taps") + ) + end + end end context "when manifold configuration is present" do @@ -208,6 +220,17 @@ def expected_routine_config } end + def expected_metrics_table(group_name) + titlecased_name = "#{group_name.capitalize}Metrics" + { + "dataset_id" => name, + "project" => "${var.project_id}", + "table_id" => titlecased_name, + "schema" => "${file(\"${path.module}/tables/metrics/#{group_name}.json\")}", + "depends_on" => ["google_bigquery_dataset.#{name}"] + } + end + def setup_merge_vector_config Pathname.pwd.join("lib/routines").mkpath Pathname.pwd.join("lib/routines/select_pages.sql").write(source_sql)