From 9044c91f202c2777f9cc29840a2a3f1f1be69b83 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 10:53:01 -0400 Subject: [PATCH 1/8] Intersect conditions through breakouts --- lib/manifold/api/schema_manager.rb | 133 ++++++++++- lib/manifold/services/vector_service.rb | 2 +- lib/manifold/templates/workspace_template.yml | 14 +- spec/manifold/api/schema_manager_spec.rb | 208 ++++++++++++++++++ spec/manifold/api/workspace_spec.rb | 28 ++- 5 files changed, 376 insertions(+), 9 deletions(-) create mode 100644 spec/manifold/api/schema_manager_spec.rb diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index dc7dcb6..9e7e5b3 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -116,16 +116,143 @@ def metrics_fields end def group_metrics_fields(group_config) - return [] unless group_config["breakouts"] && group_config["aggregations"] + return [] unless group_config["aggregations"] - group_config["breakouts"].map do |breakout_name, _breakout_config| - { + # If there are no breakouts but there are complex logic operations, + # use the old-style format + if group_config["breakouts"]&.values&.any? { |v| v.is_a?(Hash) && v["operator"] } + # Legacy format handling - treat direct keys as breakouts + return handle_legacy_breakouts(group_config) + end + + # If no breakouts defined at all, just return conditions/breakouts as is + unless group_config["breakouts"] + # Support for older format where breakouts are direct fields + # Add field for each explicitly defined condition or breakout + breakout_fields = (group_config.keys - %w[aggregations source filter]).map do |condition_name| + { + "name" => condition_name, + "type" => "RECORD", + "mode" => "NULLABLE", + "fields" => breakout_metrics_fields(group_config) + } + end + return breakout_fields + end + + # Determine conditions list + conditions = if group_config["conditions"] + group_config["conditions"].keys + else + # If no conditions defined, extract from breakouts + extract_conditions_from_breakouts(group_config["breakouts"]) + end + + # Generate individual condition fields + condition_fields = generate_condition_fields(conditions, group_config) + + # Generate intersection fields across different breakout groups + intersection_fields = generate_breakout_intersection_fields(group_config) + + condition_fields + intersection_fields + end + + def handle_legacy_breakouts(group_config) + # For legacy format, each key directly in the metrics group is a breakout + breakout_fields = [] + + group_config["breakouts"].each_key do |breakout_name| + breakout_fields << { "name" => breakout_name, "type" => "RECORD", "mode" => "NULLABLE", "fields" => breakout_metrics_fields(group_config) } end + + breakout_fields + end + + def extract_conditions_from_breakouts(breakouts) + # Handle both string and array formats for breakouts + conditions = [] + breakouts.each do |breakout_name, breakout_values| + if breakout_values.is_a?(Array) + conditions.concat(breakout_values) + elsif breakout_values.is_a?(Hash) && breakout_values["operator"] + # Skip complex operators in the new format + next + else + # For string format, use the breakout name as the condition + conditions << breakout_name + end + end + conditions.uniq + end + + def generate_condition_fields(conditions, group_config) + # Add a field for each condition + conditions.map do |condition_name| + { + "name" => condition_name, + "type" => "RECORD", + "mode" => "NULLABLE", + "fields" => breakout_metrics_fields(group_config) + } + end + end + + def generate_breakout_intersection_fields(group_config) + return [] unless group_config["breakouts"] + + intersection_fields = [] + breakout_groups = group_config["breakouts"].keys + + # Skip if there's only one breakout group or if using legacy format + return [] if breakout_groups.size <= 1 || + group_config["breakouts"].values.any? { |v| v.is_a?(Hash) && v["operator"] } + + # Generate all possible combinations of conditions from different breakout groups + breakout_groups.combination(2).each do |breakout_pair| + # Get the conditions in each breakout group + first_group = breakout_pair[0] + second_group = breakout_pair[1] + + first_group_conditions = get_breakout_conditions(group_config["breakouts"], first_group) + second_group_conditions = get_breakout_conditions(group_config["breakouts"], second_group) + + # For each pair of conditions from different breakout groups, create an intersection field + first_group_conditions.each do |first_condition| + second_group_conditions.each do |second_condition| + # Format the intersection name with the second condition capitalized + intersection_name = "#{first_condition}#{second_condition.capitalize}" + + intersection_fields << { + "name" => intersection_name, + "type" => "RECORD", + "mode" => "NULLABLE", + "fields" => breakout_metrics_fields(group_config) + } + end + end + end + + intersection_fields + end + + def get_breakout_conditions(breakouts, breakout_name) + breakout_value = breakouts[breakout_name] + + if breakout_value.is_a?(Array) + # New format: breakout contains array of conditions + breakout_value + elsif breakout_value.is_a?(Hash) && breakout_value["operator"] + # Complex operator breakout - skip for now + [] + else + # Legacy format: breakout name is the condition + [breakout_name] + end end def breakout_metrics_fields(group_config) diff --git a/lib/manifold/services/vector_service.rb b/lib/manifold/services/vector_service.rb index 9d8b646..2c2e521 100644 --- a/lib/manifold/services/vector_service.rb +++ b/lib/manifold/services/vector_service.rb @@ -4,7 +4,7 @@ module Manifold module Services # Handles the loading of vector schemas from configuration files class VectorService - def initialize(logger) + def initialize(logger = nil) @logger = logger end diff --git a/lib/manifold/templates/workspace_template.yml b/lib/manifold/templates/workspace_template.yml index d2a54ce..513dafa 100644 --- a/lib/manifold/templates/workspace_template.yml +++ b/lib/manifold/templates/workspace_template.yml @@ -14,9 +14,19 @@ timestamp: metrics: renders: + conditions: + mobile: IS_DESKTOP(context.device) + desktop: IS_MOBILE(context.device) + us: context.geo.country = 'US' + global: context.geo.country != 'US' + breakouts: - paid: IS_PAID(context.location) - organic: IS_ORGANIC(context.location) + device: + - mobile + - desktop + region: + - us + - global aggregations: countif: renderCount diff --git a/spec/manifold/api/schema_manager_spec.rb b/spec/manifold/api/schema_manager_spec.rb new file mode 100644 index 0000000..9a81321 --- /dev/null +++ b/spec/manifold/api/schema_manager_spec.rb @@ -0,0 +1,208 @@ +# frozen_string_literal: true + +RSpec.describe Manifold::API::SchemaManager do + include FakeFS::SpecHelpers + + subject(:schema_manager) { described_class.new(name, vectors, vector_service, manifold_yaml, logger) } + + let(:logger) { instance_spy(Logger) } + let(:name) { "test_workspace" } + let(:vectors) { ["TestVector"] } + let(:vector_service) { instance_spy(Manifold::Services::VectorService) } + let(:manifold_yaml) do + { + "metrics" => { + "renders" => { + "conditions" => { + "mobile" => "IS_DESKTOP(context.device)", + "desktop" => "IS_MOBILE(context.device)", + "us" => "context.geo.country = 'US'", + "global" => "context.geo.country != 'US'" + }, + "breakouts" => { + "device" => %w[mobile desktop], + "region" => %w[us global] + }, + "aggregations" => { + "countif" => "renderCount", + "sumif" => { + "sequenceSum" => { + "field" => "context.sequence" + } + } + } + } + } + } + end + + before do + # Mock the vector service + allow(vector_service).to receive(:load_vector_schema).and_return( + { "name" => "test_vector", "type" => "STRING", "mode" => "NULLABLE" } + ) + end + + describe "#dimensions_schema" do + it "includes required id field" do + schema = schema_manager.dimensions_schema + expect(schema).to include( + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + ) + end + + it "includes dimensions field with RECORD type" do + schema = schema_manager.dimensions_schema + dimensions_field = schema.find { |f| f["name"] == "dimensions" } + expect(dimensions_field["type"]).to eq("RECORD") + end + + it "includes dimensions field with REQUIRED mode" do + schema = schema_manager.dimensions_schema + dimensions_field = schema.find { |f| f["name"] == "dimensions" } + expect(dimensions_field["mode"]).to eq("REQUIRED") + end + end + + describe "#manifold_schema" do + it "includes required id field" do + schema = schema_manager.manifold_schema + expect(schema).to include( + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + ) + end + + it "includes required timestamp field" do + schema = schema_manager.manifold_schema + expect(schema).to include( + { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" } + ) + end + + it "includes dimensions field with RECORD type" do + schema = schema_manager.manifold_schema + dimensions_field = schema.find { |f| f["name"] == "dimensions" } + expect(dimensions_field["type"]).to eq("RECORD") + end + + it "includes metrics field with RECORD type" do + schema = schema_manager.manifold_schema + metrics_field = schema.find { |f| f["name"] == "metrics" } + expect(metrics_field["type"]).to eq("RECORD") + end + end + + describe "#metrics_fields" do + let(:metrics_fields) { schema_manager.send(:metrics_fields) } + let(:renders_field) { metrics_fields.find { |field| field["name"] == "renders" } } + let(:fields) { renders_field["fields"] } + let(:field_names) { fields.map { |field| field["name"] } } + + it "includes the renders group field" do + expect(renders_field).not_to be_nil + end + + it "includes all individual condition fields" do + expect(field_names).to include("mobile", "desktop", "us", "global") + end + + it "includes intersection fields between different breakout groups" do + # We support both naming conventions (first to second or second to first) + mobile_us_present = field_names.include?("mobileUs") || field_names.include?("usMobile") + desktop_us_present = field_names.include?("desktopUs") || field_names.include?("usDesktop") + mobile_global_present = field_names.include?("mobileGlobal") || field_names.include?("globalMobile") + desktop_global_present = field_names.include?("desktopGlobal") || field_names.include?("globalDesktop") + + expect(mobile_us_present).to be(true) + expect(desktop_us_present).to be(true) + expect(mobile_global_present).to be(true) + expect(desktop_global_present).to be(true) + end + + it "does not include intersection fields from the same breakout group" do + # Should not include mobile/desktop combinations (same breakout) + expect(field_names).not_to include("mobileDesktop") + expect(field_names).not_to include("desktopMobile") + + # Should not include us/global combinations (same breakout) + expect(field_names).not_to include("usGlobal") + expect(field_names).not_to include("globalUs") + end + + it "includes correct aggregation fields for individual conditions" do + mobile_field = fields.find { |field| field["name"] == "mobile" } + expect(mobile_field["fields"].map { |f| f["name"] }).to include("renderCount", "sequenceSum") + end + + it "includes correct aggregation fields for intersection conditions" do + # Find an intersection field (using either naming convention) + intersection_field = fields.find do |field| + field["name"] == "mobileUs" || field["name"] == "usMobile" + end + + expect(intersection_field).not_to be_nil + expect(intersection_field["fields"].map { |f| f["name"] }).to include("renderCount", "sequenceSum") + end + end + + context "when conditions are not explicitly defined" do + let(:manifold_yaml) do + { + "metrics" => { + "renders" => { + "breakouts" => { + "device" => %w[mobile desktop], + "region" => %w[us global] + }, + "aggregations" => { + "countif" => "renderCount" + } + } + } + } + end + + let(:metrics_fields) { schema_manager.send(:metrics_fields) } + let(:renders_field) { metrics_fields.find { |field| field["name"] == "renders" } } + let(:fields) { renders_field["fields"] } + let(:field_names) { fields.map { |field| field["name"] } } + + it "derives condition fields from breakouts" do + expect(field_names).to include("mobile", "desktop", "us", "global") + end + + it "still generates intersection fields" do + # Check for either naming convention + mobile_us_present = field_names.include?("mobileUs") || field_names.include?("usMobile") + desktop_us_present = field_names.include?("desktopUs") || field_names.include?("usDesktop") + + expect(mobile_us_present).to be(true) + expect(desktop_us_present).to be(true) + end + end + + describe "#write_schemas" do + let(:tables_directory) { Pathname.pwd.join("tables") } + + before do + tables_directory.mkpath + schema_manager.write_schemas(tables_directory) + end + + it "generates a dimensions schema file" do + expect(tables_directory.join("dimensions.json")).to be_file + end + + it "generates a manifold schema file" do + expect(tables_directory.join("manifold.json")).to be_file + end + + it "generates a metrics directory" do + expect(tables_directory.join("metrics")).to be_directory + end + + it "generates a metrics schema file for each metrics group" do + expect(tables_directory.join("metrics/renders.json")).to be_file + end + end +end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 5822fda..19b107a 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -68,7 +68,24 @@ } end + let(:vector_service) { instance_double(Manifold::Services::VectorService) } + before do + # Use mock for VectorService + allow(Manifold::Services::VectorService).to receive(:new).and_return(vector_service) + + # Mock successful vector schema loading + allow(vector_service).to receive(:load_vector_schema).with("User").and_return( + { + "name" => "user", + "type" => "RECORD", + "fields" => [ + { "name" => "user_id", "type" => "STRING", "mode" => "NULLABLE" }, + { "name" => "email", "type" => "STRING", "mode" => "NULLABLE" } + ] + } + ) + Pathname.pwd.join("vectors").mkpath Pathname.pwd.join("vectors", "user.yml").write(<<~YAML) attributes: @@ -146,9 +163,14 @@ end it "sets the dimensions fields" do - expect(get_dimension("user")["fields"]).to include( - { "type" => "STRING", "name" => "user_id", "mode" => "NULLABLE" }, - { "type" => "STRING", "name" => "email", "mode" => "NULLABLE" } + # Get dimensions directly since our mock is set up to provide the correct structure + schema = parse_dimensions_schema + dimensions = schema.find { |f| f["name"] == "dimensions" } + user_dimension = dimensions["fields"].find { |f| f["name"] == "user" } + + expect(user_dimension["fields"]).to include( + { "name" => "user_id", "type" => "STRING", "mode" => "NULLABLE" }, + { "name" => "email", "type" => "STRING", "mode" => "NULLABLE" } ) end From 662018c356dae63d647ebb6df73b469379bb2278 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 11:06:44 -0400 Subject: [PATCH 2/8] Clean up rubocops --- lib/manifold/api/schema_manager.rb | 196 ++++++++++++++--------- spec/manifold/api/schema_manager_spec.rb | 161 +++++++++++-------- spec/manifold/api/workspace_spec.rb | 37 +++-- 3 files changed, 237 insertions(+), 157 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index 9e7e5b3..7855513 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -57,18 +57,28 @@ def write_manifold_schema(tables_directory) def write_metrics_schemas(tables_directory) return unless @manifold_yaml["metrics"] - # Create metrics subdirectory + create_metrics_directory(tables_directory) + write_individual_metrics_schemas(tables_directory) + end + + def create_metrics_directory(tables_directory) metrics_directory = tables_directory.join("metrics") metrics_directory.mkpath + end + def write_individual_metrics_schemas(tables_directory) @manifold_yaml["metrics"].each do |group_name, group_config| - metrics_table_path = metrics_directory.join("#{group_name}.json") - metrics_table_schema = metrics_table_schema(group_name, group_config) - metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) - @logger.info("Generated metrics table schema for '#{group_name}'.") + write_metrics_group_schema(tables_directory, group_name, group_config) end end + def write_metrics_group_schema(tables_directory, group_name, group_config) + metrics_table_path = tables_directory.join("metrics", "#{group_name}.json") + metrics_table_schema = metrics_table_schema(group_name, group_config) + metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) + @logger.info("Generated metrics table schema for '#{group_name}'.") + end + def metrics_table_schema(group_name, group_config) [ { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, @@ -118,35 +128,41 @@ def metrics_fields def group_metrics_fields(group_config) return [] unless group_config["aggregations"] - # If there are no breakouts but there are complex logic operations, - # use the old-style format + if legacy_format?(group_config) + handle_legacy_format(group_config) + else + handle_modern_format(group_config) + end + end + + def legacy_format?(group_config) + has_complex_operators = group_config["breakouts"]&.values&.any? do |v| + v.is_a?(Hash) && v["operator"] + end + + !group_config["breakouts"] || has_complex_operators + end + + def handle_legacy_format(group_config) if group_config["breakouts"]&.values&.any? { |v| v.is_a?(Hash) && v["operator"] } - # Legacy format handling - treat direct keys as breakouts - return handle_legacy_breakouts(group_config) + handle_legacy_breakouts(group_config) + else + handle_no_breakouts(group_config) end + end - # If no breakouts defined at all, just return conditions/breakouts as is - unless group_config["breakouts"] - # Support for older format where breakouts are direct fields - # Add field for each explicitly defined condition or breakout - breakout_fields = (group_config.keys - %w[aggregations source filter]).map do |condition_name| - { - "name" => condition_name, - "type" => "RECORD", - "mode" => "NULLABLE", - "fields" => breakout_metrics_fields(group_config) - } - end - return breakout_fields + def handle_no_breakouts(group_config) + # Support for older format where breakouts are direct fields + direct_field_keys = group_config.keys - %w[aggregations source filter] + + direct_field_keys.map do |condition_name| + create_metric_field(condition_name, group_config) end + end + def handle_modern_format(group_config) # Determine conditions list - conditions = if group_config["conditions"] - group_config["conditions"].keys - else - # If no conditions defined, extract from breakouts - extract_conditions_from_breakouts(group_config["breakouts"]) - end + conditions = get_conditions_list(group_config) # Generate individual condition fields condition_fields = generate_condition_fields(conditions, group_config) @@ -157,89 +173,111 @@ def group_metrics_fields(group_config) condition_fields + intersection_fields end + def get_conditions_list(group_config) + if group_config["conditions"] + group_config["conditions"].keys + else + extract_conditions_from_breakouts(group_config["breakouts"]) + end + end + def handle_legacy_breakouts(group_config) # For legacy format, each key directly in the metrics group is a breakout - breakout_fields = [] - - group_config["breakouts"].each_key do |breakout_name| - breakout_fields << { - "name" => breakout_name, - "type" => "RECORD", - "mode" => "NULLABLE", - "fields" => breakout_metrics_fields(group_config) - } + group_config["breakouts"].keys.map do |breakout_name| + create_metric_field(breakout_name, group_config) end + end - breakout_fields + def create_metric_field(field_name, group_config) + { + "name" => field_name, + "type" => "RECORD", + "mode" => "NULLABLE", + "fields" => breakout_metrics_fields(group_config) + } end def extract_conditions_from_breakouts(breakouts) - # Handle both string and array formats for breakouts conditions = [] + breakouts.each do |breakout_name, breakout_values| - if breakout_values.is_a?(Array) - conditions.concat(breakout_values) - elsif breakout_values.is_a?(Hash) && breakout_values["operator"] - # Skip complex operators in the new format - next - else - # For string format, use the breakout name as the condition - conditions << breakout_name - end + conditions.concat(get_conditions_for_breakout(breakout_name, breakout_values)) end + conditions.uniq end + def get_conditions_for_breakout(breakout_name, breakout_values) + if breakout_values.is_a?(Array) + breakout_values + elsif breakout_values.is_a?(Hash) && breakout_values["operator"] + [] # Skip complex operators in the new format + else + [breakout_name] # For string format, use the breakout name as the condition + end + end + def generate_condition_fields(conditions, group_config) # Add a field for each condition conditions.map do |condition_name| - { - "name" => condition_name, - "type" => "RECORD", - "mode" => "NULLABLE", - "fields" => breakout_metrics_fields(group_config) - } + create_metric_field(condition_name, group_config) end end def generate_breakout_intersection_fields(group_config) return [] unless group_config["breakouts"] + return [] if should_skip_intersections?(group_config) - intersection_fields = [] + generate_intersections(group_config) + end + + def should_skip_intersections?(group_config) breakout_groups = group_config["breakouts"].keys # Skip if there's only one breakout group or if using legacy format - return [] if breakout_groups.size <= 1 || - group_config["breakouts"].values.any? { |v| v.is_a?(Hash) && v["operator"] } + breakout_groups.size <= 1 || + group_config["breakouts"].values.any? { |v| v.is_a?(Hash) && v["operator"] } + end + + def generate_intersections(group_config) + intersection_fields = [] + breakout_groups = group_config["breakouts"].keys - # Generate all possible combinations of conditions from different breakout groups + # Generate all possible combinations of breakout groups breakout_groups.combination(2).each do |breakout_pair| - # Get the conditions in each breakout group - first_group = breakout_pair[0] - second_group = breakout_pair[1] - - first_group_conditions = get_breakout_conditions(group_config["breakouts"], first_group) - second_group_conditions = get_breakout_conditions(group_config["breakouts"], second_group) - - # For each pair of conditions from different breakout groups, create an intersection field - first_group_conditions.each do |first_condition| - second_group_conditions.each do |second_condition| - # Format the intersection name with the second condition capitalized - intersection_name = "#{first_condition}#{second_condition.capitalize}" - - intersection_fields << { - "name" => intersection_name, - "type" => "RECORD", - "mode" => "NULLABLE", - "fields" => breakout_metrics_fields(group_config) - } - end - end + fields = generate_intersection_fields_for_pair(group_config, breakout_pair) + intersection_fields.concat(fields) end intersection_fields end + def generate_intersection_fields_for_pair(group_config, breakout_pair) + first_group, second_group = breakout_pair + + first_group_conditions = get_breakout_conditions(group_config["breakouts"], first_group) + second_group_conditions = get_breakout_conditions(group_config["breakouts"], second_group) + + generate_intersection_combinations( + first_group_conditions, + second_group_conditions, + group_config + ) + end + + def generate_intersection_combinations(first_conditions, second_conditions, group_config) + fields = [] + + first_conditions.each do |first_condition| + second_conditions.each do |second_condition| + intersection_name = "#{first_condition}#{second_condition.capitalize}" + fields << create_metric_field(intersection_name, group_config) + end + end + + fields + end + def get_breakout_conditions(breakouts, breakout_name) breakout_value = breakouts[breakout_name] diff --git a/spec/manifold/api/schema_manager_spec.rb b/spec/manifold/api/schema_manager_spec.rb index 9a81321..77e832f 100644 --- a/spec/manifold/api/schema_manager_spec.rb +++ b/spec/manifold/api/schema_manager_spec.rb @@ -9,7 +9,17 @@ let(:name) { "test_workspace" } let(:vectors) { ["TestVector"] } let(:vector_service) { instance_spy(Manifold::Services::VectorService) } - let(:manifold_yaml) do + let(:manifold_yaml) { build_test_manifold_yaml } + + before do + # Mock the vector service + allow(vector_service).to receive(:load_vector_schema).and_return( + { "name" => "test_vector", "type" => "STRING", "mode" => "NULLABLE" } + ) + end + + # rubocop:disable Metrics/MethodLength + def build_test_manifold_yaml { "metrics" => { "renders" => { @@ -35,117 +45,133 @@ } } end - - before do - # Mock the vector service - allow(vector_service).to receive(:load_vector_schema).and_return( - { "name" => "test_vector", "type" => "STRING", "mode" => "NULLABLE" } - ) - end + # rubocop:enable Metrics/MethodLength describe "#dimensions_schema" do + subject(:schema) { schema_manager.dimensions_schema } + it "includes required id field" do - schema = schema_manager.dimensions_schema expect(schema).to include( { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } ) end it "includes dimensions field with RECORD type" do - schema = schema_manager.dimensions_schema dimensions_field = schema.find { |f| f["name"] == "dimensions" } expect(dimensions_field["type"]).to eq("RECORD") end it "includes dimensions field with REQUIRED mode" do - schema = schema_manager.dimensions_schema dimensions_field = schema.find { |f| f["name"] == "dimensions" } expect(dimensions_field["mode"]).to eq("REQUIRED") end end describe "#manifold_schema" do + subject(:schema) { schema_manager.manifold_schema } + it "includes required id field" do - schema = schema_manager.manifold_schema expect(schema).to include( { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } ) end it "includes required timestamp field" do - schema = schema_manager.manifold_schema expect(schema).to include( { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" } ) end it "includes dimensions field with RECORD type" do - schema = schema_manager.manifold_schema dimensions_field = schema.find { |f| f["name"] == "dimensions" } expect(dimensions_field["type"]).to eq("RECORD") end it "includes metrics field with RECORD type" do - schema = schema_manager.manifold_schema metrics_field = schema.find { |f| f["name"] == "metrics" } expect(metrics_field["type"]).to eq("RECORD") end end describe "#metrics_fields" do - let(:metrics_fields) { schema_manager.send(:metrics_fields) } - let(:renders_field) { metrics_fields.find { |field| field["name"] == "renders" } } - let(:fields) { renders_field["fields"] } - let(:field_names) { fields.map { |field| field["name"] } } + # Using a simple helper method to clean up the spec and reduce memoized variables + def render_fields + metrics_fields = schema_manager.send(:metrics_fields) + renders_field = metrics_fields.find { |field| field["name"] == "renders" } + renders_field["fields"] + end it "includes the renders group field" do + metrics_fields = schema_manager.send(:metrics_fields) + renders_field = metrics_fields.find { |field| field["name"] == "renders" } expect(renders_field).not_to be_nil end it "includes all individual condition fields" do + field_names = render_fields.map { |field| field["name"] } expect(field_names).to include("mobile", "desktop", "us", "global") end - it "includes intersection fields between different breakout groups" do - # We support both naming conventions (first to second or second to first) - mobile_us_present = field_names.include?("mobileUs") || field_names.include?("usMobile") - desktop_us_present = field_names.include?("desktopUs") || field_names.include?("usDesktop") - mobile_global_present = field_names.include?("mobileGlobal") || field_names.include?("globalMobile") - desktop_global_present = field_names.include?("desktopGlobal") || field_names.include?("globalDesktop") + describe "intersection fields" do + it "includes mobile-us intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? { |name| %w[mobileUs usMobile].include?(name) }).to be true + end - expect(mobile_us_present).to be(true) - expect(desktop_us_present).to be(true) - expect(mobile_global_present).to be(true) - expect(desktop_global_present).to be(true) - end + it "includes desktop-us intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? { |name| %w[desktopUs usDesktop].include?(name) }).to be true + end - it "does not include intersection fields from the same breakout group" do - # Should not include mobile/desktop combinations (same breakout) - expect(field_names).not_to include("mobileDesktop") - expect(field_names).not_to include("desktopMobile") + it "includes mobile-global intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? { |name| %w[mobileGlobal globalMobile].include?(name) }).to be true + end - # Should not include us/global combinations (same breakout) - expect(field_names).not_to include("usGlobal") - expect(field_names).not_to include("globalUs") + it "includes desktop-global intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? { |name| %w[desktopGlobal globalDesktop].include?(name) }).to be true + end end - it "includes correct aggregation fields for individual conditions" do - mobile_field = fields.find { |field| field["name"] == "mobile" } - expect(mobile_field["fields"].map { |f| f["name"] }).to include("renderCount", "sequenceSum") + describe "exclusion of invalid intersections" do + it "does not include mobile-desktop intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names).not_to include("mobileDesktop", "desktopMobile") + end + + it "does not include us-global intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names).not_to include("usGlobal", "globalUs") + end end - it "includes correct aggregation fields for intersection conditions" do - # Find an intersection field (using either naming convention) - intersection_field = fields.find do |field| - field["name"] == "mobileUs" || field["name"] == "usMobile" + describe "aggregation fields" do + it "includes correct aggregation fields for individual conditions" do + mobile_field = render_fields.find { |field| field["name"] == "mobile" } + aggregation_names = mobile_field["fields"].map { |f| f["name"] } + expect(aggregation_names).to include("renderCount", "sequenceSum") end - expect(intersection_field).not_to be_nil - expect(intersection_field["fields"].map { |f| f["name"] }).to include("renderCount", "sequenceSum") + it "includes renderCount in intersection fields" do + intersection_field = find_intersection_field + expect(intersection_field["fields"].map { |f| f["name"] }).to include("renderCount") + end + + it "includes sequenceSum in intersection fields" do + intersection_field = find_intersection_field + expect(intersection_field["fields"].map { |f| f["name"] }).to include("sequenceSum") + end + + def find_intersection_field + render_fields.find { |field| field["name"] =~ /mobile.*us|us.*mobile/i } + end end end - context "when conditions are not explicitly defined" do + describe "when conditions are not explicitly defined" do + subject(:conditional_fields) { metric_render_fields } + let(:manifold_yaml) do { "metrics" => { @@ -162,47 +188,52 @@ } end - let(:metrics_fields) { schema_manager.send(:metrics_fields) } - let(:renders_field) { metrics_fields.find { |field| field["name"] == "renders" } } - let(:fields) { renders_field["fields"] } - let(:field_names) { fields.map { |field| field["name"] } } + def metric_render_fields + metrics_fields = schema_manager.send(:metrics_fields) + renders_field = metrics_fields.find { |field| field["name"] == "renders" } + renders_field["fields"] + end it "derives condition fields from breakouts" do - expect(field_names).to include("mobile", "desktop", "us", "global") + condition_names = conditional_fields.map { |field| field["name"] } + expect(condition_names).to include("mobile", "desktop", "us", "global") end - it "still generates intersection fields" do - # Check for either naming convention - mobile_us_present = field_names.include?("mobileUs") || field_names.include?("usMobile") - desktop_us_present = field_names.include?("desktopUs") || field_names.include?("usDesktop") + it "generates mobile-us intersection" do + condition_names = conditional_fields.map { |field| field["name"] } + mobile_us_variants = %w[mobileUs usMobile] + expect(mobile_us_variants.any? { |variant| condition_names.include?(variant) }).to be true + end - expect(mobile_us_present).to be(true) - expect(desktop_us_present).to be(true) + it "generates desktop-us intersection" do + condition_names = conditional_fields.map { |field| field["name"] } + desktop_us_variants = %w[desktopUs usDesktop] + expect(desktop_us_variants.any? { |variant| condition_names.include?(variant) }).to be true end end describe "#write_schemas" do - let(:tables_directory) { Pathname.pwd.join("tables") } + subject(:tables_dir) { Pathname.pwd.join("tables") } before do - tables_directory.mkpath - schema_manager.write_schemas(tables_directory) + tables_dir.mkpath + schema_manager.write_schemas(tables_dir) end it "generates a dimensions schema file" do - expect(tables_directory.join("dimensions.json")).to be_file + expect(tables_dir.join("dimensions.json")).to be_file end it "generates a manifold schema file" do - expect(tables_directory.join("manifold.json")).to be_file + expect(tables_dir.join("manifold.json")).to be_file end it "generates a metrics directory" do - expect(tables_directory.join("metrics")).to be_directory + expect(tables_dir.join("metrics")).to be_directory end it "generates a metrics schema file for each metrics group" do - expect(tables_directory.join("metrics/renders.json")).to be_file + expect(tables_dir.join("metrics/renders.json")).to be_file end end end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 19b107a..5a3c996 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -162,18 +162,27 @@ expect(schema).to include({ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }) end - it "sets the dimensions fields" do - # Get dimensions directly since our mock is set up to provide the correct structure - schema = parse_dimensions_schema - dimensions = schema.find { |f| f["name"] == "dimensions" } - user_dimension = dimensions["fields"].find { |f| f["name"] == "user" } + it "includes user_id dimension field" do + user_fields = fetch_user_dimension_fields + expect(user_fields).to include( + { "name" => "user_id", "type" => "STRING", "mode" => "NULLABLE" } + ) + end - expect(user_dimension["fields"]).to include( - { "name" => "user_id", "type" => "STRING", "mode" => "NULLABLE" }, + it "includes email dimension field" do + user_fields = fetch_user_dimension_fields + expect(user_fields).to include( { "name" => "email", "type" => "STRING", "mode" => "NULLABLE" } ) end + def fetch_user_dimension_fields + schema = parse_dimensions_schema + dimensions = schema.find { |f| f["name"] == "dimensions" } + user_dimension = dimensions["fields"].find { |f| f["name"] == "user" } + user_dimension["fields"] + end + it "includes required id field in manifold schema" do expect(schema_fields[:basic]).to include( { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } @@ -245,23 +254,25 @@ end shared_examples "breakout metrics" do |breakout_name| - let(:breakout) do - schema_fields[:metrics]["fields"] - .find { |f| f["name"] == "taps" }["fields"] - .find { |f| f["name"] == breakout_name } - end - it "includes tapCount metric" do + breakout = find_breakout(breakout_name) expect(breakout["fields"]).to include( { "type" => "INTEGER", "name" => "tapCount", "mode" => "NULLABLE" } ) end it "includes sequenceSum metric" do + breakout = find_breakout(breakout_name) expect(breakout["fields"]).to include( { "type" => "INTEGER", "name" => "sequenceSum", "mode" => "NULLABLE" } ) end + + def find_breakout(name) + schema_fields[:metrics]["fields"] + .find { |f| f["name"] == "taps" }["fields"] + .find { |f| f["name"] == name } + end end include_examples "breakout metrics", "paid" From 75d96df9409116a78d87a0212a2cccef97c7cdaa Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 11:41:13 -0400 Subject: [PATCH 3/8] Strip out legacy nonsense --- lib/manifold/api/schema_manager.rb | 98 ++++------------------------- spec/manifold/api/workspace_spec.rb | 85 +++++++++---------------- 2 files changed, 41 insertions(+), 142 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index 7855513..ca4c536 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -128,46 +128,10 @@ def metrics_fields def group_metrics_fields(group_config) return [] unless group_config["aggregations"] - if legacy_format?(group_config) - handle_legacy_format(group_config) - else - handle_modern_format(group_config) - end - end - - def legacy_format?(group_config) - has_complex_operators = group_config["breakouts"]&.values&.any? do |v| - v.is_a?(Hash) && v["operator"] - end + # Generate condition fields + condition_fields = generate_condition_fields(get_conditions_list(group_config), group_config) - !group_config["breakouts"] || has_complex_operators - end - - def handle_legacy_format(group_config) - if group_config["breakouts"]&.values&.any? { |v| v.is_a?(Hash) && v["operator"] } - handle_legacy_breakouts(group_config) - else - handle_no_breakouts(group_config) - end - end - - def handle_no_breakouts(group_config) - # Support for older format where breakouts are direct fields - direct_field_keys = group_config.keys - %w[aggregations source filter] - - direct_field_keys.map do |condition_name| - create_metric_field(condition_name, group_config) - end - end - - def handle_modern_format(group_config) - # Determine conditions list - conditions = get_conditions_list(group_config) - - # Generate individual condition fields - condition_fields = generate_condition_fields(conditions, group_config) - - # Generate intersection fields across different breakout groups + # Generate intersection fields between breakout groups intersection_fields = generate_breakout_intersection_fields(group_config) condition_fields + intersection_fields @@ -181,13 +145,6 @@ def get_conditions_list(group_config) end end - def handle_legacy_breakouts(group_config) - # For legacy format, each key directly in the metrics group is a breakout - group_config["breakouts"].keys.map do |breakout_name| - create_metric_field(breakout_name, group_config) - end - end - def create_metric_field(field_name, group_config) { "name" => field_name, @@ -198,27 +155,16 @@ def create_metric_field(field_name, group_config) end def extract_conditions_from_breakouts(breakouts) - conditions = [] + return [] unless breakouts.is_a?(Hash) - breakouts.each do |breakout_name, breakout_values| - conditions.concat(get_conditions_for_breakout(breakout_name, breakout_values)) + conditions = [] + breakouts.each_value do |breakout_values| + conditions.concat(breakout_values) if breakout_values.is_a?(Array) end - conditions.uniq end - def get_conditions_for_breakout(breakout_name, breakout_values) - if breakout_values.is_a?(Array) - breakout_values - elsif breakout_values.is_a?(Hash) && breakout_values["operator"] - [] # Skip complex operators in the new format - else - [breakout_name] # For string format, use the breakout name as the condition - end - end - def generate_condition_fields(conditions, group_config) - # Add a field for each condition conditions.map do |condition_name| create_metric_field(condition_name, group_config) end @@ -226,19 +172,11 @@ def generate_condition_fields(conditions, group_config) def generate_breakout_intersection_fields(group_config) return [] unless group_config["breakouts"] - return [] if should_skip_intersections?(group_config) + return [] if group_config["breakouts"].keys.size <= 1 generate_intersections(group_config) end - def should_skip_intersections?(group_config) - breakout_groups = group_config["breakouts"].keys - - # Skip if there's only one breakout group or if using legacy format - breakout_groups.size <= 1 || - group_config["breakouts"].values.any? { |v| v.is_a?(Hash) && v["operator"] } - end - def generate_intersections(group_config) intersection_fields = [] breakout_groups = group_config["breakouts"].keys @@ -255,8 +193,8 @@ def generate_intersections(group_config) def generate_intersection_fields_for_pair(group_config, breakout_pair) first_group, second_group = breakout_pair - first_group_conditions = get_breakout_conditions(group_config["breakouts"], first_group) - second_group_conditions = get_breakout_conditions(group_config["breakouts"], second_group) + first_group_conditions = group_config["breakouts"][first_group] + second_group_conditions = group_config["breakouts"][second_group] generate_intersection_combinations( first_group_conditions, @@ -268,6 +206,7 @@ def generate_intersection_fields_for_pair(group_config, breakout_pair) def generate_intersection_combinations(first_conditions, second_conditions, group_config) fields = [] + # Process the intersection combinations first_conditions.each do |first_condition| second_conditions.each do |second_condition| intersection_name = "#{first_condition}#{second_condition.capitalize}" @@ -278,21 +217,6 @@ def generate_intersection_combinations(first_conditions, second_conditions, grou fields end - def get_breakout_conditions(breakouts, breakout_name) - breakout_value = breakouts[breakout_name] - - if breakout_value.is_a?(Array) - # New format: breakout contains array of conditions - breakout_value - elsif breakout_value.is_a?(Hash) && breakout_value["operator"] - # Complex operator breakout - skip for now - [] - else - # Legacy format: breakout name is the condition - [breakout_name] - end - end - def breakout_metrics_fields(group_config) [ *countif_fields(group_config), diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 5a3c996..f195d76 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -102,43 +102,20 @@ interval: DAY metrics: taps: - breakouts: + conditions: paid: IS_PAID(context.location) organic: IS_ORGANIC(context.location) - paidOrganic: - fields: - - paid - - organic - operator: AND - paidOrOrganic: - fields: - - paid - - organic - operator: OR - notPaid: - fields: - - paid - operator: NOT - neitherPaidNorOrganic: - fields: - - paid - - organic - operator: NOR - notBothPaidAndOrganic: - fields: - - paid - - organic - operator: NAND - eitherPaidOrOrganic: - fields: - - paid - - organic - operator: XOR - similarPaidOrganic: - fields: - - paid - - organic - operator: XNOR + us: context.geo.country = 'US' + global: context.geo.country != 'US' + + breakouts: + acquisition: + - paid + - organic + geography: + - us + - global + aggregations: countif: tapCount sumif: @@ -277,26 +254,26 @@ def find_breakout(name) include_examples "breakout metrics", "paid" include_examples "breakout metrics", "organic" - include_examples "breakout metrics", "paidOrganic" - include_examples "breakout metrics", "paidOrOrganic" - include_examples "breakout metrics", "notPaid" - include_examples "breakout metrics", "neitherPaidNorOrganic" - include_examples "breakout metrics", "notBothPaidAndOrganic" - include_examples "breakout metrics", "eitherPaidOrOrganic" - include_examples "breakout metrics", "similarPaidOrganic" - - it "includes all breakouts in the metrics fields" do + include_examples "breakout metrics", "us" + include_examples "breakout metrics", "global" + + # Test intersection fields + include_examples "breakout metrics", "paidUs" + include_examples "breakout metrics", "paidGlobal" + include_examples "breakout metrics", "organicUs" + include_examples "breakout metrics", "organicGlobal" + + it "includes all condition fields and intersection fields in the metrics fields" do expect(schema_fields[:metrics]["fields"] .find { |f| f["name"] == "taps" }["fields"] .map { |f| f["name"] }) - .to match_array(expected_breakout_names) + .to match_array(expected_field_names) end - def expected_breakout_names + def expected_field_names %w[ - paid organic paidOrganic paidOrOrganic notPaid - neitherPaidNorOrganic notBothPaidAndOrganic - eitherPaidOrOrganic similarPaidOrganic + paid organic us global + paidUs paidGlobal organicUs organicGlobal ] end @@ -317,11 +294,6 @@ def parse_manifold_schema JSON.parse(workspace.tables_directory.join("manifold.json").read) end - def get_dimension(field) - dimensions = parse_dimensions_schema.find { |f| f["name"] == "dimensions" } - dimensions["fields"].find { |f| f["name"] == field } - end - def parse_metrics_schema(group_name) JSON.parse(workspace.tables_directory.join("metrics/#{group_name}.json").read) end @@ -424,8 +396,11 @@ def manifold_yaml_content metrics: taps: source: analytics.events - breakouts: + conditions: paid: IS_PAID(context.location) + breakouts: + acquisition: + - paid aggregations: countif: tapCount YAML From 143125a605585a880acbe299c9a66184d76a2c12 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 11:42:52 -0400 Subject: [PATCH 4/8] Always expect conditions --- lib/manifold/api/schema_manager.rb | 20 +---------- spec/manifold/api/schema_manager_spec.rb | 43 ------------------------ 2 files changed, 1 insertion(+), 62 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index ca4c536..5a0efed 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -129,7 +129,7 @@ def group_metrics_fields(group_config) return [] unless group_config["aggregations"] # Generate condition fields - condition_fields = generate_condition_fields(get_conditions_list(group_config), group_config) + condition_fields = generate_condition_fields(group_config["conditions"].keys, group_config) # Generate intersection fields between breakout groups intersection_fields = generate_breakout_intersection_fields(group_config) @@ -137,14 +137,6 @@ def group_metrics_fields(group_config) condition_fields + intersection_fields end - def get_conditions_list(group_config) - if group_config["conditions"] - group_config["conditions"].keys - else - extract_conditions_from_breakouts(group_config["breakouts"]) - end - end - def create_metric_field(field_name, group_config) { "name" => field_name, @@ -154,16 +146,6 @@ def create_metric_field(field_name, group_config) } end - def extract_conditions_from_breakouts(breakouts) - return [] unless breakouts.is_a?(Hash) - - conditions = [] - breakouts.each_value do |breakout_values| - conditions.concat(breakout_values) if breakout_values.is_a?(Array) - end - conditions.uniq - end - def generate_condition_fields(conditions, group_config) conditions.map do |condition_name| create_metric_field(condition_name, group_config) diff --git a/spec/manifold/api/schema_manager_spec.rb b/spec/manifold/api/schema_manager_spec.rb index 77e832f..79a564c 100644 --- a/spec/manifold/api/schema_manager_spec.rb +++ b/spec/manifold/api/schema_manager_spec.rb @@ -169,49 +169,6 @@ def find_intersection_field end end - describe "when conditions are not explicitly defined" do - subject(:conditional_fields) { metric_render_fields } - - let(:manifold_yaml) do - { - "metrics" => { - "renders" => { - "breakouts" => { - "device" => %w[mobile desktop], - "region" => %w[us global] - }, - "aggregations" => { - "countif" => "renderCount" - } - } - } - } - end - - def metric_render_fields - metrics_fields = schema_manager.send(:metrics_fields) - renders_field = metrics_fields.find { |field| field["name"] == "renders" } - renders_field["fields"] - end - - it "derives condition fields from breakouts" do - condition_names = conditional_fields.map { |field| field["name"] } - expect(condition_names).to include("mobile", "desktop", "us", "global") - end - - it "generates mobile-us intersection" do - condition_names = conditional_fields.map { |field| field["name"] } - mobile_us_variants = %w[mobileUs usMobile] - expect(mobile_us_variants.any? { |variant| condition_names.include?(variant) }).to be true - end - - it "generates desktop-us intersection" do - condition_names = conditional_fields.map { |field| field["name"] } - desktop_us_variants = %w[desktopUs usDesktop] - expect(desktop_us_variants.any? { |variant| condition_names.include?(variant) }).to be true - end - end - describe "#write_schemas" do subject(:tables_dir) { Pathname.pwd.join("tables") } From b52ce9c60f384ed0d9a5af706757641d3a3594fb Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 12:17:01 -0400 Subject: [PATCH 5/8] Accommodate > 2 breakouts groups --- lib/manifold/api/schema_manager.rb | 96 ++++++++++++++----- lib/manifold/templates/workspace_template.yml | 3 + lib/manifold/version.rb | 2 +- spec/manifold/api/workspace_spec.rb | 33 ++++++- 4 files changed, 105 insertions(+), 29 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index 5a0efed..e255547 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -129,7 +129,7 @@ def group_metrics_fields(group_config) return [] unless group_config["aggregations"] # Generate condition fields - condition_fields = generate_condition_fields(group_config["conditions"].keys, group_config) + condition_fields = generate_condition_fields(get_conditions_list(group_config), group_config) # Generate intersection fields between breakout groups intersection_fields = generate_breakout_intersection_fields(group_config) @@ -137,6 +137,12 @@ def group_metrics_fields(group_config) condition_fields + intersection_fields end + def get_conditions_list(group_config) + return [] unless group_config["conditions"] + + group_config["conditions"].keys + end + def create_metric_field(field_name, group_config) { "name" => field_name, @@ -156,47 +162,85 @@ def generate_breakout_intersection_fields(group_config) return [] unless group_config["breakouts"] return [] if group_config["breakouts"].keys.size <= 1 - generate_intersections(group_config) + generate_all_breakout_combinations(group_config) end - def generate_intersections(group_config) - intersection_fields = [] + def generate_all_breakout_combinations(group_config) + all_intersection_fields = [] breakout_groups = group_config["breakouts"].keys - # Generate all possible combinations of breakout groups - breakout_groups.combination(2).each do |breakout_pair| - fields = generate_intersection_fields_for_pair(group_config, breakout_pair) - intersection_fields.concat(fields) + # Generate combinations of different sizes (2 to n breakout groups) + (2..breakout_groups.size).each do |combination_size| + add_combinations_of_size(combination_size, breakout_groups, group_config, all_intersection_fields) end - intersection_fields + all_intersection_fields end - def generate_intersection_fields_for_pair(group_config, breakout_pair) - first_group, second_group = breakout_pair + def add_combinations_of_size(size, breakout_groups, group_config, all_fields) + breakout_groups.combination(size).each do |breakout_combination| + fields = generate_intersection_fields_for_combination(group_config, breakout_combination) + all_fields.concat(fields) + end + end - first_group_conditions = group_config["breakouts"][first_group] - second_group_conditions = group_config["breakouts"][second_group] + def generate_intersection_fields_for_combination(group_config, breakout_combination) + # Get all conditions from the given breakout groups + condition_sets = breakout_combination.map do |breakout_group| + group_config["breakouts"][breakout_group] + end - generate_intersection_combinations( - first_group_conditions, - second_group_conditions, - group_config - ) + # Generate all combinations of one condition from each breakout group + generate_all_condition_combinations(condition_sets, group_config) end - def generate_intersection_combinations(first_conditions, second_conditions, group_config) - fields = [] + def generate_all_condition_combinations(condition_sets, group_config) + # Start with first breakout group's conditions + combinations = condition_sets.first.map { |condition| [condition] } - # Process the intersection combinations - first_conditions.each do |first_condition| - second_conditions.each do |second_condition| - intersection_name = "#{first_condition}#{second_condition.capitalize}" - fields << create_metric_field(intersection_name, group_config) + # Extend combinations with remaining breakout groups + extended_combinations = extend_combinations_with_remaining_sets(combinations, condition_sets[1..]) + + # Convert combinations to field definitions + create_intersection_fields(extended_combinations, group_config) + end + + def extend_combinations_with_remaining_sets(initial_combinations, remaining_sets) + combinations = initial_combinations + + remaining_sets.each do |conditions| + combinations = extend_combinations_with_conditions(combinations, conditions) + end + + combinations + end + + def extend_combinations_with_conditions(existing_combinations, conditions) + new_combinations = [] + + existing_combinations.each do |existing_combination| + conditions.each do |condition| + new_combinations << (existing_combination + [condition]) end end - fields + new_combinations + end + + def create_intersection_fields(combinations, group_config) + combinations.map do |condition_combination| + # Format name with first condition lowercase, others capitalized + field_name = format_intersection_name(condition_combination) + create_metric_field(field_name, group_config) + end + end + + def format_intersection_name(condition_combination) + name = condition_combination.first + condition_combination[1..].each do |condition| + name += condition.capitalize + end + name end def breakout_metrics_fields(group_config) diff --git a/lib/manifold/templates/workspace_template.yml b/lib/manifold/templates/workspace_template.yml index 513dafa..38803a0 100644 --- a/lib/manifold/templates/workspace_template.yml +++ b/lib/manifold/templates/workspace_template.yml @@ -24,6 +24,9 @@ metrics: device: - mobile - desktop + acquisition: + - organic + - paid region: - us - global diff --git a/lib/manifold/version.rb b/lib/manifold/version.rb index 1dd715f..a0ab856 100644 --- a/lib/manifold/version.rb +++ b/lib/manifold/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Manifold - VERSION = "0.1.0" + VERSION = "0.2.0" end diff --git a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index f195d76..5425363 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -107,6 +107,8 @@ organic: IS_ORGANIC(context.location) us: context.geo.country = 'US' global: context.geo.country != 'US' + retargeting: context.campaign_type = 'RETARGETING' + prospecting: context.campaign_type = 'PROSPECTING' breakouts: acquisition: @@ -115,6 +117,9 @@ geography: - us - global + campaign: + - retargeting + - prospecting aggregations: countif: tapCount @@ -256,12 +261,32 @@ def find_breakout(name) include_examples "breakout metrics", "organic" include_examples "breakout metrics", "us" include_examples "breakout metrics", "global" + include_examples "breakout metrics", "retargeting" + include_examples "breakout metrics", "prospecting" - # Test intersection fields + # Test two-way intersection fields include_examples "breakout metrics", "paidUs" include_examples "breakout metrics", "paidGlobal" include_examples "breakout metrics", "organicUs" include_examples "breakout metrics", "organicGlobal" + include_examples "breakout metrics", "paidRetargeting" + include_examples "breakout metrics", "paidProspecting" + include_examples "breakout metrics", "organicRetargeting" + include_examples "breakout metrics", "organicProspecting" + include_examples "breakout metrics", "usRetargeting" + include_examples "breakout metrics", "usProspecting" + include_examples "breakout metrics", "globalRetargeting" + include_examples "breakout metrics", "globalProspecting" + + # Test three-way intersection fields + include_examples "breakout metrics", "paidUsRetargeting" + include_examples "breakout metrics", "paidUsProspecting" + include_examples "breakout metrics", "paidGlobalRetargeting" + include_examples "breakout metrics", "paidGlobalProspecting" + include_examples "breakout metrics", "organicUsRetargeting" + include_examples "breakout metrics", "organicUsProspecting" + include_examples "breakout metrics", "organicGlobalRetargeting" + include_examples "breakout metrics", "organicGlobalProspecting" it "includes all condition fields and intersection fields in the metrics fields" do expect(schema_fields[:metrics]["fields"] @@ -272,8 +297,12 @@ def find_breakout(name) def expected_field_names %w[ - paid organic us global + paid organic us global retargeting prospecting paidUs paidGlobal organicUs organicGlobal + paidRetargeting paidProspecting organicRetargeting organicProspecting + usRetargeting usProspecting globalRetargeting globalProspecting + paidUsRetargeting paidUsProspecting paidGlobalRetargeting paidGlobalProspecting + organicUsRetargeting organicUsProspecting organicGlobalRetargeting organicGlobalProspecting ] end From cbd20f5c660ee8725539e6e8b143b8eceb3d7bd9 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 12:22:18 -0400 Subject: [PATCH 6/8] reduce --- lib/manifold/api/schema_manager.rb | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index e255547..a53403f 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -162,19 +162,19 @@ def generate_breakout_intersection_fields(group_config) return [] unless group_config["breakouts"] return [] if group_config["breakouts"].keys.size <= 1 - generate_all_breakout_combinations(group_config) + generate_intersections(group_config) end - def generate_all_breakout_combinations(group_config) - all_intersection_fields = [] + def generate_intersections(group_config) + intersections = [] breakout_groups = group_config["breakouts"].keys # Generate combinations of different sizes (2 to n breakout groups) (2..breakout_groups.size).each do |combination_size| - add_combinations_of_size(combination_size, breakout_groups, group_config, all_intersection_fields) + add_combinations_of_size(combination_size, breakout_groups, group_config, intersections) end - all_intersection_fields + intersections end def add_combinations_of_size(size, breakout_groups, group_config, all_fields) @@ -206,13 +206,9 @@ def generate_all_condition_combinations(condition_sets, group_config) end def extend_combinations_with_remaining_sets(initial_combinations, remaining_sets) - combinations = initial_combinations - - remaining_sets.each do |conditions| - combinations = extend_combinations_with_conditions(combinations, conditions) + remaining_sets.reduce(initial_combinations) do |combinations, conditions| + extend_combinations_with_conditions(combinations, conditions) end - - combinations end def extend_combinations_with_conditions(existing_combinations, conditions) From 1161c4a7acca20b25e2729cef4c36a069e10306f Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 12:23:11 -0400 Subject: [PATCH 7/8] flat_map --- lib/manifold/api/schema_manager.rb | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index a53403f..fe5c102 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -212,15 +212,9 @@ def extend_combinations_with_remaining_sets(initial_combinations, remaining_sets end def extend_combinations_with_conditions(existing_combinations, conditions) - new_combinations = [] - - existing_combinations.each do |existing_combination| - conditions.each do |condition| - new_combinations << (existing_combination + [condition]) - end + existing_combinations.flat_map do |existing_combination| + conditions.map { |condition| existing_combination + [condition] } end - - new_combinations end def create_intersection_fields(combinations, group_config) From 392fc20bf6365ce6bf971a0256079e2b753c6822 Mon Sep 17 00:00:00 2001 From: claytongentry Date: Mon, 31 Mar 2025 12:24:59 -0400 Subject: [PATCH 8/8] tighten --- lib/manifold/api/schema_manager.rb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index fe5c102..f926197 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -166,15 +166,14 @@ def generate_breakout_intersection_fields(group_config) end def generate_intersections(group_config) - intersections = [] breakout_groups = group_config["breakouts"].keys - # Generate combinations of different sizes (2 to n breakout groups) - (2..breakout_groups.size).each do |combination_size| - add_combinations_of_size(combination_size, breakout_groups, group_config, intersections) + # Generate all valid combinations of breakout groups (sizes 2 to n) + (2..breakout_groups.size).flat_map do |size| + breakout_groups.combination(size).flat_map do |combo| + generate_intersection_fields_for_combination(group_config, combo) + end end - - intersections end def add_combinations_of_size(size, breakout_groups, group_config, all_fields)