diff --git a/lib/manifold/api/schema_manager.rb b/lib/manifold/api/schema_manager.rb index dc7dcb6..f926197 100644 --- a/lib/manifold/api/schema_manager.rb +++ b/lib/manifold/api/schema_manager.rb @@ -57,18 +57,28 @@ def write_manifold_schema(tables_directory) def write_metrics_schemas(tables_directory) return unless @manifold_yaml["metrics"] - # Create metrics subdirectory + create_metrics_directory(tables_directory) + write_individual_metrics_schemas(tables_directory) + end + + def create_metrics_directory(tables_directory) metrics_directory = tables_directory.join("metrics") metrics_directory.mkpath + end + def write_individual_metrics_schemas(tables_directory) @manifold_yaml["metrics"].each do |group_name, group_config| - metrics_table_path = metrics_directory.join("#{group_name}.json") - metrics_table_schema = metrics_table_schema(group_name, group_config) - metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) - @logger.info("Generated metrics table schema for '#{group_name}'.") + write_metrics_group_schema(tables_directory, group_name, group_config) end end + def write_metrics_group_schema(tables_directory, group_name, group_config) + metrics_table_path = tables_directory.join("metrics", "#{group_name}.json") + metrics_table_schema = metrics_table_schema(group_name, group_config) + metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n")) + @logger.info("Generated metrics table schema for '#{group_name}'.") + end + def metrics_table_schema(group_name, group_config) [ { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }, @@ -116,16 +126,110 @@ def metrics_fields end def group_metrics_fields(group_config) - return [] unless group_config["breakouts"] && group_config["aggregations"] + return [] unless group_config["aggregations"] - group_config["breakouts"].map do |breakout_name, _breakout_config| - { - "name" => breakout_name, - "type" => "RECORD", - "mode" => "NULLABLE", - "fields" => 
breakout_metrics_fields(group_config) - } + # Generate condition fields + condition_fields = generate_condition_fields(get_conditions_list(group_config), group_config) + + # Generate intersection fields between breakout groups + intersection_fields = generate_breakout_intersection_fields(group_config) + + condition_fields + intersection_fields + end + + def get_conditions_list(group_config) + return [] unless group_config["conditions"] + + group_config["conditions"].keys + end + + def create_metric_field(field_name, group_config) + { + "name" => field_name, + "type" => "RECORD", + "mode" => "NULLABLE", + "fields" => breakout_metrics_fields(group_config) + } + end + + def generate_condition_fields(conditions, group_config) + conditions.map do |condition_name| + create_metric_field(condition_name, group_config) + end + end + + def generate_breakout_intersection_fields(group_config) + return [] unless group_config["breakouts"] + return [] if group_config["breakouts"].keys.size <= 1 + + generate_intersections(group_config) + end + + def generate_intersections(group_config) + breakout_groups = group_config["breakouts"].keys + + # Generate all valid combinations of breakout groups (sizes 2 to n) + (2..breakout_groups.size).flat_map do |size| + breakout_groups.combination(size).flat_map do |combo| + generate_intersection_fields_for_combination(group_config, combo) + end + end + end + + def add_combinations_of_size(size, breakout_groups, group_config, all_fields) + breakout_groups.combination(size).each do |breakout_combination| + fields = generate_intersection_fields_for_combination(group_config, breakout_combination) + all_fields.concat(fields) + end + end + + def generate_intersection_fields_for_combination(group_config, breakout_combination) + # Get all conditions from the given breakout groups + condition_sets = breakout_combination.map do |breakout_group| + group_config["breakouts"][breakout_group] + end + + # Generate all combinations of one condition from each 
breakout group + generate_all_condition_combinations(condition_sets, group_config) + end + + def generate_all_condition_combinations(condition_sets, group_config) + # Start with first breakout group's conditions + combinations = condition_sets.first.map { |condition| [condition] } + + # Extend combinations with remaining breakout groups + extended_combinations = extend_combinations_with_remaining_sets(combinations, condition_sets[1..]) + + # Convert combinations to field definitions + create_intersection_fields(extended_combinations, group_config) + end + + def extend_combinations_with_remaining_sets(initial_combinations, remaining_sets) + remaining_sets.reduce(initial_combinations) do |combinations, conditions| + extend_combinations_with_conditions(combinations, conditions) + end + end + + def extend_combinations_with_conditions(existing_combinations, conditions) + existing_combinations.flat_map do |existing_combination| + conditions.map { |condition| existing_combination + [condition] } + end + end + + def create_intersection_fields(combinations, group_config) + combinations.map do |condition_combination| + # Format name with first condition lowercase, others capitalized + field_name = format_intersection_name(condition_combination) + create_metric_field(field_name, group_config) + end + end + + def format_intersection_name(condition_combination) + name = condition_combination.first + condition_combination[1..].each do |condition| + name += condition.capitalize end + name end def breakout_metrics_fields(group_config) diff --git a/lib/manifold/services/vector_service.rb b/lib/manifold/services/vector_service.rb index 9d8b646..2c2e521 100644 --- a/lib/manifold/services/vector_service.rb +++ b/lib/manifold/services/vector_service.rb @@ -4,7 +4,7 @@ module Manifold module Services # Handles the loading of vector schemas from configuration files class VectorService - def initialize(logger) + def initialize(logger = nil) @logger = logger end diff --git 
a/lib/manifold/templates/workspace_template.yml b/lib/manifold/templates/workspace_template.yml index d2a54ce..38803a0 100644 --- a/lib/manifold/templates/workspace_template.yml +++ b/lib/manifold/templates/workspace_template.yml @@ -14,9 +14,22 @@ timestamp: metrics: renders: + conditions: + mobile: IS_MOBILE(context.device) + desktop: IS_DESKTOP(context.device) + us: context.geo.country = 'US' + global: context.geo.country != 'US' + breakouts: - paid: IS_PAID(context.location) - organic: IS_ORGANIC(context.location) + device: + - mobile + - desktop + acquisition: + - organic + - paid + region: + - us + - global aggregations: countif: renderCount diff --git a/lib/manifold/version.rb b/lib/manifold/version.rb index 1dd715f..a0ab856 100644 --- a/lib/manifold/version.rb +++ b/lib/manifold/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Manifold - VERSION = "0.1.0" + VERSION = "0.2.0" end diff --git a/spec/manifold/api/schema_manager_spec.rb b/spec/manifold/api/schema_manager_spec.rb new file mode 100644 index 0000000..79a564c --- /dev/null +++ b/spec/manifold/api/schema_manager_spec.rb @@ -0,0 +1,196 @@ +# frozen_string_literal: true + +RSpec.describe Manifold::API::SchemaManager do + include FakeFS::SpecHelpers + + subject(:schema_manager) { described_class.new(name, vectors, vector_service, manifold_yaml, logger) } + + let(:logger) { instance_spy(Logger) } + let(:name) { "test_workspace" } + let(:vectors) { ["TestVector"] } + let(:vector_service) { instance_spy(Manifold::Services::VectorService) } + let(:manifold_yaml) { build_test_manifold_yaml } + + before do + # Mock the vector service + allow(vector_service).to receive(:load_vector_schema).and_return( + { "name" => "test_vector", "type" => "STRING", "mode" => "NULLABLE" } + ) + end + + # rubocop:disable Metrics/MethodLength + def build_test_manifold_yaml + { + "metrics" => { + "renders" => { + "conditions" => { + "mobile" => "IS_MOBILE(context.device)", + "desktop" => 
"IS_DESKTOP(context.device)", + "us" => "context.geo.country = 'US'", + "global" => "context.geo.country != 'US'" + }, + "breakouts" => { + "device" => %w[mobile desktop], + "region" => %w[us global] + }, + "aggregations" => { + "countif" => "renderCount", + "sumif" => { + "sequenceSum" => { + "field" => "context.sequence" + } + } + } + } + } + } + end + # rubocop:enable Metrics/MethodLength + + describe "#dimensions_schema" do + subject(:schema) { schema_manager.dimensions_schema } + + it "includes required id field" do + expect(schema).to include( + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + ) + end + + it "includes dimensions field with RECORD type" do + dimensions_field = schema.find { |f| f["name"] == "dimensions" } + expect(dimensions_field["type"]).to eq("RECORD") + end + + it "includes dimensions field with REQUIRED mode" do + dimensions_field = schema.find { |f| f["name"] == "dimensions" } + expect(dimensions_field["mode"]).to eq("REQUIRED") + end + end + + describe "#manifold_schema" do + subject(:schema) { schema_manager.manifold_schema } + + it "includes required id field" do + expect(schema).to include( + { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } + ) + end + + it "includes required timestamp field" do + expect(schema).to include( + { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" } + ) + end + + it "includes dimensions field with RECORD type" do + dimensions_field = schema.find { |f| f["name"] == "dimensions" } + expect(dimensions_field["type"]).to eq("RECORD") + end + + it "includes metrics field with RECORD type" do + metrics_field = schema.find { |f| f["name"] == "metrics" } + expect(metrics_field["type"]).to eq("RECORD") + end + end + + describe "#metrics_fields" do + # Using a simple helper method to clean up the spec and reduce memoized variables + def render_fields + metrics_fields = schema_manager.send(:metrics_fields) + renders_field = metrics_fields.find { |field| field["name"] == 
"renders" } + renders_field["fields"] + end + + it "includes the renders group field" do + metrics_fields = schema_manager.send(:metrics_fields) + renders_field = metrics_fields.find { |field| field["name"] == "renders" } + expect(renders_field).not_to be_nil + end + + it "includes all individual condition fields" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names).to include("mobile", "desktop", "us", "global") + end + + describe "intersection fields" do + it "includes mobile-us intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? { |name| %w[mobileUs usMobile].include?(name) }).to be true + end + + it "includes desktop-us intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? { |name| %w[desktopUs usDesktop].include?(name) }).to be true + end + + it "includes mobile-global intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? { |name| %w[mobileGlobal globalMobile].include?(name) }).to be true + end + + it "includes desktop-global intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names.any? 
{ |name| %w[desktopGlobal globalDesktop].include?(name) }).to be true + end + end + + describe "exclusion of invalid intersections" do + it "does not include mobile-desktop intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names).not_to include("mobileDesktop", "desktopMobile") + end + + it "does not include us-global intersection" do + field_names = render_fields.map { |field| field["name"] } + expect(field_names).not_to include("usGlobal", "globalUs") + end + end + + describe "aggregation fields" do + it "includes correct aggregation fields for individual conditions" do + mobile_field = render_fields.find { |field| field["name"] == "mobile" } + aggregation_names = mobile_field["fields"].map { |f| f["name"] } + expect(aggregation_names).to include("renderCount", "sequenceSum") + end + + it "includes renderCount in intersection fields" do + intersection_field = find_intersection_field + expect(intersection_field["fields"].map { |f| f["name"] }).to include("renderCount") + end + + it "includes sequenceSum in intersection fields" do + intersection_field = find_intersection_field + expect(intersection_field["fields"].map { |f| f["name"] }).to include("sequenceSum") + end + + def find_intersection_field + render_fields.find { |field| field["name"] =~ /mobile.*us|us.*mobile/i } + end + end + end + + describe "#write_schemas" do + subject(:tables_dir) { Pathname.pwd.join("tables") } + + before do + tables_dir.mkpath + schema_manager.write_schemas(tables_dir) + end + + it "generates a dimensions schema file" do + expect(tables_dir.join("dimensions.json")).to be_file + end + + it "generates a manifold schema file" do + expect(tables_dir.join("manifold.json")).to be_file + end + + it "generates a metrics directory" do + expect(tables_dir.join("metrics")).to be_directory + end + + it "generates a metrics schema file for each metrics group" do + expect(tables_dir.join("metrics/renders.json")).to be_file + end + end +end diff --git 
a/spec/manifold/api/workspace_spec.rb b/spec/manifold/api/workspace_spec.rb index 5822fda..5425363 100644 --- a/spec/manifold/api/workspace_spec.rb +++ b/spec/manifold/api/workspace_spec.rb @@ -68,7 +68,24 @@ } end + let(:vector_service) { instance_double(Manifold::Services::VectorService) } + before do + # Use mock for VectorService + allow(Manifold::Services::VectorService).to receive(:new).and_return(vector_service) + + # Mock successful vector schema loading + allow(vector_service).to receive(:load_vector_schema).with("User").and_return( + { + "name" => "user", + "type" => "RECORD", + "fields" => [ + { "name" => "user_id", "type" => "STRING", "mode" => "NULLABLE" }, + { "name" => "email", "type" => "STRING", "mode" => "NULLABLE" } + ] + } + ) + Pathname.pwd.join("vectors").mkpath Pathname.pwd.join("vectors", "user.yml").write(<<~YAML) attributes: @@ -85,43 +102,25 @@ interval: DAY metrics: taps: - breakouts: + conditions: paid: IS_PAID(context.location) organic: IS_ORGANIC(context.location) - paidOrganic: - fields: - - paid - - organic - operator: AND - paidOrOrganic: - fields: - - paid - - organic - operator: OR - notPaid: - fields: - - paid - operator: NOT - neitherPaidNorOrganic: - fields: - - paid - - organic - operator: NOR - notBothPaidAndOrganic: - fields: - - paid - - organic - operator: NAND - eitherPaidOrOrganic: - fields: - - paid - - organic - operator: XOR - similarPaidOrganic: - fields: - - paid - - organic - operator: XNOR + us: context.geo.country = 'US' + global: context.geo.country != 'US' + retargeting: context.campaign_type = 'RETARGETING' + prospecting: context.campaign_type = 'PROSPECTING' + + breakouts: + acquisition: + - paid + - organic + geography: + - us + - global + campaign: + - retargeting + - prospecting + aggregations: countif: tapCount sumif: @@ -145,13 +144,27 @@ expect(schema).to include({ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }) end - it "sets the dimensions fields" do - 
expect(get_dimension("user")["fields"]).to include( - { "type" => "STRING", "name" => "user_id", "mode" => "NULLABLE" }, - { "type" => "STRING", "name" => "email", "mode" => "NULLABLE" } + it "includes user_id dimension field" do + user_fields = fetch_user_dimension_fields + expect(user_fields).to include( + { "name" => "user_id", "type" => "STRING", "mode" => "NULLABLE" } ) end + it "includes email dimension field" do + user_fields = fetch_user_dimension_fields + expect(user_fields).to include( + { "name" => "email", "type" => "STRING", "mode" => "NULLABLE" } + ) + end + + def fetch_user_dimension_fields + schema = parse_dimensions_schema + dimensions = schema.find { |f| f["name"] == "dimensions" } + user_dimension = dimensions["fields"].find { |f| f["name"] == "user" } + user_dimension["fields"] + end + it "includes required id field in manifold schema" do expect(schema_fields[:basic]).to include( { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" } @@ -223,47 +236,73 @@ end shared_examples "breakout metrics" do |breakout_name| - let(:breakout) do - schema_fields[:metrics]["fields"] - .find { |f| f["name"] == "taps" }["fields"] - .find { |f| f["name"] == breakout_name } - end - it "includes tapCount metric" do + breakout = find_breakout(breakout_name) expect(breakout["fields"]).to include( { "type" => "INTEGER", "name" => "tapCount", "mode" => "NULLABLE" } ) end it "includes sequenceSum metric" do + breakout = find_breakout(breakout_name) expect(breakout["fields"]).to include( { "type" => "INTEGER", "name" => "sequenceSum", "mode" => "NULLABLE" } ) end + + def find_breakout(name) + schema_fields[:metrics]["fields"] + .find { |f| f["name"] == "taps" }["fields"] + .find { |f| f["name"] == name } + end end include_examples "breakout metrics", "paid" include_examples "breakout metrics", "organic" - include_examples "breakout metrics", "paidOrganic" - include_examples "breakout metrics", "paidOrOrganic" - include_examples "breakout metrics", "notPaid" - 
include_examples "breakout metrics", "neitherPaidNorOrganic" - include_examples "breakout metrics", "notBothPaidAndOrganic" - include_examples "breakout metrics", "eitherPaidOrOrganic" - include_examples "breakout metrics", "similarPaidOrganic" - - it "includes all breakouts in the metrics fields" do + include_examples "breakout metrics", "us" + include_examples "breakout metrics", "global" + include_examples "breakout metrics", "retargeting" + include_examples "breakout metrics", "prospecting" + + # Test two-way intersection fields + include_examples "breakout metrics", "paidUs" + include_examples "breakout metrics", "paidGlobal" + include_examples "breakout metrics", "organicUs" + include_examples "breakout metrics", "organicGlobal" + include_examples "breakout metrics", "paidRetargeting" + include_examples "breakout metrics", "paidProspecting" + include_examples "breakout metrics", "organicRetargeting" + include_examples "breakout metrics", "organicProspecting" + include_examples "breakout metrics", "usRetargeting" + include_examples "breakout metrics", "usProspecting" + include_examples "breakout metrics", "globalRetargeting" + include_examples "breakout metrics", "globalProspecting" + + # Test three-way intersection fields + include_examples "breakout metrics", "paidUsRetargeting" + include_examples "breakout metrics", "paidUsProspecting" + include_examples "breakout metrics", "paidGlobalRetargeting" + include_examples "breakout metrics", "paidGlobalProspecting" + include_examples "breakout metrics", "organicUsRetargeting" + include_examples "breakout metrics", "organicUsProspecting" + include_examples "breakout metrics", "organicGlobalRetargeting" + include_examples "breakout metrics", "organicGlobalProspecting" + + it "includes all condition fields and intersection fields in the metrics fields" do expect(schema_fields[:metrics]["fields"] .find { |f| f["name"] == "taps" }["fields"] .map { |f| f["name"] }) - .to match_array(expected_breakout_names) + .to 
match_array(expected_field_names) end - def expected_breakout_names + def expected_field_names %w[ - paid organic paidOrganic paidOrOrganic notPaid - neitherPaidNorOrganic notBothPaidAndOrganic - eitherPaidOrOrganic similarPaidOrganic + paid organic us global retargeting prospecting + paidUs paidGlobal organicUs organicGlobal + paidRetargeting paidProspecting organicRetargeting organicProspecting + usRetargeting usProspecting globalRetargeting globalProspecting + paidUsRetargeting paidUsProspecting paidGlobalRetargeting paidGlobalProspecting + organicUsRetargeting organicUsProspecting organicGlobalRetargeting organicGlobalProspecting ] end @@ -284,11 +323,6 @@ def parse_manifold_schema JSON.parse(workspace.tables_directory.join("manifold.json").read) end - def get_dimension(field) - dimensions = parse_dimensions_schema.find { |f| f["name"] == "dimensions" } - dimensions["fields"].find { |f| f["name"] == field } - end - def parse_metrics_schema(group_name) JSON.parse(workspace.tables_directory.join("metrics/#{group_name}.json").read) end @@ -391,8 +425,11 @@ def manifold_yaml_content metrics: taps: source: analytics.events - breakouts: + conditions: paid: IS_PAID(context.location) + breakouts: + acquisition: + - paid aggregations: countif: tapCount YAML