Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 117 additions & 13 deletions lib/manifold/api/schema_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,28 @@ def write_manifold_schema(tables_directory)
# Writes the metrics table schemas beneath +tables_directory+.
#
# Does nothing (and returns nil) when the manifold configuration has no
# "metrics" entry. Otherwise the metrics directory is created first and the
# individual schema files are written afterwards.
def write_metrics_schemas(tables_directory)
  metrics_configured = @manifold_yaml["metrics"]
  return unless metrics_configured

  create_metrics_directory(tables_directory)
  write_individual_metrics_schemas(tables_directory)
end

# Ensures a "metrics" subdirectory exists beneath +tables_directory+.
#
# +tables_directory+ must respond to #join and return an object that responds
# to #mkpath (the specs pass a Pathname).
def create_metrics_directory(tables_directory)
  tables_directory.join("metrics").mkpath
end

# Writes one schema file for every metrics group in the manifold
# configuration.
#
# Path resolution, serialization and logging for a single group are all
# handled by #write_metrics_group_schema; this method only iterates. It must
# not touch a local +metrics_directory+ here: none is defined in this scope,
# and writing the file inline would duplicate the delegated write.
def write_individual_metrics_schemas(tables_directory)
  @manifold_yaml["metrics"].each do |group_name, group_config|
    write_metrics_group_schema(tables_directory, group_name, group_config)
  end
end

# Serializes the schema of a single metrics group to
# "<tables_directory>/metrics/<group_name>.json" as pretty-printed JSON with a
# trailing newline, then logs an info message naming the group.
def write_metrics_group_schema(tables_directory, group_name, group_config)
  schema = metrics_table_schema(group_name, group_config)
  output_path = tables_directory.join("metrics", "#{group_name}.json")

  output_path.write("#{JSON.pretty_generate(schema)}\n")
  @logger.info("Generated metrics table schema for '#{group_name}'.")
end

def metrics_table_schema(group_name, group_config)
[
{ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
Expand Down Expand Up @@ -116,16 +126,110 @@ def metrics_fields
end

# Builds the field definitions for one metrics group.
#
# Returns an empty array when the group declares no "aggregations".
# Otherwise the result is the fields for each named condition followed by the
# fields for intersections between breakout groups. "breakouts" is optional:
# a group with only conditions still gets its condition fields.
def group_metrics_fields(group_config)
  return [] unless group_config["aggregations"]

  condition_fields = generate_condition_fields(get_conditions_list(group_config), group_config)
  intersection_fields = generate_breakout_intersection_fields(group_config)

  condition_fields + intersection_fields
end

# Returns the names (keys) of the group's "conditions" mapping, or an empty
# array when the group defines no conditions.
def get_conditions_list(group_config)
  conditions = group_config["conditions"]
  conditions ? conditions.keys : []
end

# Builds a NULLABLE RECORD field definition named +field_name+ whose nested
# fields are whatever #breakout_metrics_fields returns for +group_config+.
def create_metric_field(field_name, group_config)
  nested_fields = breakout_metrics_fields(group_config)

  { "name" => field_name, "type" => "RECORD", "mode" => "NULLABLE", "fields" => nested_fields }
end

# Maps every condition name in +conditions+ to its metric field definition,
# preserving the order of +conditions+.
def generate_condition_fields(conditions, group_config)
  conditions.map { |name| create_metric_field(name, group_config) }
end

# Returns the intersection fields for the group's breakouts.
#
# Intersections need at least two breakout groups, so an empty array is
# returned when "breakouts" is absent or holds fewer than two groups.
def generate_breakout_intersection_fields(group_config)
  breakouts = group_config["breakouts"]
  return [] if !breakouts || breakouts.keys.size < 2

  generate_intersections(group_config)
end

# Generates intersection fields for every combination of two or more breakout
# groups, smallest combinations first.
def generate_intersections(group_config)
  group_names = group_config["breakouts"].keys

  # Enumerate the group combinations up front (sizes 2..n), then expand each
  # one into its intersection fields.
  group_combinations = (2..group_names.size).flat_map { |size| group_names.combination(size).to_a }
  group_combinations.flat_map do |combination|
    generate_intersection_fields_for_combination(group_config, combination)
  end
end

# Appends, in place, the intersection fields for every +size+-element
# combination of +breakout_groups+ to +all_fields+.
#
# NOTE(review): no caller is visible in this portion of the file; confirm it
# is still needed.
def add_combinations_of_size(size, breakout_groups, group_config, all_fields)
  breakout_groups.combination(size) do |group_combination|
    all_fields.concat(generate_intersection_fields_for_combination(group_config, group_combination))
  end
end

# Looks up the condition list of each breakout group named in
# +breakout_combination+ and turns every one-condition-per-group combination
# into an intersection field.
def generate_intersection_fields_for_combination(group_config, breakout_combination)
  generate_all_condition_combinations(
    breakout_combination.map { |group_name| group_config["breakouts"][group_name] },
    group_config
  )
end

# Produces an intersection field for every way of picking exactly one
# condition from each set in +condition_sets+.
def generate_all_condition_combinations(condition_sets, group_config)
  leading_set, *other_sets = condition_sets

  # Seed with one single-element combination per condition of the first set,
  # then grow each combination with the conditions of the remaining sets.
  seeds = leading_set.map { |condition| [condition] }
  full_combinations = extend_combinations_with_remaining_sets(seeds, other_sets)

  create_intersection_fields(full_combinations, group_config)
end

# Folds each condition set in +remaining_sets+, in order, into the
# combinations built so far, starting from +initial_combinations+.
def extend_combinations_with_remaining_sets(initial_combinations, remaining_sets)
  combinations = initial_combinations
  remaining_sets.each do |conditions|
    combinations = extend_combinations_with_conditions(combinations, conditions)
  end
  combinations
end

# Returns a new list in which every existing combination has been extended by
# each condition in turn. The inputs are not mutated.
def extend_combinations_with_conditions(existing_combinations, conditions)
  existing_combinations.each_with_object([]) do |prefix, extended|
    conditions.each { |condition| extended << (prefix + [condition]) }
  end
end

# Converts each condition combination into a metric field whose name is the
# combination's formatted intersection name.
def create_intersection_fields(combinations, group_config)
  combinations.map do |combination|
    create_metric_field(format_intersection_name(combination), group_config)
  end
end

# Joins condition names into a single field name: the first name is kept
# as-is and each following name is appended after String#capitalize
# (e.g. %w[mobile us] => "mobileUs"). Note that #capitalize also downcases
# the rest of each appended name.
def format_intersection_name(condition_combination)
  first_condition = condition_combination.first
  condition_combination[1..].inject(first_condition) do |name, condition|
    name + condition.capitalize
  end
end

def breakout_metrics_fields(group_config)
Expand Down
2 changes: 1 addition & 1 deletion lib/manifold/services/vector_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module Manifold
module Services
# Handles the loading of vector schemas from configuration files
class VectorService
# Creates the service.
#
# +logger+ is optional and defaults to nil, so the service can be
# constructed without one; it is stored in @logger unchanged.
def initialize(logger = nil)
  @logger = logger
end

Expand Down
17 changes: 15 additions & 2 deletions lib/manifold/templates/workspace_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,22 @@ timestamp:

metrics:
renders:
conditions:
  # Named boolean expressions. Every name listed under `breakouts` below
  # must be defined here.
  mobile: IS_MOBILE(context.device)
  desktop: IS_DESKTOP(context.device)
  us: context.geo.country = 'US'
  global: context.geo.country != 'US'
  paid: IS_PAID(context.location)
  organic: IS_ORGANIC(context.location)

# Each breakout groups a list of condition names; intersections are
# generated across groups, never between conditions of the same group.
breakouts:
  device:
    - mobile
    - desktop
  acquisition:
    - organic
    - paid
  region:
    - us
    - global

aggregations:
countif: renderCount
Expand Down
2 changes: 1 addition & 1 deletion lib/manifold/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Manifold
  # Current version of the gem.
  VERSION = "0.2.0"
end
196 changes: 196 additions & 0 deletions spec/manifold/api/schema_manager_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# frozen_string_literal: true

RSpec.describe Manifold::API::SchemaManager do
include FakeFS::SpecHelpers

subject(:schema_manager) { described_class.new(name, vectors, vector_service, manifold_yaml, logger) }

let(:name) { "test_workspace" }
let(:vectors) { ["TestVector"] }
let(:manifold_yaml) { build_test_manifold_yaml }
let(:logger) { instance_spy(Logger) }
let(:vector_service) { instance_spy(Manifold::Services::VectorService) }

before do
  # Every vector schema lookup resolves to the same minimal STRING field.
  stubbed_vector_schema = { "name" => "test_vector", "type" => "STRING", "mode" => "NULLABLE" }
  allow(vector_service).to receive(:load_vector_schema).and_return(stubbed_vector_schema)
end

# rubocop:disable Metrics/MethodLength
# Builds the manifold configuration used by these specs: a single "renders"
# metrics group with four conditions, two breakout groups (device, region)
# and both a countif and a sumif aggregation.
def build_test_manifold_yaml
  {
    "metrics" => {
      "renders" => {
        "conditions" => {
          # Each condition name is paired with its matching predicate.
          "mobile" => "IS_MOBILE(context.device)",
          "desktop" => "IS_DESKTOP(context.device)",
          "us" => "context.geo.country = 'US'",
          "global" => "context.geo.country != 'US'"
        },
        "breakouts" => {
          "device" => %w[mobile desktop],
          "region" => %w[us global]
        },
        "aggregations" => {
          "countif" => "renderCount",
          "sumif" => {
            "sequenceSum" => {
              "field" => "context.sequence"
            }
          }
        }
      }
    }
  }
end
# rubocop:enable Metrics/MethodLength

describe "#dimensions_schema" do
  subject(:schema) { schema_manager.dimensions_schema }

  # Plain helper (not a memoized `let`) returning the top-level "dimensions" field.
  def dimensions_field
    schema.find { |field| field["name"] == "dimensions" }
  end

  it "includes required id field" do
    id_field = { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }
    expect(schema).to include(id_field)
  end

  it "includes dimensions field with RECORD type" do
    expect(dimensions_field["type"]).to eq("RECORD")
  end

  it "includes dimensions field with REQUIRED mode" do
    expect(dimensions_field["mode"]).to eq("REQUIRED")
  end
end

describe "#manifold_schema" do
  subject(:schema) { schema_manager.manifold_schema }

  # Looks up a top-level schema field by its name.
  def field_named(field_name)
    schema.find { |field| field["name"] == field_name }
  end

  it "includes required id field" do
    id_field = { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }
    expect(schema).to include(id_field)
  end

  it "includes required timestamp field" do
    timestamp_field = { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" }
    expect(schema).to include(timestamp_field)
  end

  it "includes dimensions field with RECORD type" do
    expect(field_named("dimensions")["type"]).to eq("RECORD")
  end

  it "includes metrics field with RECORD type" do
    expect(field_named("metrics")["type"]).to eq("RECORD")
  end
end

describe "#metrics_fields" do
  # Plain helper methods keep the spec tidy and avoid adding memoized helpers.
  def renders_group
    schema_manager.send(:metrics_fields).find { |field| field["name"] == "renders" }
  end

  def render_fields
    renders_group["fields"]
  end

  def render_field_names
    render_fields.map { |field| field["name"] }
  end

  it "includes the renders group field" do
    expect(renders_group).not_to be_nil
  end

  it "includes all individual condition fields" do
    expect(render_field_names).to include("mobile", "desktop", "us", "global")
  end

  # Either ordering of the two condition names is accepted for an intersection.
  describe "intersection fields" do
    it "includes mobile-us intersection" do
      expect(render_field_names).to include("mobileUs").or include("usMobile")
    end

    it "includes desktop-us intersection" do
      expect(render_field_names).to include("desktopUs").or include("usDesktop")
    end

    it "includes mobile-global intersection" do
      expect(render_field_names).to include("mobileGlobal").or include("globalMobile")
    end

    it "includes desktop-global intersection" do
      expect(render_field_names).to include("desktopGlobal").or include("globalDesktop")
    end
  end

  # Conditions from the same breakout group must never be intersected.
  describe "exclusion of invalid intersections" do
    it "does not include mobile-desktop intersection" do
      expect(render_field_names).not_to include("mobileDesktop", "desktopMobile")
    end

    it "does not include us-global intersection" do
      expect(render_field_names).not_to include("usGlobal", "globalUs")
    end
  end

  describe "aggregation fields" do
    # First field whose name combines "mobile" and "us" in either order.
    def find_intersection_field
      render_fields.find { |field| /mobile.*us|us.*mobile/i.match?(field["name"]) }
    end

    def aggregation_names(field)
      field["fields"].map { |aggregation| aggregation["name"] }
    end

    it "includes correct aggregation fields for individual conditions" do
      mobile_field = render_fields.find { |field| field["name"] == "mobile" }
      expect(aggregation_names(mobile_field)).to include("renderCount", "sequenceSum")
    end

    it "includes renderCount in intersection fields" do
      expect(aggregation_names(find_intersection_field)).to include("renderCount")
    end

    it "includes sequenceSum in intersection fields" do
      expect(aggregation_names(find_intersection_field)).to include("sequenceSum")
    end
  end
end

describe "#write_schemas" do
  subject(:tables_dir) { Pathname.pwd.join("tables") }

  # The schemas are written once before every example into a freshly created
  # tables directory.
  before do
    tables_dir.mkpath
    schema_manager.write_schemas(tables_dir)
  end

  # Resolves a path relative to the tables directory.
  def generated(relative_path)
    tables_dir.join(relative_path)
  end

  it "generates a dimensions schema file" do
    expect(generated("dimensions.json")).to be_file
  end

  it "generates a manifold schema file" do
    expect(generated("manifold.json")).to be_file
  end

  it "generates a metrics directory" do
    expect(generated("metrics")).to be_directory
  end

  it "generates a metrics schema file for each metrics group" do
    expect(generated("metrics/renders.json")).to be_file
  end
end
end
Loading