From f516013db6f89607d53e36d0fe1624562331b4b3 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Mon, 24 Nov 2025 14:53:16 -0500 Subject: [PATCH 01/21] feat:Implement single column aggregations, no support for group by yet --- src/Backend/opti-sql-go/Expr/expr.go | 1 - .../opti-sql-go/operators/aggr/avgExec.go | 1 - .../operators/aggr/avgExec_test.go | 7 - .../opti-sql-go/operators/aggr/basicAggr.go | 5 - .../operators/aggr/basicAggr_test.go | 7 - .../opti-sql-go/operators/aggr/singleAggr.go | 266 +++++++++ .../operators/aggr/singleAggr_test.go | 529 ++++++++++++++++++ src/Backend/opti-sql-go/operators/aggr/sum.go | 1 - .../opti-sql-go/operators/aggr/sum_test.go | 7 - 9 files changed, 795 insertions(+), 29 deletions(-) delete mode 100644 src/Backend/opti-sql-go/operators/aggr/avgExec.go delete mode 100644 src/Backend/opti-sql-go/operators/aggr/avgExec_test.go delete mode 100644 src/Backend/opti-sql-go/operators/aggr/basicAggr.go delete mode 100644 src/Backend/opti-sql-go/operators/aggr/basicAggr_test.go create mode 100644 src/Backend/opti-sql-go/operators/aggr/singleAggr.go create mode 100644 src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go delete mode 100644 src/Backend/opti-sql-go/operators/aggr/sum.go delete mode 100644 src/Backend/opti-sql-go/operators/aggr/sum_test.go diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index b3eed34..4ae10bb 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -602,7 +602,6 @@ func EvalCast(c *CastExpr, batch *operators.RecordBatch) (arrow.Array, error) { castOpts := compute.SafeCastOptions(c.TargetType) out, err := compute.CastArray(context.TODO(), arr, castOpts) if err != nil { - // This is a runtime cast error return nil, fmt.Errorf("cast error: cannot cast %s to %s: %w", arr.DataType(), c.TargetType, err) } diff --git a/src/Backend/opti-sql-go/operators/aggr/avgExec.go b/src/Backend/opti-sql-go/operators/aggr/avgExec.go deleted file 
mode 100644 index abd1ad5..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/avgExec.go +++ /dev/null @@ -1 +0,0 @@ -package aggr diff --git a/src/Backend/opti-sql-go/operators/aggr/avgExec_test.go b/src/Backend/opti-sql-go/operators/aggr/avgExec_test.go deleted file mode 100644 index 67671d0..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/avgExec_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package aggr - -import "testing" - -func TestAvgExec(t *testing.T) { - // Simple passing test -} diff --git a/src/Backend/opti-sql-go/operators/aggr/basicAggr.go b/src/Backend/opti-sql-go/operators/aggr/basicAggr.go deleted file mode 100644 index 0ffa1f3..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/basicAggr.go +++ /dev/null @@ -1,5 +0,0 @@ -package aggr - -// Min -//Max -//Count diff --git a/src/Backend/opti-sql-go/operators/aggr/basicAggr_test.go b/src/Backend/opti-sql-go/operators/aggr/basicAggr_test.go deleted file mode 100644 index 7a59206..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/basicAggr_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package aggr - -import "testing" - -func TestBasicAggr(t *testing.T) { - // Simple passing test -} diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go new file mode 100644 index 0000000..f59da08 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -0,0 +1,266 @@ +package aggr + +import ( + "context" + "errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" +) + +// TODO: next steps are to deal with group by statments but that can be dealt with after basic aggr that return just 1 global value +var ( + ErrUnsupportedAggrFunc = func(aggr int) error { + return fmt.Errorf("%d is an unsupported aggregate function", aggr) + } + ErrInvalidAggrColumnType = func(value any) error { + return 
fmt.Errorf("%v of type %T cannot be cast to float64 so it is not a valid column type to aggragate on", value, value) + } +) + +type AggrFunc int + +const ( + Min AggrFunc = iota + Max + Count + Sum + Avg +) + +var ( + _ = (Accumulator)(&MinAggrAccumulator{}) + _ = (Accumulator)(&MaxAggrAccumulator{}) + _ = (Accumulator)(&CountAggrAccumulator{}) + _ = (Accumulator)(&SumAggrAccumulator{}) + _ = (Accumulator)(&AvgAggrAccumulator{}) + _ = (operators.Operator)(&AggrExec{}) +) + +// Min +//Max +//Count +// Sum +// Avg + +// for now just focus on single-column aggregation without group by +type AggregateFunctions struct { + AggrFunc AggrFunc // switch to deal with seperate aggregation functions + Child Expr.Expression // resolves to a column generally +} +type Accumulator interface { + Update(value float64) + Finalize() float64 +} + +func newMinAggr() Accumulator { + return &MinAggrAccumulator{} +} + +type MinAggrAccumulator struct { + minV float64 + firstValue bool +} + +func (m *MinAggrAccumulator) Update(value float64) { + if !m.firstValue { + m.minV = value + m.firstValue = true + return + } + m.minV = min(m.minV, value) + +} +func (m *MinAggrAccumulator) Finalize() float64 { return m.minV } +func newMaxAggr() Accumulator { + return &MaxAggrAccumulator{} +} + +type MaxAggrAccumulator struct { + maxV float64 + firstValue bool +} + +func (m *MaxAggrAccumulator) Update(value float64) { + if !m.firstValue { + m.maxV = value + m.firstValue = true + return + } + m.maxV = max(m.maxV, value) +} +func (m *MaxAggrAccumulator) Finalize() float64 { return m.maxV } + +func NewCountAggr() Accumulator { + return &CountAggrAccumulator{} +} + +type CountAggrAccumulator struct { + count float64 +} + +func (c *CountAggrAccumulator) Update(_ float64) { + c.count++ +} +func (c *CountAggrAccumulator) Finalize() float64 { return c.count } + +func NewSumAggr() Accumulator { + return &SumAggrAccumulator{} +} + +type SumAggrAccumulator struct { + summation float64 +} + +func (s 
*SumAggrAccumulator) Update(value float64) { + s.summation += value +} +func (s *SumAggrAccumulator) Finalize() float64 { return s.summation } +func newAvgAggr() Accumulator { + return &AvgAggrAccumulator{} +} + +type AvgAggrAccumulator struct { + values float64 + count float64 +} + +func (a *AvgAggrAccumulator) Update(value float64) { + a.values += value + a.count++ +} +func (a *AvgAggrAccumulator) Finalize() float64 { return float64(a.values / a.count) } + +// =================== +// Aggregator Operator +// =================== +type AggrExec struct { + child operators.Operator // child operator + schema *arrow.Schema // output schema + aggExpressions []AggregateFunctions // list of wanted aggregate expressions + accumulators []Accumulator // list of accumulators corresponding to aggExpressions, these will actually work to compute the aggregation + done bool // know when to return io.EOF +} + +func NewAggrExec(child operators.Operator, aggExprs []AggregateFunctions) (*AggrExec, error) { + accs := make([]Accumulator, len(aggExprs)) + fields := make([]arrow.Field, len(aggExprs)) + for i, agg := range aggExprs { + dt, err := Expr.ExprDataType(agg.Child, child.Schema()) + if err != nil || !validAggrType(dt) { + return nil, ErrInvalidAggrColumnType(dt) + } + var fieldName string + switch agg.AggrFunc { + case Min: + fieldName = fmt.Sprintf("min_%s", agg.Child.String()) + accs[i] = newMinAggr() + case Max: + fieldName = fmt.Sprintf("max_%s", agg.Child.String()) + accs[i] = newMaxAggr() + case Count: + fieldName = fmt.Sprintf("count_%s", agg.Child.String()) + accs[i] = NewCountAggr() + case Sum: + fieldName = fmt.Sprintf("sum_%s", agg.Child.String()) + accs[i] = NewSumAggr() + case Avg: + fieldName = fmt.Sprintf("avg_%s", agg.Child.String()) + accs[i] = newAvgAggr() + + default: + return nil, ErrUnsupportedAggrFunc(int(agg.AggrFunc)) + } + fields[i] = arrow.Field{ + Name: fieldName, + Type: arrow.PrimitiveTypes.Float64, + Nullable: true, + } + } + return &AggrExec{ + 
child: child, + schema: arrow.NewSchema(fields, nil), + aggExpressions: aggExprs, + accumulators: accs, + }, nil +} + +// check for io.EOF with flag +// read in all record batches +// for each batch, run Expr.Evaluate, to get the column you want for the expression (cast to float64) +// +// for each element of that column grab the values you want using the accumulator interface +// +// build output batch, for now its just 1 of everything straight forward +func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { + if a.done { + return nil, io.EOF + } + for { + childBatch, err := a.child.Next(n) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + for i, aggExpr := range a.aggExpressions { + agrArray, err := Expr.EvalExpression(aggExpr.Child, childBatch) + if err != nil { + return nil, err + } + agrArray, err = castArrayToFloat64(agrArray) + if err != nil { + return nil, err + } + valueArray := agrArray.(*array.Float64) + accumulator := a.accumulators[i] + for i := 0; i < valueArray.Len(); i++ { + accumulator.Update(valueArray.Value(i)) + } + + } + } + // build array with just the result of the column + resultColumns := make([]arrow.Array, len(a.accumulators)) + for i := range a.accumulators { + resultColumns[i] = operators.NewRecordBatchBuilder().GenFloatArray(a.accumulators[i].Finalize()) + } + return &operators.RecordBatch{ + Schema: a.schema, + Columns: resultColumns, + RowCount: uint64(len(a.aggExpressions)), + }, io.EOF + // this is a pipeline breaker so it will always consume all of the input which means this needs to return an io.EOF +} + +func (a *AggrExec) Schema() *arrow.Schema { + return a.schema +} +func (a *AggrExec) Close() error { + return a.child.Close() +} + +func validAggrType(dt arrow.DataType) bool { + switch dt.ID() { + case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64, + arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64, arrow.FLOAT16, arrow.FLOAT32, arrow.FLOAT64: + return true + default: + 
return false + } +} + +func castArrayToFloat64(arr arrow.Array) (arrow.Array, error) { + outDatum, err := compute.CastArray(context.TODO(), arr, compute.NewCastOptions(&arrow.Float64Type{}, true)) + if err != nil { + return nil, err + } + + return outDatum, nil +} diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go new file mode 100644 index 0000000..36fe974 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go @@ -0,0 +1,529 @@ +package aggr + +import ( + "errors" + "fmt" + "io" + "math" + "opti-sql-go/Expr" + "opti-sql-go/operators/project" + "testing" + + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +func generateAggTestColumns() ([]string, []any) { + names := []string{ + "id", + "name", + "age", + "salary", + } + + columns := []any{ + // id: 1 to 25 + []int32{ + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + }, + + // name: 25 people + []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + "Karen", "Leo", "Mona", "Nate", "Olive", + "Paul", "Quinn", "Rita", "Sam", "Tina", + "Uma", "Victor", "Wendy", "Xavier", "Yara", + }, + + // age: 25 numeric values + []int32{ + 28, 34, 45, 22, 31, + 29, 40, 36, 50, 26, + 33, 41, 27, 38, 24, + 46, 30, 35, 43, 32, + 39, 48, 29, 37, 42, + }, + + // salary: 25 numeric values + []float64{ + 70000.0, 82000.5, 54000.0, 91000.0, 60000.0, + 75000.0, 66000.0, 88000.0, 45000.0, 99000.0, + 72000.0, 81000.0, 53000.0, 86000.0, 64000.0, + 93000.0, 68000.0, 76000.0, 89000.0, 71000.0, + 83000.0, 94000.0, 55000.0, 87000.0, 91500.0, + }, + } + + return names, columns +} +func aggProject() *project.InMemorySource { + names, cols := generateAggTestColumns() + p, _ := project.NewInMemoryProjectExec(names, cols) + return p +} + +func col(name string) 
Expr.Expression { + return Expr.NewColumnResolve(name) +} + +func TestNewAggrExec(t *testing.T) { + + // ----------------------------------------------------------------- + t.Run("valid_single_min", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + } + + exec, err := NewAggrExec(child, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if exec.Schema().NumFields() != 1 { + t.Fatalf("expected 1 schema field, got %d", exec.Schema().NumFields()) + } + + expectedName := "min_Column(age)" + if exec.Schema().Field(0).Name != expectedName { + t.Fatalf("expected name %s, got %s", + expectedName, exec.Schema().Field(0).Name) + } + }) + + // ----------------------------------------------------------------- + t.Run("multiple_aggregations_schema_names", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Avg, Child: col("age")}, + } + + exec, err := NewAggrExec(child, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + schema := exec.Schema() + + expected := []string{ + "min_Column(id)", + "max_Column(salary)", + "avg_Column(age)", + } + + for i, f := range schema.Fields() { + if f.Name != expected[i] { + t.Fatalf("expected field %s, got %s", expected[i], f.Name) + } + } + }) + + // ----------------------------------------------------------------- + t.Run("invalid_type_detection_string_column", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("name")}, // "name" is string → invalid + } + + _, err := NewAggrExec(child, agg) + if err == nil { + t.Fatalf("expected type error, got nil") + } + t.Logf("================\n invalid column err %v \n ============", err) + }) + + // ----------------------------------------------------------------- + t.Run("unsupported_aggregate_function", func(t *testing.T) { + 
child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: 9999, Child: col("age")}, + } + + _, err := NewAggrExec(child, agg) + if err == nil { + t.Fatalf("expected unsupported aggr error") + } + }) + + // ----------------------------------------------------------------- + t.Run("schema_type_float64_for_all_numeric_aggs", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Sum, Child: col("age")}, + {AggrFunc: Avg, Child: col("salary")}, + {AggrFunc: Count, Child: col("age")}, + } + + exec, err := NewAggrExec(child, agg) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + for _, f := range exec.Schema().Fields() { + if f.Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected float64 output type, got %s", f.Type) + } + } + if err := exec.Close(); err != nil { + t.Fatalf("unexpected close error: %v", err) + } + }) + + // ----------------------------------------------------------------- + t.Run("check_all_valid_numeric_types_pass", func(t *testing.T) { + + // all numeric arrow types accepted by validAggrType() + validTypes := []arrow.DataType{ + arrow.PrimitiveTypes.Uint8, + arrow.PrimitiveTypes.Uint16, + arrow.PrimitiveTypes.Uint32, + arrow.PrimitiveTypes.Uint64, + arrow.PrimitiveTypes.Int8, + arrow.PrimitiveTypes.Int16, + arrow.PrimitiveTypes.Int32, + arrow.PrimitiveTypes.Int64, + arrow.PrimitiveTypes.Float32, + arrow.PrimitiveTypes.Float64, + } + + fieldNames := make([]string, len(validTypes)) + colData := make([]any, len(validTypes)) + + for i, dt := range validTypes { + name := fmt.Sprintf("col_%d", i) + fieldNames[i] = name + + switch dt.ID() { + case arrow.UINT8: + colData[i] = []uint8{1} + case arrow.UINT16: + colData[i] = []uint16{1} + case arrow.UINT32: + colData[i] = []uint32{1} + case arrow.UINT64: + colData[i] = []uint64{1} + case arrow.INT8: + colData[i] = []int8{1} + case arrow.INT16: + colData[i] = []int16{1} + case 
arrow.INT32: + colData[i] = []int32{1} + case arrow.INT64: + colData[i] = []int64{1} + case arrow.FLOAT16: + // float16 stored as float32 in Go + colData[i] = []float32{1} + case arrow.FLOAT32: + colData[i] = []float32{1} + case arrow.FLOAT64: + colData[i] = []float64{1} + } + } + + src, _ := project.NewInMemoryProjectExec(fieldNames, colData) + + for i := range fieldNames { + agg := []AggregateFunctions{ + {AggrFunc: Sum, Child: col(fieldNames[i])}, + } + + _, err := NewAggrExec(src, agg) + if err != nil { + t.Fatalf("unexpected error for type %s: %v", validTypes[i], err) + } + } + }) +} + +func TestCastArrayToFloat64(t *testing.T) { + + alloc := memory.NewGoAllocator + + // -------------------------------------------------------- + t.Run("cast_int32_to_float64", func(t *testing.T) { + b := array.NewInt32Builder(alloc()) + b.AppendValues([]int32{1, 2, 3, 4}, nil) + arr := b.NewArray() + + out, err := castArrayToFloat64(arr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + farr, ok := out.(*array.Float64) + if !ok { + t.Fatalf("expected Float64 array, got %T", out) + } + + expected := []float64{1, 2, 3, 4} + for i := range expected { + if farr.Value(i) != expected[i] { + t.Fatalf("expected %v at %d, got %v", expected[i], i, farr.Value(i)) + } + } + }) + + // -------------------------------------------------------- + t.Run("cast_float32_to_float64", func(t *testing.T) { + b := array.NewFloat32Builder(alloc()) + b.AppendValues([]float32{10.5, 20.5, 30.5}, nil) + arr := b.NewArray() + + out, err := castArrayToFloat64(arr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + farr, ok := out.(*array.Float64) + if !ok { + t.Fatalf("expected Float64 array, got %T", out) + } + + expected := []float64{10.5, 20.5, 30.5} + for i := range expected { + if farr.Value(i) != expected[i] { + t.Fatalf("expected %v at %d, got %v", expected[i], i, farr.Value(i)) + } + } + }) + + // -------------------------------------------------------- + 
t.Run("invalid_string_cast", func(t *testing.T) { + b := array.NewStringBuilder(alloc()) + b.AppendValues([]string{"a", "b", "c"}, nil) + arr := b.NewArray() + + _, err := castArrayToFloat64(arr) + if err == nil { + t.Fatalf("expected error when casting string array to float64") + } + }) + + // -------------------------------------------------------- + t.Run("empty_array_cast", func(t *testing.T) { + b := array.NewInt32Builder(alloc()) + // no values appended + arr := b.NewArray() + + out, err := castArrayToFloat64(arr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + _, ok := out.(*array.Float64) + if !ok { + t.Fatalf("expected Float64 array for empty cast, got %T", out) + } + + if out.Len() != 0 { + t.Fatalf("expected empty array, got length %d", out.Len()) + } + }) + +} + +func TestAggregateExecNext(t *testing.T) { + t.Run("validating done case early", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}} + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + aggrExec.done = true + _, err = aggrExec.Next(10) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF error, got nil") + } + }) + t.Run("Aggr minimum value on age", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}} + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + resultBatch, err := aggrExec.Next(100) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF error, got nil") + } + t.Logf("record batch: %v\n", resultBatch) + if resultBatch.Columns[0].(*array.Float64).Value(0) != 22 { + t.Fatalf("expected minimum age 22, got %v", resultBatch.Columns[0].(*array.Float64).Value(0)) + } + + }) + t.Run("Aggr maximum salary", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Max, Child: 
col("salary")}, + } + + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, err := aggrExec.Next(100) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF, got %v", err) + } + + maxSalary := resultBatch.Columns[0].(*array.Float64).Value(0) + if maxSalary != 99000.0 && maxSalary != 94000.0 && maxSalary != 93000.0 { + // Real max is 99000 (Jake has 99000) + t.Fatalf("expected max salary 99000, got %v", maxSalary) + } + }) + t.Run("Aggr sum of id column", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("id")}, + } + + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, err := aggrExec.Next(200) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF, got %v", err) + } + + sumIDs := resultBatch.Columns[0].(*array.Float64).Value(0) + expected := float64((25 * 26) / 2) // sum(1..25) = 325 + if sumIDs != expected { + t.Fatalf("expected sum 325, got %v", sumIDs) + } + }) + t.Run("Aggr count of age column", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Count, Child: col("age")}, + } + + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, err := aggrExec.Next(300) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF, got %v", err) + } + + count := resultBatch.Columns[0].(*array.Float64).Value(0) + if count != 25 { + t.Fatalf("expected count 25, got %v", count) + } + }) + t.Run("Aggr average of salary (⚠ your AVG is wrong)", func(t *testing.T) { + proj := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Avg, Child: col("salary")}, + } + + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, err := aggrExec.Next(500) + if err == nil || !errors.Is(err, 
io.EOF) { + t.Fatalf("expected io.EOF, got %v", err) + } + + avg := resultBatch.Columns[0].(*array.Float64).Value(0) + expected := 75740.02 + + if math.Abs(avg-expected) > 0.001 { + t.Fatalf("expected avg %v, got %v", expected, avg) + } + + }) + t.Run("Multiple aggregators in a single request", func(t *testing.T) { + proj := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Count, Child: col("id")}, + } + + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, err := aggrExec.Next(1000) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF, got %v", err) + } + + minAge := resultBatch.Columns[0].(*array.Float64).Value(0) + maxSalary := resultBatch.Columns[1].(*array.Float64).Value(0) + countIDs := resultBatch.Columns[2].(*array.Float64).Value(0) + + if minAge != 22 { + t.Fatalf("expected min age 22, got %v", minAge) + } + if maxSalary != 99000.0 { + t.Fatalf("expected max salary 99000, got %v", maxSalary) + } + if countIDs != 25 { + t.Fatalf("expected count 25, got %v", countIDs) + } + }) + + // ========================================================== + t.Run("Schema correctness for multiple aggregates", func(t *testing.T) { + proj := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}, + {AggrFunc: Sum, Child: col("age")}, + {AggrFunc: Count, Child: col("salary")}, + } + + aggrExec, err := NewAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + s := aggrExec.Schema() + + expectedNames := []string{ + "min_Column(id)", + "sum_Column(age)", + "count_Column(salary)", + } + + for i, f := range s.Fields() { + if f.Name != expectedNames[i] { + t.Fatalf("expected field %s, got %s", expectedNames[i], f.Name) + } + if f.Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected float64 fields only") + } + } + }) +} diff --git 
a/src/Backend/opti-sql-go/operators/aggr/sum.go b/src/Backend/opti-sql-go/operators/aggr/sum.go deleted file mode 100644 index abd1ad5..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/sum.go +++ /dev/null @@ -1 +0,0 @@ -package aggr diff --git a/src/Backend/opti-sql-go/operators/aggr/sum_test.go b/src/Backend/opti-sql-go/operators/aggr/sum_test.go deleted file mode 100644 index 485b9bb..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/sum_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package aggr - -import "testing" - -func TestSum(t *testing.T) { - // Simple passing test -} From d4b5538da5aae2a70359af964aa1e2db1309522f Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Mon, 24 Nov 2025 23:44:31 -0500 Subject: [PATCH 02/21] Feat: ground work for group bys, Consuming child record batching and producing result are next steps --- .../opti-sql-go/operators/aggr/groupBy.go | 120 ++++++ .../operators/aggr/groupBy_test.go | 400 +++++++++++++++++- .../opti-sql-go/operators/aggr/singleAggr.go | 30 +- .../operators/aggr/singleAggr_test.go | 28 +- .../opti-sql-go/operators/aggr/sort.go | 2 + 5 files changed, 560 insertions(+), 20 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index abd1ad5..4a8d24b 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -1 +1,121 @@ package aggr + +import ( + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "strings" + + "github.com/apache/arrow/go/v17/arrow" +) + +/* +rules for group by: +1.Every non-aggregated column in SELECT must be in GROUP BY +2.You can group by multiple columns - creates groups for each unique combination +3.Use HAVING to filter groups (WHERE filters before grouping, HAVING filters after) +*/ +var ( + _ = (operators.Operator)(&GroupByExec{}) +) + +// place all unique elements of the group by column into a hash table, each element gets their own Accumulator 
instance +type GroupByExec struct { + child operators.Operator + schema *arrow.Schema + groupExpr []AggregateFunctions + groupByExpr []Expr.Expression // column names + + groups map[string][]Accumulator // maps group by key to its accumulator + keys map[string][]any // key → original values for output + done bool +} + +func NewGroupByExec(child operators.Operator, groupExpr []AggregateFunctions, groupBy []Expr.Expression) (*GroupByExec, error) { + s, err := buildGroupBySchema(child.Schema(), groupBy, groupExpr) + if err != nil { + return nil, err + } + + return &GroupByExec{ + child: child, + schema: s, + groupExpr: groupExpr, + groupByExpr: groupBy, + keys: make(map[string][]any), + groups: make(map[string][]Accumulator), + }, nil +} +func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { + if g.done { + return nil, io.EOF + } + return nil, nil +} +func (g *GroupByExec) Schema() *arrow.Schema { + return g.schema +} +func (g *GroupByExec) Close() error { + return g.child.Close() +} + +// handles validation and building of schema for group by +func buildGroupBySchema(childSchema *arrow.Schema, groupByExpr []Expr.Expression, aggrExprs []AggregateFunctions) (*arrow.Schema, error) { + + fields := make([]arrow.Field, 0, len(groupByExpr)+len(aggrExprs)) + + // 1. Add group-by columns + for _, expr := range groupByExpr { + dt, err := Expr.ExprDataType(expr, childSchema) + if err != nil { + return nil, fmt.Errorf("group-by expr %s has invalid type: %w", expr.String(), err) + } + + fields = append(fields, arrow.Field{ + Name: fmt.Sprintf("group_%s", expr.String()), + Type: dt, + Nullable: false, + }) + } + + // 2. 
Add aggregate columns + for _, agg := range aggrExprs { + + // All aggregates produce float64 in your design + fieldName := fmt.Sprintf("%s_%s", + strings.ToLower(aggrToString(int(agg.AggrFunc))), + agg.Child.String(), + ) + + fields = append(fields, arrow.Field{ + Name: fieldName, + Type: arrow.PrimitiveTypes.Float64, + Nullable: false, + }) + } + + return arrow.NewSchema(fields, nil), nil +} + +/* +TODO: use this in Next loop to skip boil plate creation code +func (g *GroupByExec) createAccumulators() []Accumulator { + accumulators := make([]Accumulator, len(g.groupExpr)) + for i, expr := range g.groupExpr { + switch expr.AggrFunc { + case Min: + accumulators[i] = newMinAggr() + case Max: + accumulators[i] = newMaxAggr() + case Count: + accumulators[i] = NewCountAggr() + case Sum: + accumulators[i] = NewSumAggr() + case Avg: + accumulators[i] = newAvgAggr() + } + } + return accumulators +} +*/ diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go index 3313b3e..57855d7 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go @@ -1,7 +1,401 @@ package aggr -import "testing" +import ( + "errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators/project" + "strings" + "testing" -func TestGroupBy(t *testing.T) { - // Simple passing test + "github.com/apache/arrow/go/v17/arrow" +) + +func generateGroupByTestColumns() ([]string, []any) { + names := []string{ + "id", + "name", + "department", + "region", + "seniority", + "salary", + "age", + } + + // 40 IDs + ids := make([]int32, 40) + for i := range ids { + ids[i] = int32(i + 1) + } + + // Names – 40 names + namesArr := []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + "Karen", "Leo", "Mona", "Nate", "Olive", + "Paul", "Quinn", "Rita", "Sam", "Tina", + "Uma", "Victor", "Wendy", "Xavier", "Yara", + "Zane", "Becky", 
"Carlos", "Dora", "Elias", + "Fiona", "Gabe", "Helena", "Isaac", "Julia", + "Kevin", "Lara", "Miles", "Nora", "Owen", + } + + // Randomized but balanced departments (5 groups) + departments := []string{ + "Engineering", "HR", "Sales", "Engineering", "Finance", + "Support", "Sales", "Engineering", "Support", "Finance", + "HR", "Engineering", "Sales", "Support", "Finance", + "Engineering", "Sales", "HR", "Support", "Engineering", + "Finance", "Sales", "Engineering", "Support", "HR", + "Support", "Engineering", "Finance", "Sales", "HR", + "Engineering", "Support", "Finance", "Sales", "Engineering", + "HR", "Finance", "Support", "Engineering", "Sales", + } + + // Randomized but balanced regions (4 groups) + regions := []string{ + "North", "East", "South", "West", "South", + "North", "West", "East", "North", "South", + "West", "East", "North", "South", "West", + "North", "East", "West", "South", "North", + "East", "West", "South", "North", "East", + "South", "North", "West", "East", "South", + "West", "North", "East", "South", "West", + "North", "South", "East", "West", "North", + } + + // Randomized seniority (3 groups) + seniority := []string{ + "Junior", "Senior", "Mid", "Junior", "Mid", + "Senior", "Junior", "Mid", "Senior", "Junior", + "Mid", "Senior", "Junior", "Mid", "Senior", + "Junior", "Mid", "Senior", "Junior", "Mid", + "Senior", "Junior", "Mid", "Senior", "Junior", + "Mid", "Senior", "Junior", "Mid", "Senior", + "Junior", "Mid", "Senior", "Junior", "Mid", + "Senior", "Junior", "Mid", "Senior", "Junior", + } + + // Salaries (same as before) + salaries := []float64{ + 70000, 82000, 54000, 91000, 60000, + 75000, 66000, 88000, 45000, 99000, + 72000, 81000, 53000, 86000, 64000, + 93000, 68000, 76000, 89000, 71000, + 83000, 94000, 55000, 87000, 91500, + 72000, 69000, 58000, 84000, 79000, + 81000, 78000, 62000, 97000, 82000, + 95000, 76000, 88000, 91000, 64000, + } + + // Ages with some repetition + ages := []int32{ + 28, 34, 45, 22, 31, + 29, 40, 36, 50, 26, + 33, 
41, 27, 38, 24, + 46, 30, 35, 43, 32, + 39, 48, 29, 37, 42, + 28, 34, 45, 22, 31, + 29, 40, 36, 50, 26, + 39, 48, 29, 37, 42, + } + + columns := []any{ + ids, + namesArr, + departments, + regions, + seniority, + salaries, + ages, + } + + return names, columns +} + +func groupByProject() *project.InMemorySource { + names, cols := generateGroupByTestColumns() + p, _ := project.NewInMemoryProjectExec(names, cols) + return p +} + +func TestGroupByInit(t *testing.T) { + p := groupByProject() + rc, _ := p.Next(12) + fmt.Printf("rc:%v \n", rc) +} + +func TestNewGroupByExecAndSchema(t *testing.T) { + // convenience builder + col := func(name string) Expr.Expression { + return Expr.NewColumnResolve(name) + } + + t.Run("single group-by single aggregate", func(t *testing.T) { + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + schema := gb.Schema() + if schema == nil { + t.Fatalf("schema should not be nil") + } + fmt.Println(schema) + + // group-by + 1 agg = 2 fields + if got, want := schema.NumFields(), 2; got != want { + t.Fatalf("expected %d fields, got %d", want, got) + } + + // group field + f0 := schema.Field(0) + expName := "group_" + groupBy[0].String() + if f0.Name != expName { + t.Fatalf("expected group field name %q, got %q", expName, f0.Name) + } + + // aggregate field + f1 := schema.Field(1) + properAggName := fmt.Sprintf("%s_%s", + strings.ToLower(aggrToString(int(aggs[0].AggrFunc))), + aggs[0].Child.String(), + ) + if f1.Name != properAggName { + t.Fatalf("expected agg field %q, got %q", properAggName, f1.Name) + } + + if gb.groups == nil { + t.Fatalf("groups map not initialized") + } + if gb.keys == nil { + t.Fatalf("keys map not initialized") + } + }) + + t.Run("multiple group-by and multiple aggregates", func(t *testing.T) { + child := 
groupByProject() + + groupBy := []Expr.Expression{col("region"), col("seniority")} + aggs := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + schema := gb.Schema() + fmt.Printf("schema: %v\n", schema) + wantFields := len(groupBy) + len(aggs) + if schema.NumFields() != wantFields { + t.Fatalf("expected %d fields, got %d", wantFields, schema.NumFields()) + } + + // group fields first + for i, gexpr := range groupBy { + f := schema.Field(i) + exp := "group_" + gexpr.String() + if f.Name != exp { + t.Fatalf("group field[%d] mismatch: want %q got %q", i, exp, f.Name) + } + } + + // aggregate fields next + offset := len(groupBy) + for j, agg := range aggs { + f := schema.Field(offset + j) + expAggName := fmt.Sprintf("%s_%s", + strings.ToLower(aggrToString(int(agg.AggrFunc))), + agg.Child.String(), + ) + if f.Name != expAggName { + t.Fatalf("agg field name mismatch: want %q got %q", expAggName, f.Name) + } + } + }) + + t.Run("invalid group-by column triggers error", func(t *testing.T) { + child := groupByProject() + + invalidGB := []Expr.Expression{col("not_a_col")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + // direct schema builder test + _, err := buildGroupBySchema(child.Schema(), invalidGB, aggs) + if err == nil { + t.Fatalf("expected error for invalid group-by expr") + } + + // NewGroupByExec should also fail + if _, err := NewGroupByExec(child, aggs, invalidGB); err == nil { + t.Fatalf("expected NewGroupByExec error for invalid group-by") + } + }) + + t.Run("no aggregates - schema should only contain group-by columns", func(t *testing.T) { + child := groupByProject() + + groupBy := []Expr.Expression{col("region")} + var aggs []AggregateFunctions + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + 
t.Fatalf("unexpected: %v", err) + } + + schema := gb.Schema() + + if schema.NumFields() != 1 { + t.Fatalf("expected 1 field, got %d", schema.NumFields()) + } + + f := schema.Field(0) + exp := "group_" + groupBy[0].String() + if f.Name != exp { + t.Fatalf("wrong group field name: want %q got %q", exp, f.Name) + } + }) + + t.Run("multiple aggregates produce float64 regardless of source type", func(t *testing.T) { + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Avg, Child: col("age")}, // int32 → float64 + {AggrFunc: Sum, Child: col("salary")}, // float64 → float64 + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + schema := gb.Schema() + + // group-by + 2 aggregates = 3 + if schema.NumFields() != 3 { + t.Fatalf("expected 3 fields, got %d", schema.NumFields()) + } + + for idx := 1; idx < 3; idx++ { + f := schema.Field(idx) + if f.Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected field[%d] to be float64, got %v", idx, f.Type) + } + } + }) + + t.Run("schema names must match exact string() output of expressions", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + schema := gb.Schema() + + expected0 := "group_" + gbExpr[0].String() // group_Column(seniority) + expected1 := "group_" + gbExpr[1].String() // group_Column(region) + + if schema.Field(0).Name != expected0 { + t.Fatalf("wrong field[0] name: want %q got %q", expected0, schema.Field(0).Name) + } + if schema.Field(1).Name != expected1 { + t.Fatalf("wrong field[1] name: want %q got %q", expected1, schema.Field(1).Name) + } + + // count column + expectedAgg := 
"count_" + aggs[0].Child.String() + if schema.Field(2).Name != expectedAgg { + t.Fatalf("wrong agg field name: want %q got %q", expectedAgg, schema.Field(2).Name) + } + }) + t.Run("basic close check", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if gb.Close() != nil { + t.Fatalf("unexpected error on close") + } + + }) +} +func TestBasicOperatorCasesGroupBy(t *testing.T) { + + t.Run("basic close check", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if gb.Close() != nil { + t.Fatalf("unexpected error on close") + } + + }) + t.Run("done case", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + gb.done = true + _, err = gb.Next(100) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF but recieved %v", err) + } + + }) } diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go index f59da08..6ac7cad 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -49,6 +49,13 @@ var ( // Avg // 
for now just focus on single-column aggregation without group by +func NewAggregateFunctions(aggrFunc AggrFunc, child Expr.Expression) AggregateFunctions { + return AggregateFunctions{ + AggrFunc: aggrFunc, + Child: child, + } +} + type AggregateFunctions struct { AggrFunc AggrFunc // switch to deal with seperate aggregation functions Child Expr.Expression // resolves to a column generally @@ -147,7 +154,7 @@ type AggrExec struct { done bool // know when to return io.EOF } -func NewAggrExec(child operators.Operator, aggExprs []AggregateFunctions) (*AggrExec, error) { +func NewGlobalAggrExec(child operators.Operator, aggExprs []AggregateFunctions) (*AggrExec, error) { accs := make([]Accumulator, len(aggExprs)) fields := make([]arrow.Field, len(aggExprs)) for i, agg := range aggExprs { @@ -220,8 +227,8 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { } valueArray := agrArray.(*array.Float64) accumulator := a.accumulators[i] - for i := 0; i < valueArray.Len(); i++ { - accumulator.Update(valueArray.Value(i)) + for j := 0; j < valueArray.Len(); j++ { + accumulator.Update(valueArray.Value(j)) } } @@ -231,6 +238,7 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { for i := range a.accumulators { resultColumns[i] = operators.NewRecordBatchBuilder().GenFloatArray(a.accumulators[i].Finalize()) } + a.done = true return &operators.RecordBatch{ Schema: a.schema, Columns: resultColumns, @@ -264,3 +272,19 @@ func castArrayToFloat64(arr arrow.Array) (arrow.Array, error) { return outDatum, nil } +func aggrToString(t int) string { + switch AggrFunc(t) { + case Min: + return "MIN" + case Max: + return "MAX" + case Count: + return "COUNT" + case Sum: + return "SUM" + case Avg: + return "AVG" + default: + return "UNKNOWN_AGGREGATE_FUNCTION" + } +} diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go index 36fe974..ea89bac 100644 --- 
a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go @@ -82,7 +82,7 @@ func TestNewAggrExec(t *testing.T) { {AggrFunc: Min, Child: col("age")}, } - exec, err := NewAggrExec(child, agg) + exec, err := NewGlobalAggrExec(child, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -108,7 +108,7 @@ func TestNewAggrExec(t *testing.T) { {AggrFunc: Avg, Child: col("age")}, } - exec, err := NewAggrExec(child, agg) + exec, err := NewGlobalAggrExec(child, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -136,7 +136,7 @@ func TestNewAggrExec(t *testing.T) { {AggrFunc: Min, Child: col("name")}, // "name" is string → invalid } - _, err := NewAggrExec(child, agg) + _, err := NewGlobalAggrExec(child, agg) if err == nil { t.Fatalf("expected type error, got nil") } @@ -151,7 +151,7 @@ func TestNewAggrExec(t *testing.T) { {AggrFunc: 9999, Child: col("age")}, } - _, err := NewAggrExec(child, agg) + _, err := NewGlobalAggrExec(child, agg) if err == nil { t.Fatalf("expected unsupported aggr error") } @@ -169,7 +169,7 @@ func TestNewAggrExec(t *testing.T) { {AggrFunc: Count, Child: col("age")}, } - exec, err := NewAggrExec(child, agg) + exec, err := NewGlobalAggrExec(child, agg) if err != nil { t.Fatalf("unexpected: %v", err) } @@ -242,7 +242,7 @@ func TestNewAggrExec(t *testing.T) { {AggrFunc: Sum, Child: col(fieldNames[i])}, } - _, err := NewAggrExec(src, agg) + _, err := NewGlobalAggrExec(src, agg) if err != nil { t.Fatalf("unexpected error for type %s: %v", validTypes[i], err) } @@ -342,7 +342,7 @@ func TestAggregateExecNext(t *testing.T) { proj := aggProject() agg := []AggregateFunctions{ {AggrFunc: Min, Child: col("id")}} - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -356,7 +356,7 @@ func TestAggregateExecNext(t *testing.T) { proj := aggProject() agg := []AggregateFunctions{ {AggrFunc: 
Min, Child: col("age")}} - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -376,7 +376,7 @@ func TestAggregateExecNext(t *testing.T) { {AggrFunc: Max, Child: col("salary")}, } - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -398,7 +398,7 @@ func TestAggregateExecNext(t *testing.T) { {AggrFunc: Sum, Child: col("id")}, } - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -420,7 +420,7 @@ func TestAggregateExecNext(t *testing.T) { {AggrFunc: Count, Child: col("age")}, } - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -442,7 +442,7 @@ func TestAggregateExecNext(t *testing.T) { {AggrFunc: Avg, Child: col("salary")}, } - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -469,7 +469,7 @@ func TestAggregateExecNext(t *testing.T) { {AggrFunc: Count, Child: col("id")}, } - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -504,7 +504,7 @@ func TestAggregateExecNext(t *testing.T) { {AggrFunc: Count, Child: col("salary")}, } - aggrExec, err := NewAggrExec(proj, agg) + aggrExec, err := NewGlobalAggrExec(proj, agg) if err != nil { t.Fatalf("unexpected: %v", err) } diff --git a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index abd1ad5..d5a469b 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -1 +1,3 @@ package aggr + +// order by col asc, col 2 desc .... 
ect From be9c8131b91992cbee9cf4a68d7579027cbcc9e7 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Tue, 25 Nov 2025 15:37:02 -0500 Subject: [PATCH 03/21] feat: Implement agrregations with dynamic group by clause --- .../opti-sql-go/operators/aggr/groupBy.go | 364 ++++++++++++++++-- .../operators/aggr/groupBy_test.go | 292 +++++++++++++- .../opti-sql-go/operators/aggr/singleAggr.go | 58 +-- .../operators/aggr/singleAggr_test.go | 130 +++++-- .../opti-sql-go/operators/project/custom.go | 29 ++ src/Backend/opti-sql-go/operators/record.go | 113 ++++++ 6 files changed, 908 insertions(+), 78 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index 4a8d24b..686ae3a 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -1,13 +1,16 @@ package aggr import ( + "errors" "fmt" "io" "opti-sql-go/Expr" "opti-sql-go/operators" "strings" + "github.com/apache/arrow/go/v15/arrow/memory" "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" ) /* @@ -27,8 +30,8 @@ type GroupByExec struct { groupExpr []AggregateFunctions groupByExpr []Expr.Expression // column names - groups map[string][]Accumulator // maps group by key to its accumulator - keys map[string][]any // key → original values for output + groups map[string][]accumulator // maps group by key to its accumulator + keys map[string][]string // key → original values for output done bool } @@ -43,16 +46,95 @@ func NewGroupByExec(child operators.Operator, groupExpr []AggregateFunctions, gr schema: s, groupExpr: groupExpr, groupByExpr: groupBy, - keys: make(map[string][]any), - groups: make(map[string][]Accumulator), + keys: make(map[string][]string), + groups: make(map[string][]accumulator), }, nil } + +/* +grab child rows +*/ func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { if g.done { return nil, io.EOF } - return nil, nil + + 
for { + childBatch, err := g.child.Next(batchSize) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + + rowCount := int(childBatch.RowCount) + + // 1. evaluate all group-by expressions into arrays + groupArrays := make([]arrow.Array, len(g.groupByExpr)) + for i, expr := range g.groupByExpr { + arr, err := Expr.EvalExpression(expr, childBatch) + if err != nil { + return nil, err + } + groupArrays[i] = arr + } + + // 2. evaluate all aggregation child expressions + aggrArrays := make([]arrow.Array, len(g.groupExpr)) + for i, agg := range g.groupExpr { + arr, err := Expr.EvalExpression(agg.Child, childBatch) + if err != nil { + return nil, err + } + arr, err = castArrayToFloat64(arr) + if err != nil { + return nil, err + } + aggrArrays[i] = arr + } + + // 3. process rows + for row := 0; row < rowCount; row++ { + + // Build group key + keyParts := make([]string, len(groupArrays)) + for j, arr := range groupArrays { + if arr.IsNull(row) { + keyParts[j] = "NULL" + } else { + keyParts[j] = fmt.Sprintf("%v", getValue(arr, row)) + } + } + key := strings.Join(keyParts, "|") + fmt.Printf("key: %v\n", key) + // Allocate accumulator list if new group + if _, exists := g.groups[key]; !exists { + g.groups[key] = make([]accumulator, len(g.groupExpr)) + for i, agg := range g.groupExpr { + g.groups[key][i] = createAccumulator(agg.AggrFunc) + } + g.keys[key] = keyParts // store original values + } + + // UPDATE accumulators + for i, arr := range aggrArrays { + if arr.IsNull(row) { + continue + } + val := arr.(*array.Float64).Value(row) + g.groups[key][i].Update(val) + } + } + } + + // 4. Build output RecordBatch + batch := buildGroupByOutput(g) + + g.done = true + return batch, io.EOF } + func (g *GroupByExec) Schema() *arrow.Schema { return g.schema } @@ -81,8 +163,11 @@ func buildGroupBySchema(childSchema *arrow.Schema, groupByExpr []Expr.Expression // 2. 
Add aggregate columns for _, agg := range aggrExprs { - - // All aggregates produce float64 in your design + dt, err := Expr.ExprDataType(agg.Child, childSchema) + if err != nil || !validAggrType(dt) { + return nil, ErrInvalidAggrColumnType(dt) + } + // All aggregates produce float64 fieldName := fmt.Sprintf("%s_%s", strings.ToLower(aggrToString(int(agg.AggrFunc))), agg.Child.String(), @@ -98,24 +183,253 @@ func buildGroupBySchema(childSchema *arrow.Schema, groupByExpr []Expr.Expression return arrow.NewSchema(fields, nil), nil } -/* -TODO: use this in Next loop to skip boil plate creation code -func (g *GroupByExec) createAccumulators() []Accumulator { - accumulators := make([]Accumulator, len(g.groupExpr)) - for i, expr := range g.groupExpr { - switch expr.AggrFunc { - case Min: - accumulators[i] = newMinAggr() - case Max: - accumulators[i] = newMaxAggr() - case Count: - accumulators[i] = NewCountAggr() - case Sum: - accumulators[i] = NewSumAggr() - case Avg: - accumulators[i] = newAvgAggr() +func getValue(arr arrow.Array, row int) any { + switch col := arr.(type) { + case *array.Int32: + return col.Value(row) + case *array.Int64: + return col.Value(row) + case *array.Float32: + return col.Value(row) + case *array.Float64: + return col.Value(row) + case *array.String: + return col.Value(row) + case *array.Boolean: + return col.Value(row) + default: + // fallback – debug only + return fmt.Sprintf("%v", col) + } +} +func createAccumulator(fn AggrFunc) accumulator { + switch fn { + case Min: + return newMinAggr() + case Max: + return newMaxAggr() + case Sum: + return NewSumAggr() + case Count: + return NewCountAggr() + case Avg: + return newAvgAggr() + default: + panic(fmt.Sprintf("unsupported aggregate function: %v", fn)) + } +} + +func buildGroupByOutput(g *GroupByExec) *operators.RecordBatch { + alloc := memory.NewGoAllocator() + + rowCount := len(g.groups) + if rowCount == 0 { + // return empty batch (0 groups) + return &operators.RecordBatch{ + Schema: g.schema, 
+ Columns: []arrow.Array{}, + RowCount: 0, + } + } + + // Prepare column builders + colBuilders := make([]arrow.Array, len(g.schema.Fields())) + + // Temporary storage for columns + groupCols := make([][]any, len(g.groupByExpr)) // group columns + aggrCols := make([][]float64, len(g.groupExpr)) // aggregate columns + + for i := range groupCols { + groupCols[i] = make([]any, 0, rowCount) + } + for i := range aggrCols { + aggrCols[i] = make([]float64, 0, rowCount) + } + + // Iterate groups in stable order + i := 0 + for key, accs := range g.groups { + // Add group-by (dimension) values + dims := g.keys[key] + for j, v := range dims { + groupCols[j] = append(groupCols[j], v) + } + + // Add aggregated values + for j, acc := range accs { + aggrCols[j] = append(aggrCols[j], acc.Finalize()) } + + i++ + } + + // Now build Arrow arrays in correct schema order + fieldIndex := 0 + + // Build group-by columns first + for j := range g.groupByExpr { + colBuilders[fieldIndex] = buildDynamicArray(alloc, g.schema.Field(fieldIndex).Type, groupCols[j]) + fieldIndex++ + } + + // Build aggregate columns + for j := range g.groupExpr { + colBuilders[fieldIndex] = buildFloatArray(alloc, aggrCols[j]) + fieldIndex++ + } + + return &operators.RecordBatch{ + Schema: g.schema, + Columns: colBuilders, + RowCount: uint64(rowCount), } - return accumulators } -*/ +func buildDynamicArray(mem memory.Allocator, dt arrow.DataType, values []any) arrow.Array { + switch dt.ID() { + + // =========================== + // STRING (UTF8) + // =========================== + case arrow.STRING: + sb := array.NewStringBuilder(mem) + for _, v := range values { + if v == nil { + sb.AppendNull() + } else { + sb.Append(fmt.Sprintf("%v", v)) + } + } + return sb.NewArray() + + // =========================== + // SIGNED INTEGERS + // =========================== + case arrow.INT8: + b := array.NewInt8Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int8)) + } + } + return 
b.NewArray() + + case arrow.INT16: + b := array.NewInt16Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int16)) + } + } + return b.NewArray() + + case arrow.INT32: + b := array.NewInt32Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int32)) + } + } + return b.NewArray() + + case arrow.INT64: + b := array.NewInt64Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int64)) + } + } + return b.NewArray() + + // =========================== + // UNSIGNED INTEGERS + // =========================== + case arrow.UINT8: + b := array.NewUint8Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint8)) + } + } + return b.NewArray() + + case arrow.UINT16: + b := array.NewUint16Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint16)) + } + } + return b.NewArray() + + case arrow.UINT32: + b := array.NewUint32Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint32)) + } + } + return b.NewArray() + + case arrow.UINT64: + b := array.NewUint64Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint64)) + } + } + return b.NewArray() + + // =========================== + // FLOATS + // =========================== + case arrow.FLOAT32: + b := array.NewFloat32Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(float32)) + } + } + return b.NewArray() + + case arrow.FLOAT64: + b := array.NewFloat64Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(float64)) + } + } + return b.NewArray() + + // =========================== + // UNSUPPORTED TYPE + // =========================== + default: + panic(fmt.Sprintf("unsupported dynamic array type: %v", dt)) + } 
+} + +func buildFloatArray(mem memory.Allocator, values []float64) arrow.Array { + b := array.NewFloat64Builder(mem) + b.AppendValues(values, nil) + return b.NewArray() +} diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go index 57855d7..23803dc 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go @@ -9,7 +9,9 @@ import ( "strings" "testing" + "github.com/apache/arrow/go/v15/arrow/memory" "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" ) func generateGroupByTestColumns() ([]string, []any) { @@ -394,8 +396,296 @@ func TestBasicOperatorCasesGroupBy(t *testing.T) { gb.done = true _, err = gb.Next(100) if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected EOF but recieved %v", err) + t.Fatalf("expected EOF but received %v", err) } }) } +func TestGroupByNext_SingleColumnCount(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + gbExpr := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + batch, err := gb.Next(1000) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF, got %v", err) + } + + if batch == nil || batch.RowCount == 0 { + t.Fatalf("expected non-empty grouped result") + } + + // Validate schema + if batch.Schema.NumFields() != 2 { + t.Fatalf("expected 2 fields, got %d", batch.Schema.NumFields()) + } + + // Validate that group keys exist and aggregates exist + if batch.Columns[0].Len() == 0 { + t.Fatalf("expected region groups") + } + + if batch.Columns[1].Len() == 0 { + t.Fatalf("expected aggregated counts") + } +} + +func TestGroupByNext_MultipleGroupBy_MultipleAggs(t *testing.T) { + col := func(n string) 
Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + gbExpr := []Expr.Expression{ + col("seniority"), + col("region"), + } + + aggs := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatal(err) + } + + batch, err := gb.Next(50) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF, got %v", err) + } + + if batch.RowCount == 0 { + t.Fatalf("expected non-zero grouped rows") + } + + if batch.Schema.NumFields() != 5 { + t.Fatalf("expected 5 fields (2 group-by + 3 aggr), got %d", batch.Schema.NumFields()) + } +} + +func TestGroupByNext_MultipleNextCalls(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + gbExpr := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatal(err) + } + + // First call returns batch + EOF + _, err = gb.Next(100) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF on first return, got %v", err) + } + + // Second call MUST return EOF immediately + _, err = gb.Next(100) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF on second call, got %v", err) + } +} + +func TestBuildGroupBySchema_AllBranches(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + groupBy := []Expr.Expression{col("region"), col("seniority")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + {AggrFunc: Count, Child: col("id")}, + } + + schema, err := buildGroupBySchema(child.Schema(), groupBy, aggs) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if schema.NumFields() != 4 { + t.Fatalf("expected 4 fields got %d", schema.NumFields()) + } + 
+ // test group-by fields + if schema.Field(0).Type.ID() != arrow.STRING { + t.Fatalf("expected STRING for region") + } + + // aggregated fields always float64 + if schema.Field(2).Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected FLOAT64 for aggregate field") + } +} + +func TestBuildGroupBySchema_InvalidColumn(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + child := groupByProject() + + _, err := buildGroupBySchema(child.Schema(), []Expr.Expression{col("doesnotexist")}, nil) + if err == nil { + t.Fatalf("expected error but got none") + } +} + +func TestBuildGroupBySchema_InvalidAggType(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + child := groupByProject() + + aggs := []AggregateFunctions{ + // Boolean type or unsupported type + {AggrFunc: Sum, Child: col("name")}, + } + + _, err := buildGroupBySchema(child.Schema(), nil, aggs) + if err == nil { + t.Fatalf("expected invalid agg type error") + } +} +func TestGetValue_AllTypes(t *testing.T) { + mem := memory.NewGoAllocator() + + // int32 + i32 := array.NewInt32Builder(mem) + i32.Append(42) + arr32 := i32.NewArray() + if getValue(arr32, 0).(int32) != 42 { + t.Fatal("failed int32 case") + } + + // int64 + i64 := array.NewInt64Builder(mem) + i64.Append(99) + arr64 := i64.NewArray() + if getValue(arr64, 0).(int64) != 99 { + t.Fatal("failed int64 case") + } + + // float32 + f32 := array.NewFloat32Builder(mem) + f32.Append(3.5) + arrf32 := f32.NewArray() + if getValue(arrf32, 0).(float32) != 3.5 { + t.Fatal("failed float32 case") + } + + // float64 + f64 := array.NewFloat64Builder(mem) + f64.Append(9.1) + arrf64 := f64.NewArray() + if getValue(arrf64, 0).(float64) != 9.1 { + t.Fatal("failed float64 case") + } + + // string + sb := array.NewStringBuilder(mem) + sb.Append("hello") + sarr := sb.NewArray() + if getValue(sarr, 0).(string) != "hello" { + t.Fatal("failed string case") + } + + // boolean + bb := 
array.NewBooleanBuilder(mem) + bb.Append(true) + barr := bb.NewArray() + if getValue(barr, 0).(bool) != true { + t.Fatal("failed boolean case") + } +} + +func TestBuildDynamicArray_AllPrimitiveTypes(t *testing.T) { + mem := memory.NewGoAllocator() + + tests := []struct { + dt arrow.DataType + val []any + }{ + {arrow.PrimitiveTypes.Int8, []any{int8(1), nil, int8(3)}}, + {arrow.PrimitiveTypes.Int16, []any{int16(2), int16(5)}}, + {arrow.PrimitiveTypes.Int32, []any{int32(10), nil, int32(12)}}, + {arrow.PrimitiveTypes.Int64, []any{int64(20), int64(40)}}, + + {arrow.PrimitiveTypes.Uint8, []any{uint8(7), nil}}, + {arrow.PrimitiveTypes.Uint16, []any{uint16(100)}}, + {arrow.PrimitiveTypes.Uint32, []any{uint32(2000)}}, + {arrow.PrimitiveTypes.Uint64, []any{uint64(99999)}}, + + {arrow.PrimitiveTypes.Float32, []any{float32(2.2), nil}}, + {arrow.PrimitiveTypes.Float64, []any{float64(9.9)}}, + + {arrow.BinaryTypes.String, []any{"a", "b", nil}}, + } + + for _, tc := range tests { + arr := buildDynamicArray(mem, tc.dt, tc.val) + if arr.Len() != len(tc.val) { + t.Fatalf("wrong length for type %v", tc.dt) + } + } +} + +func TestCreateAccumulator_AllCases(t *testing.T) { + funcs := []AggrFunc{Min, Max, Sum, Count, Avg} + + for _, fn := range funcs { + acc := createAccumulator(fn) + if acc == nil { + t.Fatalf("expected accumulator for fn=%v", fn) + } + } +} + +func TestCreateAccumulator_PanicOnInvalid(t *testing.T) { + defer func() { + if recover() == nil { + t.Fatalf("expected panic for invalid function") + } + }() + + createAccumulator(AggrFunc(9999)) // invalid +} + +func TestBuildGroupByOutput_Basic(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + child := groupByProject() + + gbExpr := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatal(err) + } + + // invoke Next (fills accumulators) + _, _ = 
gb.Next(100) + + batch := buildGroupByOutput(gb) + + if batch.RowCount == 0 { + t.Fatalf("expected grouped rows") + } + + if len(batch.Columns) != 2 { + t.Fatalf("expected 2 columns, got %d", len(batch.Columns)) + } +} diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go index 6ac7cad..df9d3fa 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -13,16 +13,16 @@ import ( "github.com/apache/arrow/go/v17/arrow/compute" ) -// TODO: next steps are to deal with group by statments but that can be dealt with after basic aggr that return just 1 global value var ( ErrUnsupportedAggrFunc = func(aggr int) error { return fmt.Errorf("%d is an unsupported aggregate function", aggr) } ErrInvalidAggrColumnType = func(value any) error { - return fmt.Errorf("%v of type %T cannot be cast to float64 so it is not a valid column type to aggragate on", value, value) + return fmt.Errorf("%v of type %T cannot be cast to float64 so it is not a valid column type to aggregate on", value, value) } ) +// AggrFunc represents the type of aggregation function to be performed. 
type AggrFunc int const ( @@ -34,21 +34,14 @@ const ( ) var ( - _ = (Accumulator)(&MinAggrAccumulator{}) - _ = (Accumulator)(&MaxAggrAccumulator{}) - _ = (Accumulator)(&CountAggrAccumulator{}) - _ = (Accumulator)(&SumAggrAccumulator{}) - _ = (Accumulator)(&AvgAggrAccumulator{}) + _ = (accumulator)(&MinAggrAccumulator{}) + _ = (accumulator)(&MaxAggrAccumulator{}) + _ = (accumulator)(&CountAggrAccumulator{}) + _ = (accumulator)(&SumAggrAccumulator{}) + _ = (accumulator)(&AvgAggrAccumulator{}) _ = (operators.Operator)(&AggrExec{}) ) -// Min -//Max -//Count -// Sum -// Avg - -// for now just focus on single-column aggregation without group by func NewAggregateFunctions(aggrFunc AggrFunc, child Expr.Expression) AggregateFunctions { return AggregateFunctions{ AggrFunc: aggrFunc, @@ -57,15 +50,15 @@ func NewAggregateFunctions(aggrFunc AggrFunc, child Expr.Expression) AggregateFu } type AggregateFunctions struct { - AggrFunc AggrFunc // switch to deal with seperate aggregation functions + AggrFunc AggrFunc // switch to deal with separate aggregate functions Child Expr.Expression // resolves to a column generally } -type Accumulator interface { +type accumulator interface { Update(value float64) Finalize() float64 } -func newMinAggr() Accumulator { +func newMinAggr() accumulator { return &MinAggrAccumulator{} } @@ -84,7 +77,7 @@ func (m *MinAggrAccumulator) Update(value float64) { } func (m *MinAggrAccumulator) Finalize() float64 { return m.minV } -func newMaxAggr() Accumulator { +func newMaxAggr() accumulator { return &MaxAggrAccumulator{} } @@ -103,7 +96,7 @@ func (m *MaxAggrAccumulator) Update(value float64) { } func (m *MaxAggrAccumulator) Finalize() float64 { return m.maxV } -func NewCountAggr() Accumulator { +func NewCountAggr() accumulator { return &CountAggrAccumulator{} } @@ -116,7 +109,7 @@ func (c *CountAggrAccumulator) Update(_ float64) { } func (c *CountAggrAccumulator) Finalize() float64 { return c.count } -func NewSumAggr() Accumulator { +func NewSumAggr() 
accumulator { return &SumAggrAccumulator{} } @@ -128,34 +121,43 @@ func (s *SumAggrAccumulator) Update(value float64) { s.summation += value } func (s *SumAggrAccumulator) Finalize() float64 { return s.summation } -func newAvgAggr() Accumulator { +func newAvgAggr() accumulator { return &AvgAggrAccumulator{} } type AvgAggrAccumulator struct { + used bool values float64 count float64 } func (a *AvgAggrAccumulator) Update(value float64) { + a.used = true a.values += value a.count++ } -func (a *AvgAggrAccumulator) Finalize() float64 { return float64(a.values / a.count) } +func (a *AvgAggrAccumulator) Finalize() float64 { + // handles divide by zero + if !a.used { + return 0.0 + } + return a.values / a.count +} // =================== // Aggregator Operator // =================== +// handles global aggregations without group by type AggrExec struct { child operators.Operator // child operator schema *arrow.Schema // output schema aggExpressions []AggregateFunctions // list of wanted aggregate expressions - accumulators []Accumulator // list of accumulators corresponding to aggExpressions, these will actually work to compute the aggregation + accumulators []accumulator // list of accumulators corresponding to aggExpressions, these will actually work to compute the aggregation done bool // know when to return io.EOF } func NewGlobalAggrExec(child operators.Operator, aggExprs []AggregateFunctions) (*AggrExec, error) { - accs := make([]Accumulator, len(aggExprs)) + accs := make([]accumulator, len(aggExprs)) fields := make([]arrow.Field, len(aggExprs)) for i, agg := range aggExprs { dt, err := Expr.ExprDataType(agg.Child, child.Schema()) @@ -210,6 +212,7 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { } for { childBatch, err := a.child.Next(n) + fmt.Printf("child batch: %v\n", childBatch) if err != nil { if errors.Is(err, io.EOF) { break @@ -228,6 +231,9 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { valueArray := 
agrArray.(*array.Float64) accumulator := a.accumulators[i] for j := 0; j < valueArray.Len(); j++ { + if valueArray.IsNull(j) { + continue + } accumulator.Update(valueArray.Value(j)) } @@ -242,8 +248,8 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { return &operators.RecordBatch{ Schema: a.schema, Columns: resultColumns, - RowCount: uint64(len(a.aggExpressions)), - }, io.EOF + RowCount: 1, + }, nil // this is a pipeline breaker so it will always consume all of the input which means this needs to return an io.EOF } diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go index ea89bac..192630d 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go @@ -62,12 +62,81 @@ func generateAggTestColumns() ([]string, []any) { return names, columns } +func generateAggTestColumnsWithNulls(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "name", "age", "salary"} + + // ------------------------- + // id column (int32) + // ------------------------- + idB := array.NewInt32Builder(mem) + idVals := []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + idValid := []bool{ + true, true, false, true, true, + false, true, true, true, false, + } + idB.AppendValues(idVals, idValid) + idArr := idB.NewArray() + + // ------------------------- + // name column (string) + // ------------------------- + nameB := array.NewStringBuilder(mem) + nameVals := []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + } + nameValid := []bool{ + true, true, true, false, true, + true, true, true, false, true, + } + nameB.AppendValues(nameVals, nameValid) + nameArr := nameB.NewArray() + + // ------------------------- + // age column (int32) + // ------------------------- + ageB := array.NewInt32Builder(mem) + ageVals := []int32{28, 34, 45, 22, 31, 29, 40, 36, 50, 26} + 
ageValid := []bool{ + true, false, true, true, true, + true, false, true, true, true, + } + ageB.AppendValues(ageVals, ageValid) + ageArr := ageB.NewArray() + + // ------------------------- + // salary column (float64) + // ------------------------- + salB := array.NewFloat64Builder(mem) + salVals := []float64{ + 70000, 82000, 54000, 91000, 60000, + 75000, 66000, 0, 45000, 99000, + } + + salaryValid := []bool{ + true, true, true, true, true, + true, true, false, true, true, + } + + salB.AppendValues(salVals, salaryValid) + salaryArr := salB.NewArray() + + return names, []arrow.Array{idArr, nameArr, ageArr, salaryArr} +} + func aggProject() *project.InMemorySource { names, cols := generateAggTestColumns() p, _ := project.NewInMemoryProjectExec(names, cols) return p } +// TODO: add test that check for null +func aggProjectNull() *project.InMemorySource { + names, arr := generateAggTestColumnsWithNulls(memory.NewGoAllocator()) + p, _ := project.NewInMemoryProjectExecFromArrays(names, arr) + return p +} + func col(name string) Expr.Expression { return Expr.NewColumnResolve(name) } @@ -360,10 +429,7 @@ func TestAggregateExecNext(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - resultBatch, err := aggrExec.Next(100) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected io.EOF error, got nil") - } + resultBatch, _ := aggrExec.Next(100) t.Logf("record batch: %v\n", resultBatch) if resultBatch.Columns[0].(*array.Float64).Value(0) != 22 { t.Fatalf("expected minimum age 22, got %v", resultBatch.Columns[0].(*array.Float64).Value(0)) @@ -381,10 +447,7 @@ func TestAggregateExecNext(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - resultBatch, err := aggrExec.Next(100) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected io.EOF, got %v", err) - } + resultBatch, _ := aggrExec.Next(100) maxSalary := resultBatch.Columns[0].(*array.Float64).Value(0) if maxSalary != 99000.0 && maxSalary != 94000.0 && maxSalary != 93000.0 { @@ 
-403,10 +466,7 @@ func TestAggregateExecNext(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - resultBatch, err := aggrExec.Next(200) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected io.EOF, got %v", err) - } + resultBatch, _ := aggrExec.Next(200) sumIDs := resultBatch.Columns[0].(*array.Float64).Value(0) expected := float64((25 * 26) / 2) // sum(1..25) = 325 @@ -417,7 +477,7 @@ func TestAggregateExecNext(t *testing.T) { t.Run("Aggr count of age column", func(t *testing.T) { proj := aggProject() agg := []AggregateFunctions{ - {AggrFunc: Count, Child: col("age")}, + NewAggregateFunctions(Count, col("age")), } aggrExec, err := NewGlobalAggrExec(proj, agg) @@ -425,17 +485,14 @@ func TestAggregateExecNext(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - resultBatch, err := aggrExec.Next(300) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected io.EOF, got %v", err) - } + resultBatch, _ := aggrExec.Next(300) count := resultBatch.Columns[0].(*array.Float64).Value(0) if count != 25 { t.Fatalf("expected count 25, got %v", count) } }) - t.Run("Aggr average of salary (⚠ your AVG is wrong)", func(t *testing.T) { + t.Run("Aggr average of salary ", func(t *testing.T) { proj := aggProject() agg := []AggregateFunctions{ @@ -447,10 +504,7 @@ func TestAggregateExecNext(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - resultBatch, err := aggrExec.Next(500) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected io.EOF, got %v", err) - } + resultBatch, _ := aggrExec.Next(500) avg := resultBatch.Columns[0].(*array.Float64).Value(0) expected := 75740.02 @@ -474,10 +528,7 @@ func TestAggregateExecNext(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - resultBatch, err := aggrExec.Next(1000) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected io.EOF, got %v", err) - } + resultBatch, _ := aggrExec.Next(1000) minAge := resultBatch.Columns[0].(*array.Float64).Value(0) maxSalary := 
resultBatch.Columns[1].(*array.Float64).Value(0) @@ -527,3 +578,30 @@ func TestAggregateExecNext(t *testing.T) { } }) } + +func TestAggregateExecNull(t *testing.T) { + + t.Run("Aggr count of age column", func(t *testing.T) { + proj := aggProjectNull() + agg := []AggregateFunctions{ + NewAggregateFunctions(Count, col("age")), + NewAggregateFunctions(Sum, col("id")), + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + resultBatch, _ := aggrExec.Next(100) + t.Logf("rb:%v\n", resultBatch) + count := resultBatch.Columns[0].(*array.Float64).Value(0) + if count != 8 { + t.Fatalf("expected count 7, got %v", count) + } + sumIDs := resultBatch.Columns[1].(*array.Float64).Value(0) + expectedSum := float64(1 + 2 + 4 + 5 + 7 + 8 + 9) // only non-null ids + if sumIDs != expectedSum { + t.Fatalf("expected sum %v, got %v", expectedSum, sumIDs) + } + }) +} diff --git a/src/Backend/opti-sql-go/operators/project/custom.go b/src/Backend/opti-sql-go/operators/project/custom.go index e36fa0c..0816600 100644 --- a/src/Backend/opti-sql-go/operators/project/custom.go +++ b/src/Backend/opti-sql-go/operators/project/custom.go @@ -73,6 +73,35 @@ func (ms *InMemorySource) withFields(names ...string) error { ms.columns = cols return nil } +func NewInMemoryProjectExecFromArrays(names []string, arrays []arrow.Array) (*InMemorySource, error) { + if len(names) != len(arrays) { + return nil, operators.ErrInvalidSchema("number of column names and arrays do not match") + } + + fields := make([]arrow.Field, len(names)) + fieldToColIdx := make(map[string]int, len(names)) + + for i, arr := range arrays { + if arr == nil { + return nil, operators.ErrInvalidSchema(fmt.Sprintf("nil array for column %s", names[i])) + } + + fields[i] = arrow.Field{ + Name: names[i], + Type: arr.DataType(), + Nullable: true, // Arrow arrays may have null bitmaps + } + + fieldToColIdx[names[i]] = i + } + + return &InMemorySource{ + schema: arrow.NewSchema(fields, 
nil), + columns: arrays, + fieldToColIDx: fieldToColIdx, + }, nil +} + func (ms *InMemorySource) Next(n uint16) (*operators.RecordBatch, error) { if len(ms.columns) == 0 || ms.pos >= uint16(ms.columns[0].Len()) { return nil, io.EOF // EOF diff --git a/src/Backend/opti-sql-go/operators/record.go b/src/Backend/opti-sql-go/operators/record.go index 60f695b..d1f81a6 100644 --- a/src/Backend/opti-sql-go/operators/record.go +++ b/src/Backend/opti-sql-go/operators/record.go @@ -129,6 +129,7 @@ func (rb *RecordBatch) ColumnByName(name string) (arrow.Array, error) { } return rb.Columns[indices[0]], nil } + func (rbb *RecordBatchBuilder) GenIntArray(values ...int) arrow.Array { mem := memory.NewGoAllocator() builder := array.NewInt32Builder(mem) @@ -289,3 +290,115 @@ func (rbb *RecordBatchBuilder) GenLargeBinaryArray(values ...[]byte) arrow.Array } return builder.NewArray() } + +func (rb *RecordBatch) PrettyPrint() string { + if rb == nil { + return "" + } + + // ------------------------------- + // 1. Extract column names + // ------------------------------- + colNames := make([]string, len(rb.Schema.Fields())) + for i, f := range rb.Schema.Fields() { + colNames[i] = f.Name + } + + // ------------------------------- + // 2. Extract rows into [][]string + // ------------------------------- + rows := make([][]string, rb.RowCount) + for r := 0; r < int(rb.RowCount); r++ { + row := make([]string, len(rb.Columns)) + for c, arr := range rb.Columns { + row[c] = formatValue(arr, r) + } + rows[r] = row + } + + // ------------------------------- + // 3. Compute column widths + // ------------------------------- + colWidths := make([]int, len(colNames)) + for i, name := range colNames { + colWidths[i] = len(name) + } + for _, row := range rows { + for i, v := range row { + if len(v) > colWidths[i] { + colWidths[i] = len(v) + } + } + } + + // ------------------------------- + // 4. 
Build horizontal border line + // ------------------------------- + border := "+" + for _, w := range colWidths { + border += strings.Repeat("-", w+2) + "+" + } + + // ------------------------------- + // 5. Build the final output + // ------------------------------- + var b strings.Builder + + b.WriteString(border + "\n") + + // Header + b.WriteString("|") + for i, name := range colNames { + b.WriteString(" " + padRight(name, colWidths[i]) + " |") + } + b.WriteString("\n") + + b.WriteString(border + "\n") + + // Rows + for _, row := range rows { + b.WriteString("|") + for i, v := range row { + b.WriteString(" " + padRight(v, colWidths[i]) + " |") + } + b.WriteString("\n") + } + + b.WriteString(border) + + return b.String() +} + +// ------------------------------- +// Helper Functions +// ------------------------------- + +func padRight(s string, width int) string { + if len(s) >= width { + return s + } + return s + strings.Repeat(" ", width-len(s)) +} + +func formatValue(arr arrow.Array, row int) string { + if arr.IsNull(row) { + return "NULL" + } + + switch col := arr.(type) { + case *array.Int32: + return fmt.Sprintf("%d", col.Value(row)) + case *array.Int64: + return fmt.Sprintf("%d", col.Value(row)) + case *array.Float32: + return fmt.Sprintf("%g", col.Value(row)) + case *array.Float64: + return fmt.Sprintf("%g", col.Value(row)) + case *array.String: + return col.Value(row) + case *array.Boolean: + return fmt.Sprintf("%t", col.Value(row)) + default: + return "" + } +} From c1da3cb84d3a6b5eedad4d651d9afc14fa5e2054 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Tue, 25 Nov 2025 22:47:40 -0500 Subject: [PATCH 04/21] feat: Implement having operator --- src/Backend/opti-sql-go/Expr/expr.go | 1 + .../opti-sql-go/operators/aggr/groupBy.go | 4 +- .../operators/aggr/groupBy_test.go | 21 +- .../opti-sql-go/operators/aggr/having.go | 78 +++++++ .../opti-sql-go/operators/aggr/having_test.go | 213 ++++++++++++++++++ .../opti-sql-go/operators/aggr/singleAggr.go | 1 - 
.../opti-sql-go/operators/aggr/sort.go | 2 +- .../opti-sql-go/operators/filter/filter.go | 4 +- .../opti-sql-go/operators/test/t1_test.go | 1 + 9 files changed, 302 insertions(+), 23 deletions(-) create mode 100644 src/Backend/opti-sql-go/operators/aggr/having.go create mode 100644 src/Backend/opti-sql-go/operators/aggr/having_test.go create mode 100644 src/Backend/opti-sql-go/operators/test/t1_test.go diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index 4ae10bb..e27d179 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -387,6 +387,7 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error } rightArr, err := EvalExpression(b.Right, batch) if err != nil { + fmt.Printf("right side evaluation failed with %v", err) return nil, err } opt := compute.ArithmeticOptions{} diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index 686ae3a..c958cac 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -107,7 +107,6 @@ func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { } } key := strings.Join(keyParts, "|") - fmt.Printf("key: %v\n", key) // Allocate accumulator list if new group if _, exists := g.groups[key]; !exists { g.groups[key] = make([]accumulator, len(g.groupExpr)) @@ -132,7 +131,7 @@ func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { batch := buildGroupByOutput(g) g.done = true - return batch, io.EOF + return batch, nil } func (g *GroupByExec) Schema() *arrow.Schema { @@ -224,7 +223,6 @@ func buildGroupByOutput(g *GroupByExec) *operators.RecordBatch { rowCount := len(g.groups) if rowCount == 0 { - // return empty batch (0 groups) return &operators.RecordBatch{ Schema: g.schema, Columns: []arrow.Array{}, diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go 
b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go index 23803dc..0482870 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go @@ -124,8 +124,7 @@ func groupByProject() *project.InMemorySource { func TestGroupByInit(t *testing.T) { p := groupByProject() - rc, _ := p.Next(12) - fmt.Printf("rc:%v \n", rc) + _, _ = p.Next(12) } func TestNewGroupByExecAndSchema(t *testing.T) { @@ -416,10 +415,7 @@ func TestGroupByNext_SingleColumnCount(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - batch, err := gb.Next(1000) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected EOF, got %v", err) - } + batch, _ := gb.Next(1000) if batch == nil || batch.RowCount == 0 { t.Fatalf("expected non-empty grouped result") @@ -461,10 +457,7 @@ func TestGroupByNext_MultipleGroupBy_MultipleAggs(t *testing.T) { t.Fatal(err) } - batch, err := gb.Next(50) - if err == nil || !errors.Is(err, io.EOF) { - t.Fatalf("expected EOF, got %v", err) - } + batch, _ := gb.Next(50) if batch.RowCount == 0 { t.Fatalf("expected non-zero grouped rows") @@ -491,16 +484,12 @@ func TestGroupByNext_MultipleNextCalls(t *testing.T) { } // First call returns batch + EOF + _, _ = gb.Next(100) _, err = gb.Next(100) if !errors.Is(err, io.EOF) { - t.Fatalf("expected EOF on first return, got %v", err) + t.Fatalf("expected EOF on second return, got %v", err) } - // Second call MUST return EOF immediately - _, err = gb.Next(100) - if !errors.Is(err, io.EOF) { - t.Fatalf("expected EOF on second call, got %v", err) - } } func TestBuildGroupBySchema_AllBranches(t *testing.T) { diff --git a/src/Backend/opti-sql-go/operators/aggr/having.go b/src/Backend/opti-sql-go/operators/aggr/having.go new file mode 100644 index 0000000..72a5a91 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/aggr/having.go @@ -0,0 +1,78 @@ +package aggr + +import ( + "errors" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + 
"opti-sql-go/operators/filter" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +// carbon copy of filter.go with minor changes to fit having semantics +var ( + _ = (operators.Operator)(&HavingExec{}) +) + +type HavingClone = filter.FilterExec + +type HavingExec struct { + input operators.Operator + schema *arrow.Schema + + havingExpr Expr.Expression + done bool +} + +func NewHavingExec(input operators.Operator, havingFilter Expr.Expression) (*HavingExec, error) { + + return &HavingExec{ + input: input, + schema: input.Schema(), + havingExpr: havingFilter, + }, nil +} + +func (h *HavingExec) Next(n uint16) (*operators.RecordBatch, error) { + if h.done { + return nil, io.EOF + } + batch, err := h.input.Next(n) + if err != nil { + return nil, err + } + booleanMask, err := Expr.EvalExpression(h.havingExpr, batch) + if err != nil { + return nil, err + } + boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should + if !ok { + return nil, errors.New("predicate did not evaluate to boolean array") + } + filteredCol := make([]arrow.Array, len(batch.Columns)) + for i, col := range batch.Columns { + filteredCol[i], err = filter.ApplyBooleanMask(col, boolArr) + if err != nil { + return nil, err + } + } + // release old columns + for _, c := range batch.Columns { + c.Release() + } + size := uint64(filteredCol[0].Len()) + + return &operators.RecordBatch{ + Schema: batch.Schema, + Columns: filteredCol, + RowCount: size, + }, nil +} +func (h *HavingExec) Schema() *arrow.Schema { + return h.schema +} +func (h *HavingExec) Close() error { + return h.input.Close() +} diff --git a/src/Backend/opti-sql-go/operators/aggr/having_test.go b/src/Backend/opti-sql-go/operators/aggr/having_test.go new file mode 100644 index 0000000..9321639 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/aggr/having_test.go @@ -0,0 +1,213 @@ +package aggr + +import ( + "errors" + 
"io" + "strings" + "testing" + + "opti-sql-go/Expr" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +func TestHavingExec_OnGroupBy(t *testing.T) { + + // ============================================================= + // 1) HAVING SUM(salary) > 600000 + // ============================================================= + t.Run("having_sum_salary_gt_600k", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected GroupBy error: %v", err) + } + + sumCol := "sum_Column(salary)" + + // SUM(salary) > 600000 + havingExpr := Expr.NewBinaryExpr( + Expr.NewColumnResolve(sumCol), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(600000)), + ) + + having, err := NewHavingExec(gb, havingExpr) + if err != nil { + t.Fatalf("unexpected HavingExec init error: %v", err) + } + + batch, err := having.Next(1024) + if err != nil { + t.Fatalf("unexpected error running Next: %v", err) + } + t.Logf("batch : %v\n", batch.PrettyPrint()) + sumValues := batch.Columns[1].(*array.Float64) + for i := 0; i < sumValues.Len(); i++ { + if sumValues.Value(i) <= 600000 { + t.Fatalf("expected sum(salary) > 600000, got %f", sumValues.Value(i)) + } + } + + }) + + // ============================================================= + // 2) HAVING COUNT(id) >= 10 + // ============================================================= + t.Run("having_count_id_ge_10", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected GroupBy err: %v", err) + } + + countCol := "count_Column(id)" + + havingExpr := Expr.NewBinaryExpr( + 
Expr.NewColumnResolve(countCol), + Expr.GreaterThanOrEqual, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(10)), + ) + + having, err := NewHavingExec(gb, havingExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + batch, err := having.Next(200) + if err != nil { + t.Fatalf("unexpected Next error: %v", err) + } + + if batch.RowCount != 3 { // North, South, West ≥ 10 + t.Fatalf("expected 3 regions with >=10 rows, got %d", batch.RowCount) + } + }) + + // ============================================================= + // 3) HAVING filters all groups out + // ============================================================= + t.Run("having_filters_all", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, _ := NewGroupByExec(child, aggs, groupBy) + + sumCol := "sum_Column(salary)" + + // Impossible condition + havingExpr := Expr.NewBinaryExpr( + Expr.NewColumnResolve(sumCol), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(1_000_000_000)), + ) + + having, _ := NewHavingExec(gb, havingExpr) + + batch, err := having.Next(1024) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + if batch.RowCount != 0 { + t.Fatalf("expected all rows to be filtered out, got %d", batch.RowCount) + } + }) + + // ============================================================= + // 4) Non-boolean predicate → error + // ============================================================= + t.Run("having_non_boolean_predicate", func(t *testing.T) { + + child := groupByProject() + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, _ := NewGroupByExec(child, aggs, groupBy) + + // invalid: resolves to float, not boolean + invalidExpr := Expr.NewColumnResolve("sum_Column(salary)") + + having, _ := NewHavingExec(gb, 
invalidExpr) + + _, err := having.Next(100) + if err == nil { + t.Fatalf("expected non-boolean error, got nil") + } + if !strings.Contains(err.Error(), "boolean") { + t.Fatalf("expected boolean error, got: %v", err) + } + }) + + // ============================================================= + // 5) done = true returns EOF + // ============================================================= + t.Run("done_returns_eof", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, _ := NewGroupByExec(child, aggs, groupBy) + + countCol := "count_Column(id)" + + havingExpr := Expr.NewBinaryExpr( + Expr.NewColumnResolve(countCol), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(0)), + ) + + h, _ := NewHavingExec(gb, havingExpr) + h.done = true + + _, err := h.Next(10) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF, got: %v", err) + } + }) + + // ============================================================= + // 6) Close forwards to child.Close() + // ============================================================= + t.Run("close_propagates", func(t *testing.T) { + + child := groupByProject() + + gb, _ := NewGroupByExec(child, []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + }, []Expr.Expression{col("region")}) + + h, _ := NewHavingExec(gb, Expr.NewLiteralResolve(arrow.FixedWidthTypes.Boolean, true)) + + if err := h.Close(); err != nil { + t.Fatalf("Close returned error: %v", err) + } + t.Log(h.Schema()) + }) +} diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go index df9d3fa..3e1f4e6 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -212,7 +212,6 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { } for { childBatch, err := 
a.child.Next(n) - fmt.Printf("child batch: %v\n", childBatch) if err != nil { if errors.Is(err, io.EOF) { break diff --git a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index d5a469b..ed342a8 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -1,3 +1,3 @@ package aggr -// order by col asc, col 2 desc .... ect +// order by col asc, col 2 desc .... etc diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index ddd8c1b..645eeeb 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -55,7 +55,7 @@ func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { } filteredCol := make([]arrow.Array, len(batch.Columns)) for i, col := range batch.Columns { - filteredCol[i], err = applyBooleanMask(col, boolArr) + filteredCol[i], err = ApplyBooleanMask(col, boolArr) if err != nil { return nil, err } @@ -80,7 +80,7 @@ func (f *FilterExec) Close() error { return f.input.Close() } -func applyBooleanMask(col arrow.Array, mask *array.Boolean) (arrow.Array, error) { +func ApplyBooleanMask(col arrow.Array, mask *array.Boolean) (arrow.Array, error) { datum, err := compute.Filter( context.TODO(), compute.NewDatum(col), diff --git a/src/Backend/opti-sql-go/operators/test/t1_test.go b/src/Backend/opti-sql-go/operators/test/t1_test.go new file mode 100644 index 0000000..56e5404 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/test/t1_test.go @@ -0,0 +1 @@ +package test From 825732b448c67c9e2495de2e5e6f31bf603ca94a Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Wed, 26 Nov 2025 13:05:18 -0500 Subject: [PATCH 05/21] fixed PR comments --- .../opti-sql-go/operators/aggr/groupBy.go | 11 ++- .../operators/aggr/groupBy_test.go | 2 +- .../opti-sql-go/operators/aggr/having.go | 7 +- .../opti-sql-go/operators/aggr/singleAggr.go | 69 
+++++++++---------- .../opti-sql-go/operators/test/t1_test.go | 2 + 5 files changed, 43 insertions(+), 48 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index c958cac..7c57b28 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -8,9 +8,9 @@ import ( "opti-sql-go/operators" "strings" - "github.com/apache/arrow/go/v15/arrow/memory" "github.com/apache/arrow/go/v17/arrow" "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" ) /* @@ -156,7 +156,7 @@ func buildGroupBySchema(childSchema *arrow.Schema, groupByExpr []Expr.Expression fields = append(fields, arrow.Field{ Name: fmt.Sprintf("group_%s", expr.String()), Type: dt, - Nullable: false, + Nullable: true, }) } @@ -208,9 +208,9 @@ func createAccumulator(fn AggrFunc) accumulator { case Max: return newMaxAggr() case Sum: - return NewSumAggr() + return newSumAggr() case Count: - return NewCountAggr() + return newCountAggr() case Avg: return newAvgAggr() default: @@ -244,8 +244,6 @@ func buildGroupByOutput(g *GroupByExec) *operators.RecordBatch { aggrCols[i] = make([]float64, 0, rowCount) } - // Iterate groups in stable order - i := 0 for key, accs := range g.groups { // Add group-by (dimension) values dims := g.keys[key] @@ -258,7 +256,6 @@ func buildGroupByOutput(g *GroupByExec) *operators.RecordBatch { aggrCols[j] = append(aggrCols[j], acc.Finalize()) } - i++ } // Now build Arrow arrays in correct schema order diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go index 0482870..10756f0 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go @@ -9,9 +9,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v15/arrow/memory" "github.com/apache/arrow/go/v17/arrow" 
"github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" ) func generateGroupByTestColumns() ([]string, []any) { diff --git a/src/Backend/opti-sql-go/operators/aggr/having.go b/src/Backend/opti-sql-go/operators/aggr/having.go index 72a5a91..3f47233 100644 --- a/src/Backend/opti-sql-go/operators/aggr/having.go +++ b/src/Backend/opti-sql-go/operators/aggr/having.go @@ -16,8 +16,6 @@ var ( _ = (operators.Operator)(&HavingExec{}) ) -type HavingClone = filter.FilterExec - type HavingExec struct { input operators.Operator schema *arrow.Schema @@ -41,6 +39,9 @@ func (h *HavingExec) Next(n uint16) (*operators.RecordBatch, error) { } batch, err := h.input.Next(n) if err != nil { + if errors.Is(err, io.EOF) { + h.done = true + } return nil, err } booleanMask, err := Expr.EvalExpression(h.havingExpr, batch) @@ -49,7 +50,7 @@ func (h *HavingExec) Next(n uint16) (*operators.RecordBatch, error) { } boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should if !ok { - return nil, errors.New("predicate did not evaluate to boolean array") + return nil, errors.New("having predicate did not evaluate to boolean array") } filteredCol := make([]arrow.Array, len(batch.Columns)) for i, col := range batch.Columns { diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go index 3e1f4e6..9593ca3 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -34,11 +34,11 @@ const ( ) var ( - _ = (accumulator)(&MinAggrAccumulator{}) - _ = (accumulator)(&MaxAggrAccumulator{}) - _ = (accumulator)(&CountAggrAccumulator{}) - _ = (accumulator)(&SumAggrAccumulator{}) - _ = (accumulator)(&AvgAggrAccumulator{}) + _ = (accumulator)(&minAggrAccumulator{}) + _ = (accumulator)(&maxAggrAccumulator{}) + _ = (accumulator)(&countAggrAccumulator{}) + _ = 
(accumulator)(&sumAggrAccumulator{}) + _ = (accumulator)(&avgAggrAccumulator{}) _ = (operators.Operator)(&AggrExec{}) ) @@ -59,15 +59,15 @@ type accumulator interface { } func newMinAggr() accumulator { - return &MinAggrAccumulator{} + return &minAggrAccumulator{} } -type MinAggrAccumulator struct { +type minAggrAccumulator struct { minV float64 firstValue bool } -func (m *MinAggrAccumulator) Update(value float64) { +func (m *minAggrAccumulator) Update(value float64) { if !m.firstValue { m.minV = value m.firstValue = true @@ -76,17 +76,17 @@ func (m *MinAggrAccumulator) Update(value float64) { m.minV = min(m.minV, value) } -func (m *MinAggrAccumulator) Finalize() float64 { return m.minV } +func (m *minAggrAccumulator) Finalize() float64 { return m.minV } func newMaxAggr() accumulator { - return &MaxAggrAccumulator{} + return &maxAggrAccumulator{} } -type MaxAggrAccumulator struct { +type maxAggrAccumulator struct { maxV float64 firstValue bool } -func (m *MaxAggrAccumulator) Update(value float64) { +func (m *maxAggrAccumulator) Update(value float64) { if !m.firstValue { m.maxV = value m.firstValue = true @@ -94,49 +94,49 @@ func (m *MaxAggrAccumulator) Update(value float64) { } m.maxV = max(m.maxV, value) } -func (m *MaxAggrAccumulator) Finalize() float64 { return m.maxV } +func (m *maxAggrAccumulator) Finalize() float64 { return m.maxV } -func NewCountAggr() accumulator { - return &CountAggrAccumulator{} +func newCountAggr() accumulator { + return &countAggrAccumulator{} } -type CountAggrAccumulator struct { +type countAggrAccumulator struct { count float64 } -func (c *CountAggrAccumulator) Update(_ float64) { +func (c *countAggrAccumulator) Update(_ float64) { c.count++ } -func (c *CountAggrAccumulator) Finalize() float64 { return c.count } +func (c *countAggrAccumulator) Finalize() float64 { return c.count } -func NewSumAggr() accumulator { - return &SumAggrAccumulator{} +func newSumAggr() accumulator { + return &sumAggrAccumulator{} } -type SumAggrAccumulator 
struct { +type sumAggrAccumulator struct { summation float64 } -func (s *SumAggrAccumulator) Update(value float64) { +func (s *sumAggrAccumulator) Update(value float64) { s.summation += value } -func (s *SumAggrAccumulator) Finalize() float64 { return s.summation } +func (s *sumAggrAccumulator) Finalize() float64 { return s.summation } func newAvgAggr() accumulator { - return &AvgAggrAccumulator{} + return &avgAggrAccumulator{} } -type AvgAggrAccumulator struct { +type avgAggrAccumulator struct { used bool values float64 count float64 } -func (a *AvgAggrAccumulator) Update(value float64) { +func (a *avgAggrAccumulator) Update(value float64) { a.used = true a.values += value a.count++ } -func (a *AvgAggrAccumulator) Finalize() float64 { +func (a *avgAggrAccumulator) Finalize() float64 { // handles divide by zero if !a.used { return 0.0 @@ -174,10 +174,10 @@ func NewGlobalAggrExec(child operators.Operator, aggExprs []AggregateFunctions) accs[i] = newMaxAggr() case Count: fieldName = fmt.Sprintf("count_%s", agg.Child.String()) - accs[i] = NewCountAggr() + accs[i] = newCountAggr() case Sum: fieldName = fmt.Sprintf("sum_%s", agg.Child.String()) - accs[i] = NewSumAggr() + accs[i] = newSumAggr() case Avg: fieldName = fmt.Sprintf("avg_%s", agg.Child.String()) accs[i] = newAvgAggr() @@ -199,13 +199,9 @@ func NewGlobalAggrExec(child operators.Operator, aggExprs []AggregateFunctions) }, nil } -// check for io.EOF with flag -// read in all record batches -// for each batch, run Expr.Evaluate, to get the column you want for the expression (cast to float64) -// -// for each element of that column grab the values you want using the accumulator interface -// -// build output batch, for now its just 1 of everything straight forward +// Next consumes all batches from the child operator, evaluates the aggregate expressions, +// updates the accumulators for each value, and returns a single output batch containing +// the final aggregation results. 
It returns io.EOF after producing the result batch. func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { if a.done { return nil, io.EOF @@ -249,7 +245,6 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { Columns: resultColumns, RowCount: 1, }, nil - // this is a pipeline breaker so it will always consume all of the input which means this needs to return an io.EOF } func (a *AggrExec) Schema() *arrow.Schema { diff --git a/src/Backend/opti-sql-go/operators/test/t1_test.go b/src/Backend/opti-sql-go/operators/test/t1_test.go index 56e5404..a571421 100644 --- a/src/Backend/opti-sql-go/operators/test/t1_test.go +++ b/src/Backend/opti-sql-go/operators/test/t1_test.go @@ -1 +1,3 @@ package test + +// test for all operators together From 7352d28e00088c53310808d42462dd46b4a59ef5 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Wed, 26 Nov 2025 13:20:39 -0500 Subject: [PATCH 06/21] fix:removed array memory leaks --- src/Backend/opti-sql-go/Expr/expr.go | 3 --- src/Backend/opti-sql-go/operators/aggr/groupBy.go | 12 ++++++++++++ src/Backend/opti-sql-go/operators/aggr/having.go | 4 +--- src/Backend/opti-sql-go/operators/aggr/singleAggr.go | 1 + .../opti-sql-go/operators/aggr/singleAggr_test.go | 1 - src/Backend/opti-sql-go/operators/filter/filter.go | 5 ++--- src/Backend/opti-sql-go/operators/record.go | 7 +++++++ 7 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index e27d179..f9d88de 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -387,7 +387,6 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error } rightArr, err := EvalExpression(b.Right, batch) if err != nil { - fmt.Printf("right side evaluation failed with %v", err) return nil, err } opt := compute.ArithmeticOptions{} @@ -496,7 +495,6 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error return 
unpackDatum(datum) case Like: if leftArr.DataType() != arrow.BinaryTypes.String || rightArr.DataType() != arrow.BinaryTypes.String { - // regEx runs only on strings return nil, errors.New("binary operator Like only works on arrays of strings") } var compiledRegEx = compileSqlRegEx(rightArr.ValueStr(0)) @@ -504,7 +502,6 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error leftStrArray := leftArr.(*array.String) for i := 0; i < leftStrArray.Len(); i++ { valid := validRegEx(leftStrArray.Value(i), compiledRegEx) - fmt.Printf("does %s match %s: %v\n", leftStrArray.Value(i), compiledRegEx, valid) filterBuilder.Append(valid) } return filterBuilder.NewArray(), nil diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index 7c57b28..5e65bfb 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -75,6 +75,8 @@ func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { for i, expr := range g.groupByExpr { arr, err := Expr.EvalExpression(expr, childBatch) if err != nil { + operators.ReleaseArrays(groupArrays) + operators.ReleaseArrays(childBatch.Columns) return nil, err } groupArrays[i] = arr @@ -85,10 +87,16 @@ func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { for i, agg := range g.groupExpr { arr, err := Expr.EvalExpression(agg.Child, childBatch) if err != nil { + operators.ReleaseArrays(aggrArrays) + operators.ReleaseArrays(groupArrays) + operators.ReleaseArrays(childBatch.Columns) return nil, err } arr, err = castArrayToFloat64(arr) if err != nil { + operators.ReleaseArrays(aggrArrays) + operators.ReleaseArrays(groupArrays) + operators.ReleaseArrays(childBatch.Columns) return nil, err } aggrArrays[i] = arr @@ -125,6 +133,10 @@ func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { g.groups[key][i].Update(val) } } + // 4. 
release temp arrays + operators.ReleaseArrays(aggrArrays) + operators.ReleaseArrays(groupArrays) + operators.ReleaseArrays(childBatch.Columns) } // 4. Build output RecordBatch diff --git a/src/Backend/opti-sql-go/operators/aggr/having.go b/src/Backend/opti-sql-go/operators/aggr/having.go index 3f47233..a2aeb63 100644 --- a/src/Backend/opti-sql-go/operators/aggr/having.go +++ b/src/Backend/opti-sql-go/operators/aggr/having.go @@ -60,9 +60,7 @@ func (h *HavingExec) Next(n uint16) (*operators.RecordBatch, error) { } } // release old columns - for _, c := range batch.Columns { - c.Release() - } + operators.ReleaseArrays(batch.Columns) size := uint64(filteredCol[0].Len()) return &operators.RecordBatch{ diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go index 9593ca3..0d1db36 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -233,6 +233,7 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { } } + operators.ReleaseArrays(childBatch.Columns) } // build array with just the result of the column resultColumns := make([]arrow.Array, len(a.accumulators)) diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go index 192630d..9b5af24 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go @@ -130,7 +130,6 @@ func aggProject() *project.InMemorySource { return p } -// TODO: add test that check for null func aggProjectNull() *project.InMemorySource { names, arr := generateAggTestColumnsWithNulls(memory.NewGoAllocator()) p, _ := project.NewInMemoryProjectExecFromArrays(names, arr) diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index 645eeeb..e93a1c8 100644 --- 
a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -60,10 +60,9 @@ func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { return nil, err } } + booleanMask.Release() // release old columns - for _, c := range batch.Columns { - c.Release() - } + operators.ReleaseArrays(batch.Columns) size := uint64(filteredCol[0].Len()) return &operators.RecordBatch{ diff --git a/src/Backend/opti-sql-go/operators/record.go b/src/Backend/opti-sql-go/operators/record.go index d1f81a6..24c6da7 100644 --- a/src/Backend/opti-sql-go/operators/record.go +++ b/src/Backend/opti-sql-go/operators/record.go @@ -290,6 +290,13 @@ func (rbb *RecordBatchBuilder) GenLargeBinaryArray(values ...[]byte) arrow.Array } return builder.NewArray() } +func ReleaseArrays(a []arrow.Array) { + for _, col := range a { + if col != nil { + col.Release() + } + } +} func (rb *RecordBatch) PrettyPrint() string { if rb == nil { From 139c88c9899b758c44279739ec517ff021904e98 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Wed, 26 Nov 2025 13:22:24 -0500 Subject: [PATCH 07/21] fix:added naming convention for child input record batch --- src/Backend/opti-sql-go/operators/project/projectExec.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/project/projectExec.go b/src/Backend/opti-sql-go/operators/project/projectExec.go index 9d93d96..3df1fee 100644 --- a/src/Backend/opti-sql-go/operators/project/projectExec.go +++ b/src/Backend/opti-sql-go/operators/project/projectExec.go @@ -94,9 +94,7 @@ func (p *ProjectExec) Next(n uint16) (*operators.RecordBatch, error) { outPutCols[i] = arr arr.Retain() } - for _, c := range childBatch.Columns { - c.Release() - } + operators.ReleaseArrays(childBatch.Columns) return &operators.RecordBatch{ Schema: &p.outputschema, Columns: outPutCols, From 1e48e9da1f1ff100fa95d6b1c5e74377ed47dde2 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Wed, 26 Nov 2025 
13:26:15 -0500 Subject: [PATCH 08/21] closes #25 and closes #24 --- src/Backend/opti-sql-go/operators/filter/filter.go | 12 ++++++------ src/Backend/opti-sql-go/operators/record.go | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index e93a1c8..6c30c8f 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -41,11 +41,11 @@ func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { if f.done { return nil, io.EOF } - batch, err := f.input.Next(n) + childBatch, err := f.input.Next(n) if err != nil { return nil, err } - booleanMask, err := Expr.EvalExpression(f.predicate, batch) + booleanMask, err := Expr.EvalExpression(f.predicate, childBatch) if err != nil { return nil, err } @@ -53,8 +53,8 @@ func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { if !ok { return nil, errors.New("predicate did not evaluate to boolean array") } - filteredCol := make([]arrow.Array, len(batch.Columns)) - for i, col := range batch.Columns { + filteredCol := make([]arrow.Array, len(childBatch.Columns)) + for i, col := range childBatch.Columns { filteredCol[i], err = ApplyBooleanMask(col, boolArr) if err != nil { return nil, err @@ -62,11 +62,11 @@ func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { } booleanMask.Release() // release old columns - operators.ReleaseArrays(batch.Columns) + operators.ReleaseArrays(childBatch.Columns) size := uint64(filteredCol[0].Len()) return &operators.RecordBatch{ - Schema: batch.Schema, + Schema: childBatch.Schema, Columns: filteredCol, RowCount: size, }, nil diff --git a/src/Backend/opti-sql-go/operators/record.go b/src/Backend/opti-sql-go/operators/record.go index 24c6da7..6678ef4 100644 --- a/src/Backend/opti-sql-go/operators/record.go +++ b/src/Backend/opti-sql-go/operators/record.go @@ -24,7 +24,7 @@ type Operator 
interface { type RecordBatch struct { Schema *arrow.Schema Columns []arrow.Array - RowCount uint64 // TODO: update to actually use this, in all operators + RowCount uint64 // } type SchemaBuilder struct { From 413f2062b3b7f9e1b4cc2efd244edd9e2e3da1fe Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Thu, 27 Nov 2025 13:48:18 -0500 Subject: [PATCH 09/21] feat:implement basic sort operator --- .../opti-sql-go/operators/aggr/groupBy.go | 8 +- .../opti-sql-go/operators/aggr/having.go | 12 +- .../opti-sql-go/operators/aggr/singleAggr.go | 8 +- .../opti-sql-go/operators/aggr/sort.go | 338 ++++++++++++++++++ .../opti-sql-go/operators/aggr/sort_test.go | 126 ++++++- .../opti-sql-go/operators/project/parquet.go | 8 +- .../operators/project/projectExec.go | 8 +- 7 files changed, 483 insertions(+), 25 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index 5e65bfb..962a450 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -25,7 +25,7 @@ var ( // place all unique elements of the group by column into a hash table, each element gets their own Accumulator instance type GroupByExec struct { - child operators.Operator + input operators.Operator schema *arrow.Schema groupExpr []AggregateFunctions groupByExpr []Expr.Expression // column names @@ -42,7 +42,7 @@ func NewGroupByExec(child operators.Operator, groupExpr []AggregateFunctions, gr } return &GroupByExec{ - child: child, + input: child, schema: s, groupExpr: groupExpr, groupByExpr: groupBy, @@ -60,7 +60,7 @@ func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) { } for { - childBatch, err := g.child.Next(batchSize) + childBatch, err := g.input.Next(batchSize) if err != nil { if errors.Is(err, io.EOF) { break @@ -150,7 +150,7 @@ func (g *GroupByExec) Schema() *arrow.Schema { return g.schema } func (g *GroupByExec) Close() error { - return g.child.Close() + 
return g.input.Close() } // handles validation and building of schema for group by diff --git a/src/Backend/opti-sql-go/operators/aggr/having.go b/src/Backend/opti-sql-go/operators/aggr/having.go index a2aeb63..a2a559f 100644 --- a/src/Backend/opti-sql-go/operators/aggr/having.go +++ b/src/Backend/opti-sql-go/operators/aggr/having.go @@ -37,14 +37,14 @@ func (h *HavingExec) Next(n uint16) (*operators.RecordBatch, error) { if h.done { return nil, io.EOF } - batch, err := h.input.Next(n) + childBatch, err := h.input.Next(n) if err != nil { if errors.Is(err, io.EOF) { h.done = true } return nil, err } - booleanMask, err := Expr.EvalExpression(h.havingExpr, batch) + booleanMask, err := Expr.EvalExpression(h.havingExpr, childBatch) if err != nil { return nil, err } @@ -52,19 +52,19 @@ func (h *HavingExec) Next(n uint16) (*operators.RecordBatch, error) { if !ok { return nil, errors.New("having predicate did not evaluate to boolean array") } - filteredCol := make([]arrow.Array, len(batch.Columns)) - for i, col := range batch.Columns { + filteredCol := make([]arrow.Array, len(childBatch.Columns)) + for i, col := range childBatch.Columns { filteredCol[i], err = filter.ApplyBooleanMask(col, boolArr) if err != nil { return nil, err } } // release old columns - operators.ReleaseArrays(batch.Columns) + operators.ReleaseArrays(childBatch.Columns) size := uint64(filteredCol[0].Len()) return &operators.RecordBatch{ - Schema: batch.Schema, + Schema: childBatch.Schema, Columns: filteredCol, RowCount: size, }, nil diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go index 0d1db36..1fcccdd 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -149,7 +149,7 @@ func (a *avgAggrAccumulator) Finalize() float64 { // =================== // handles global aggregations without group by type AggrExec struct { - child operators.Operator // child operator 
+ input operators.Operator // child operator schema *arrow.Schema // output schema aggExpressions []AggregateFunctions // list of wanted aggregate expressions accumulators []accumulator // list of accumulators corresponding to aggExpressions, these will actually work to compute the aggregation @@ -192,7 +192,7 @@ func NewGlobalAggrExec(child operators.Operator, aggExprs []AggregateFunctions) } } return &AggrExec{ - child: child, + input: child, schema: arrow.NewSchema(fields, nil), aggExpressions: aggExprs, accumulators: accs, @@ -207,7 +207,7 @@ func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { return nil, io.EOF } for { - childBatch, err := a.child.Next(n) + childBatch, err := a.input.Next(n) if err != nil { if errors.Is(err, io.EOF) { break @@ -252,7 +252,7 @@ func (a *AggrExec) Schema() *arrow.Schema { return a.schema } func (a *AggrExec) Close() error { - return a.child.Close() + return a.input.Close() } func validAggrType(dt arrow.DataType) bool { diff --git a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index ed342a8..11ab431 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -1,3 +1,341 @@ package aggr +import ( + "context" + "errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "sort" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" +) + // order by col asc, col 2 desc .... 
etc +var ( + _ = (operators.Operator)(&SortExec{}) + _ = (operators.Operator)(&TopKSortExec{}) +) + +type SortKey struct { + Expr Expr.Expression + Ascending bool // by default false -- DESC (highest values first -> smaller values) + NullFirst bool // by default false -- nulls last +} + +func NewSortKey(expr Expr.Expression, options ...bool) *SortKey { + var asc, nullF bool + switch len(options) { + case 2: + asc = options[0] + nullF = options[1] + case 1: + asc = options[0] + } + return &SortKey{ + Expr: expr, + Ascending: asc, + NullFirst: nullF, + } +} +func CombineSortKeys(sk ...*SortKey) []SortKey { + var res []SortKey + for _, s := range sk { + res = append(res, *s) + } + return res +} + +type SortExec struct { + child operators.Operator + schema *arrow.Schema + done bool + sortKeys []SortKey // resolves to columns +} + +func NewSortExec(child operators.Operator, sortKeys []SortKey) (*SortExec, error) { + fmt.Printf("sorts Keys %v\n", sortKeys) + return &SortExec{ + child: child, + schema: child.Schema(), + sortKeys: sortKeys, + }, nil +} + +// for now read everything into memory and sort -- next steps will be to do external merge +func (s *SortExec) Next(n uint16) (*operators.RecordBatch, error) { + if s.done { + return nil, io.EOF + } + allColumns := make([]arrow.Array, len(s.schema.Fields())) // concated columns + mem := memory.NewGoAllocator() + fmt.Printf("all columns init %v\n", allColumns) + var count uint64 + for { + childBatch, err := s.child.Next(n) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + for i := range childBatch.Columns { + if allColumns[i] == nil { + allColumns[i] = childBatch.Columns[i] + continue + } + largerArray, err := concatarr(allColumns[i], childBatch.Columns[i], mem) + if err != nil { + return nil, err + } + allColumns[i] = largerArray + } + } + if len(allColumns) > 0 { + count = uint64(allColumns[0].Len()) + } + idx := sortBatches(&operators.RecordBatch{ + Schema: s.schema, + Columns: 
allColumns, + RowCount: count, + }, s.sortKeys) + // now update all mappings + for i := range len(allColumns) { + tmpDatum, err := compute.Take(context.TODO(), *compute.DefaultTakeOptions(), compute.NewDatum(allColumns[i]), compute.NewDatum(toDatumFormat(idx, mem))) + if err != nil { + return nil, err + } + array, ok := tmpDatum.(*compute.ArrayDatum) + if !ok { + return nil, fmt.Errorf("non datum was returned from take") + } + allColumns[i] = array.MakeArray() + } + // TOOD: break this uo into N chunks + return &operators.RecordBatch{ + Schema: s.schema, + Columns: allColumns, + RowCount: count, + }, nil +} +func (s *SortExec) Schema() *arrow.Schema { + return s.schema +} +func (s *SortExec) Close() error { + return s.child.Close() +} + +/* +only sort and keep the top k elements in memory +*/ +type TopKSortExec struct { + child operators.Operator + schema *arrow.Schema + done bool + sortKeys []SortKey // resolves to columns + k uint16 // top k +} + +func NewTopKSortExec(child operators.Operator, sortKeys []SortKey, k uint16) (*TopKSortExec, error) { + fmt.Printf("sort keys %v\n", sortKeys) + return &TopKSortExec{ + child: child, + schema: child.Schema(), + sortKeys: sortKeys, + k: k, + }, nil +} + +// for now read everything into memory and sort -- next steps will be to do external merge +func (t *TopKSortExec) Next(n uint16) (*operators.RecordBatch, error) { + if t.done { + return nil, io.EOF + } + return nil, nil +} +func (t *TopKSortExec) Schema() *arrow.Schema { + return t.schema +} +func (t *TopKSortExec) Close() error { + return t.child.Close() +} + +/* +shared functions +*/ +func sortBatches(fullRC *operators.RecordBatch, sortKeys []SortKey) []uint64 { + keyColumns := make([]arrow.Array, len(sortKeys)) + for i, sk := range sortKeys { + arr, err := Expr.EvalExpression(sk.Expr, fullRC) + if err != nil { + panic(fmt.Sprintf("sort batches: failed to eval sort expression: %v", err)) + } + keyColumns[i] = arr + } + fmt.Printf("columns\n") + for i, k := range 
keyColumns { + fmt.Printf("%d:%v\n", i, k) + } + idVector := make([]uint64, fullRC.RowCount) + for i := 0; uint64(i) < fullRC.RowCount; i++ { + idVector[i] = uint64(i) + } + sortIndexVector(idVector, keyColumns, sortKeys) + fmt.Printf("old Id Vec:%v\n", idVector) + fmt.Printf("new ID vec: %v\n", idVector) + return idVector +} +func toRC() []arrow.Array { + return nil +} + +func concatarr(a arrow.Array, b arrow.Array, mem memory.Allocator) (arrow.Array, error) { + return array.Concatenate([]arrow.Array{a, b}, mem) + +} + +// sortIndexVector sorts idVec based on keyColumns + sortKeys. +// keyColumns[i] corresponds to sortKeys[i]. +func sortIndexVector(idVec []uint64, keyColumns []arrow.Array, sortKeys []SortKey) { + sort.Slice(idVec, func(a, b int) bool { + i := idVec[a] + j := idVec[b] + + // lexicographic: go through each sort key + for k, col := range keyColumns { + sk := sortKeys[k] + cmp := compareArrowValues(col, i, j) + + if cmp == 0 { + continue // equal → move to next key + } + + if sk.Ascending { + return cmp < 0 + } else { + return cmp > 0 + } + } + + // completely equal for all keys + return false + }) +} + +func compareArrowValues(col arrow.Array, i, j uint64) int { + // Handle nulls (treat as lowest value for now) + if col.IsNull(int(i)) && col.IsNull(int(j)) { + return 0 + } + if col.IsNull(int(i)) { + return -1 + } + if col.IsNull(int(j)) { + return 1 + } + + switch arr := col.(type) { + + case *array.String: + vi := arr.Value(int(i)) + vj := arr.Value(int(j)) + switch { + case vi < vj: + return -1 + case vi > vj: + return 1 + default: + return 0 + } + + case *array.Int8: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareNumeric(vi, vj) + + case *array.Int16: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareNumeric(vi, vj) + + case *array.Int32: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareNumeric(vi, vj) + + case *array.Int64: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return 
compareNumeric(vi, vj) + + case *array.Uint8: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareNumeric(vi, vj) + + case *array.Uint16: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareNumeric(vi, vj) + + case *array.Uint32: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareNumeric(vi, vj) + + case *array.Uint64: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareNumeric(vi, vj) + + case *array.Float32: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareFloat(vi, vj) + + case *array.Float64: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + return compareFloat(vi, vj) + + case *array.Boolean: + vi, vj := arr.Value(int(i)), arr.Value(int(j)) + if vi == vj { + return 0 + } + if !vi && vj { + return -1 + } + return 1 + + default: + panic("unsupported Arrow type in compareArrowValues") + } +} + +func compareNumeric[T int64 | int32 | int16 | int8 | uint64 | uint32 | uint16 | uint8](a, b T) int { + switch { + case a < b: + return -1 + case a > b: + return 1 + default: + return 0 + } +} + +func compareFloat[T float32 | float64](a, b T) int { + switch { + case a < b: + return -1 + case a > b: + return 1 + default: + return 0 + } +} +func toDatumFormat(v []uint64, mem memory.Allocator) compute.Datum { + // turn to array first + b := array.NewUint64Builder(mem) + defer b.Release() + for _, val := range v { + b.Append(val) + } + arr := b.NewArray() + defer arr.Release() + return compute.NewDatum(arr) +} diff --git a/src/Backend/opti-sql-go/operators/aggr/sort_test.go b/src/Backend/opti-sql-go/operators/aggr/sort_test.go index b919b31..9ae02ab 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort_test.go @@ -1,7 +1,129 @@ package aggr -import "testing" +import ( + "context" + "fmt" + "io" + "opti-sql-go/Expr" + "testing" -func TestSort(t *testing.T) { + "github.com/apache/arrow/go/v17/arrow/compute" +) + +func TestSortInit(t *testing.T) { // 
Simple passing test + t.Run("sort Exec init", func(t *testing.T) { + proj := aggProject() + sortExec, err := NewSortExec(proj, nil) + if err != nil { + t.Fatal(err) + } + if !sortExec.Schema().Equal(proj.Schema()) { + t.Fatalf("expected schema %v, got %v", proj.Schema(), sortExec.schema) + } + sortExec.done = true + _, err = sortExec.Next(100) + if err != io.EOF { + t.Fatalf("expected io.EOF error on done sortExec but got %v", err) + } + if sortExec.Close() != nil { + t.Fatalf("expected nil error on close but got %v", sortExec.Close()) + } + + }) + t.Run("tok k sort exec init", func(t *testing.T) { + proj := aggProject() + topKVal := 5 + topK, err := NewTopKSortExec(proj, nil, uint16(topKVal)) + if err != nil { + t.Fatal(err) + } + if !topK.Schema().Equal(proj.Schema()) { + t.Fatalf("expected schema %v, got %v", proj.Schema(), topK.schema) + } + if topK.k != 5 { + t.Fatalf("expected %v for top k but got %v", topKVal, topK.k) + } + topK.done = true + _, err = topK.Next(100) + if err != io.EOF { + t.Fatalf("expected io.EOF error on done topK but got %v", err) + } + if topK.Close() != nil { + t.Fatalf("expected nil error on close but got %v", topK.Close()) + } + + }) +} + +func TestBasicSortExpr(t *testing.T) { + t.Run("Sort", func(t *testing.T) { + proj := aggProject() + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + _, err := NewSortExec(proj, CombineSortKeys(nameSK, ageSK)) + if err != nil { + t.Fatalf("unexpected error from NewSortExec : %v\n", err) + } + //t.Logf("%v\n", sortExec) + }) + t.Run("Basic Next operation", func(t *testing.T) { + proj := aggProject() + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + sortExec, err := NewSortExec(proj, CombineSortKeys(ageSK, nameSK)) + if err != nil { + t.Fatalf("unexpected error from 
NewSortExec : %v\n", err) + } + sortedBatch, err := sortExec.Next(10) + if err != nil { + t.Fatalf("unexpected error from sortExec Next : %v\n", err) + } + fmt.Println(sortedBatch.PrettyPrint()) + + }) +} +func TestBasicTopKSortExpr(t *testing.T) { + t.Run("TopK Sort", func(t *testing.T) { + proj := aggProject() + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + sortExec, err := NewTopKSortExec(proj, CombineSortKeys(nameSK, ageSK), 5) + if err != nil { + t.Fatalf("unexpected error from NewTopKSortExec : %v\n", err) + } + t.Logf("%v\n", sortExec) + + }) +} + +func TestOne(t *testing.T) { + v := compute.GetExecCtx(context.Background()) + names := v.Registry.GetFunctionNames() + for i, name := range names { + fmt.Printf("%d: %v\n", i, name) + } + /* + mem := memory.NewGoAllocator() + floatB := array.NewFloat64Builder(mem) + floatB.AppendValues([]float64{10.5, 20.3, 30.1, 40.7, 50.2}, []bool{true, true, true, true, true}) + pos := array.NewInt32Builder(mem) + pos.AppendValues([]int32{1, 3, 4}, []bool{true, true, true}) + + dat, err := compute.Take(context.TODO(), *compute.DefaultTakeOptions(), compute.NewDatum(floatB.NewArray()), compute.NewDatum(pos.NewArray())) + if err != nil { + t.Fatalf("Take failed: %v", err) + } + array, ok := dat.(*compute.ArrayDatum) + if !ok { + t.Logf("expected an array to be returned but got something else %T\n", dat) + } + t.Logf("data: %v\n", array.MakeArray()) + */ } diff --git a/src/Backend/opti-sql-go/operators/project/parquet.go b/src/Backend/opti-sql-go/operators/project/parquet.go index 94b6e1d..42d5c14 100644 --- a/src/Backend/opti-sql-go/operators/project/parquet.go +++ b/src/Backend/opti-sql-go/operators/project/parquet.go @@ -22,12 +22,10 @@ var ( ) type ParquetSource struct { - // existing fields schema *arrow.Schema projectionPushDown []string // columns to project up reader pqarrow.RecordReader - // for internal 
reading - done bool // if set to true always return io.EOF + done bool // if set to true always return io.EOF } func NewParquetSource(r parquet.ReaderAtSeeker) (*ParquetSource, error) { @@ -45,7 +43,7 @@ func NewParquetSource(r parquet.ReaderAtSeeker) (*ParquetSource, error) { arrowReader, err := pqarrow.NewFileReader( filerReader, - pqarrow.ArrowReadProperties{Parallel: true, BatchSize: int64(Config.Batch.Size)}, // TODO: Read in from config for this stuff + pqarrow.ArrowReadProperties{Parallel: true, BatchSize: int64(Config.Batch.Size)}, allocator, ) if err != nil { @@ -84,7 +82,7 @@ func NewParquetSourcePushDown(r parquet.ReaderAtSeeker, columns []string) (*Parq arrowReader, err := pqarrow.NewFileReader( filerReader, - pqarrow.ArrowReadProperties{Parallel: true, BatchSize: int64(Config.Batch.Size)}, // TODO: Read in from config for this stuff + pqarrow.ArrowReadProperties{Parallel: true, BatchSize: int64(Config.Batch.Size)}, allocator, ) if err != nil { diff --git a/src/Backend/opti-sql-go/operators/project/projectExec.go b/src/Backend/opti-sql-go/operators/project/projectExec.go index 3df1fee..033a58c 100644 --- a/src/Backend/opti-sql-go/operators/project/projectExec.go +++ b/src/Backend/opti-sql-go/operators/project/projectExec.go @@ -20,7 +20,7 @@ var ( ) type ProjectExec struct { - child operators.Operator + input operators.Operator outputschema arrow.Schema expr []Expr.Expression done bool @@ -60,7 +60,7 @@ func NewProjectExec(input operators.Operator, exprs []Expr.Expression) (*Project outputschema := arrow.NewSchema(fields, nil) // return new exec return &ProjectExec{ - child: input, + input: input, outputschema: *outputschema, expr: exprs, }, nil @@ -73,7 +73,7 @@ func (p *ProjectExec) Next(n uint16) (*operators.RecordBatch, error) { return nil, io.EOF } - childBatch, err := p.child.Next(n) + childBatch, err := p.input.Next(n) if err != nil { return nil, err } @@ -102,7 +102,7 @@ func (p *ProjectExec) Next(n uint16) (*operators.RecordBatch, error) { }, 
nil } func (p *ProjectExec) Close() error { - return p.child.Close() + return p.input.Close() } func (p *ProjectExec) Schema() *arrow.Schema { return &p.outputschema From eb30dec9a8f31ddbddb968fcfcaa93300be2dfaa Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Thu, 27 Nov 2025 15:01:03 -0500 Subject: [PATCH 10/21] feat:Full-Sort operator returns results in batches| TODO:Top K sort operator --- src/Backend/opti-sql-go/go.mod | 3 +- src/Backend/opti-sql-go/go.sum | 2 + .../opti-sql-go/operators/aggr/sort.go | 153 +++--- .../opti-sql-go/operators/aggr/sort_test.go | 490 +++++++++++++++++- 4 files changed, 585 insertions(+), 63 deletions(-) diff --git a/src/Backend/opti-sql-go/go.mod b/src/Backend/opti-sql-go/go.mod index c9ee239..5b872b6 100644 --- a/src/Backend/opti-sql-go/go.mod +++ b/src/Backend/opti-sql-go/go.mod @@ -1,6 +1,6 @@ module opti-sql-go -go 1.23 +go 1.24.0 require ( github.com/apache/arrow/go/v15 v15.0.2 @@ -28,6 +28,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect github.com/aws/smithy-go v1.23.2 // indirect github.com/go-ini/ini v1.67.0 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/goccy/go-json v0.10.3 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v24.3.25+incompatible // indirect diff --git a/src/Backend/opti-sql-go/go.sum b/src/Backend/opti-sql-go/go.sum index 9c4220d..7c4ee5c 100644 --- a/src/Backend/opti-sql-go/go.sum +++ b/src/Backend/opti-sql-go/go.sum @@ -37,6 +37,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod 
h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= diff --git a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index 11ab431..60d0cb5 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "math" "opti-sql-go/Expr" "opti-sql-go/operators" "sort" @@ -53,8 +54,13 @@ func CombineSortKeys(sk ...*SortKey) []SortKey { type SortExec struct { child operators.Operator schema *arrow.Schema - done bool sortKeys []SortKey // resolves to columns + // internal book keeping + totalColumns []arrow.Array + consumedOffset uint64 + totalRows uint64 + consumed bool // did we finish reading all of the child record batches? + done bool // have we already produced all the sorted record batches? 
} func NewSortExec(child operators.Operator, sortKeys []SortKey) (*SortExec, error) { @@ -67,59 +73,80 @@ func NewSortExec(child operators.Operator, sortKeys []SortKey) (*SortExec, error } // for now read everything into memory and sort -- next steps will be to do external merge + +// n is the number of records we will return,sortExec will read in 2^16-1 column entries from its child, this is more efficient that trusting the caller to pass in a reasonable +// n so that we avoid small/frequent IO operations func (s *SortExec) Next(n uint16) (*operators.RecordBatch, error) { if s.done { return nil, io.EOF } - allColumns := make([]arrow.Array, len(s.schema.Fields())) // concated columns - mem := memory.NewGoAllocator() - fmt.Printf("all columns init %v\n", allColumns) - var count uint64 - for { - childBatch, err := s.child.Next(n) - if err != nil { - if errors.Is(err, io.EOF) { - break + if !s.consumed { + allColumns := make([]arrow.Array, len(s.schema.Fields())) // concated columns + mem := memory.NewGoAllocator() + var count uint64 + for { + childBatch, err := s.child.Next(math.MaxUint16) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + for i := range childBatch.Columns { + if allColumns[i] == nil { + allColumns[i] = childBatch.Columns[i] + continue + } + largerArray, err := array.Concatenate([]arrow.Array{allColumns[i], childBatch.Columns[i]}, mem) + if err != nil { + return nil, err + } + allColumns[i] = largerArray } + } + s.consumed = true + if len(allColumns) > 0 { + count = uint64(allColumns[0].Len()) + } + idx, err := sortBatches(&operators.RecordBatch{ + Schema: s.schema, + Columns: allColumns, + RowCount: count, + }, s.sortKeys) + if err != nil { return nil, err } - for i := range childBatch.Columns { - if allColumns[i] == nil { - allColumns[i] = childBatch.Columns[i] - continue - } - largerArray, err := concatarr(allColumns[i], childBatch.Columns[i], mem) + // now update all mappings + for i := range len(allColumns) { + 
arr, err := compute.TakeArray(context.TODO(), allColumns[i], idxToArrowArray(idx, mem)) if err != nil { return nil, err } - allColumns[i] = largerArray + allColumns[i] = arr } + s.totalColumns = allColumns + s.totalRows = count } - if len(allColumns) > 0 { - count = uint64(allColumns[0].Len()) + var readSize uint64 + remaining := s.totalRows - s.consumedOffset + if remaining < uint64(n) { + // if n is more than we have left just read up to remaining + readSize = uint64(remaining) + s.done = true + } else { + // remaining > n or remaining = n then just read n and return + readSize = uint64(n) } - idx := sortBatches(&operators.RecordBatch{ - Schema: s.schema, - Columns: allColumns, - RowCount: count, - }, s.sortKeys) - // now update all mappings - for i := range len(allColumns) { - tmpDatum, err := compute.Take(context.TODO(), *compute.DefaultTakeOptions(), compute.NewDatum(allColumns[i]), compute.NewDatum(toDatumFormat(idx, mem))) - if err != nil { - return nil, err - } - array, ok := tmpDatum.(*compute.ArrayDatum) - if !ok { - return nil, fmt.Errorf("non datum was returned from take") - } - allColumns[i] = array.MakeArray() + mem := memory.NewGoAllocator() + sortedColumns, err := s.consumeSortedBatch(readSize, mem) + if err != nil { + return nil, err } - // TOOD: break this uo into N chunks + return &operators.RecordBatch{ Schema: s.schema, - Columns: allColumns, - RowCount: count, + Columns: sortedColumns, + RowCount: readSize, }, nil } func (s *SortExec) Schema() *arrow.Schema { @@ -128,6 +155,22 @@ func (s *SortExec) Schema() *arrow.Schema { func (s *SortExec) Close() error { return s.child.Close() } +func (s *SortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([]arrow.Array, error) { + ctx := context.TODO() + resultColumns := make([]arrow.Array, len(s.schema.Fields())) + offsetArray := genoffsetTakeIdx(s.consumedOffset, readsize, mem) + for i := range s.totalColumns { + sortArr := s.totalColumns[i] + arr, err := compute.TakeArray(ctx, sortArr, 
offsetArray) + if err != nil { + return nil, err + } + resultColumns[i] = arr + + } + s.consumedOffset += readsize + return resultColumns, nil +} /* only sort and keep the top k elements in memory @@ -167,35 +210,21 @@ func (t *TopKSortExec) Close() error { /* shared functions */ -func sortBatches(fullRC *operators.RecordBatch, sortKeys []SortKey) []uint64 { +func sortBatches(fullRC *operators.RecordBatch, sortKeys []SortKey) ([]uint64, error) { keyColumns := make([]arrow.Array, len(sortKeys)) for i, sk := range sortKeys { arr, err := Expr.EvalExpression(sk.Expr, fullRC) if err != nil { - panic(fmt.Sprintf("sort batches: failed to eval sort expression: %v", err)) + return nil, fmt.Errorf("sort batches: failed to eval sort expression: %v", err) } keyColumns[i] = arr } - fmt.Printf("columns\n") - for i, k := range keyColumns { - fmt.Printf("%d:%v\n", i, k) - } idVector := make([]uint64, fullRC.RowCount) for i := 0; uint64(i) < fullRC.RowCount; i++ { idVector[i] = uint64(i) } sortIndexVector(idVector, keyColumns, sortKeys) - fmt.Printf("old Id Vec:%v\n", idVector) - fmt.Printf("new ID vec: %v\n", idVector) - return idVector -} -func toRC() []arrow.Array { - return nil -} - -func concatarr(a arrow.Array, b arrow.Array, mem memory.Allocator) (arrow.Array, error) { - return array.Concatenate([]arrow.Array{a, b}, mem) - + return idVector, nil } // sortIndexVector sorts idVec based on keyColumns + sortKeys. 
@@ -328,14 +357,20 @@ func compareFloat[T float32 | float64](a, b T) int { return 0 } } -func toDatumFormat(v []uint64, mem memory.Allocator) compute.Datum { +func idxToArrowArray(v []uint64, mem memory.Allocator) arrow.Array { // turn to array first b := array.NewUint64Builder(mem) - defer b.Release() for _, val := range v { b.Append(val) } arr := b.NewArray() - defer arr.Release() - return compute.NewDatum(arr) + return arr +} +func genoffsetTakeIdx(offset, size uint64, mem memory.Allocator) arrow.Array { + b := array.NewUint64Builder(mem) + for i := range size { + b.Append(offset + i) + } + arr := b.NewArray() + return arr } diff --git a/src/Backend/opti-sql-go/operators/aggr/sort_test.go b/src/Backend/opti-sql-go/operators/aggr/sort_test.go index 9ae02ab..95754c8 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort_test.go @@ -2,14 +2,63 @@ package aggr import ( "context" + "errors" "fmt" "io" "opti-sql-go/Expr" + "opti-sql-go/operators" + "opti-sql-go/operators/project" "testing" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/go-jose/go-jose/v4/testutils/require" ) +func buildAggTestRecordBatch(t *testing.T) *operators.RecordBatch { + names, cols := generateAggTestColumns() + mem := memory.NewGoAllocator() + + arrowCols := make([]arrow.Array, len(cols)) + fields := make([]arrow.Field, len(cols)) + + for i, col := range cols { + switch v := col.(type) { + + case []int32: + b := array.NewInt32Builder(mem) + defer b.Release() + b.AppendValues(v, nil) + arrowCols[i] = b.NewArray() + + case []string: + b := array.NewStringBuilder(mem) + defer b.Release() + b.AppendValues(v, nil) + arrowCols[i] = b.NewArray() + + case []float64: + b := array.NewFloat64Builder(mem) + defer b.Release() + b.AppendValues(v, nil) + arrowCols[i] = b.NewArray() + + default: + 
t.Fatalf("unsupported type in generateAggTestColumns") + } + + fields[i] = arrow.Field{Name: names[i], Type: arrowCols[i].DataType()} + } + + return &operators.RecordBatch{ + Schema: arrow.NewSchema(fields, nil), + Columns: arrowCols, + RowCount: uint64(len(cols[0].([]int32))), + } +} + func TestSortInit(t *testing.T) { // Simple passing test t.Run("sort Exec init", func(t *testing.T) { @@ -30,6 +79,14 @@ func TestSortInit(t *testing.T) { t.Fatalf("expected nil error on close but got %v", sortExec.Close()) } + }) + t.Run("SortKey options", func(t *testing.T) { + proj := aggProject() + _, err := NewSortExec(proj, []SortKey{*NewSortKey(col("-"), false, false)}) + if err != nil { + t.Fatal(err) + } + }) t.Run("tok k sort exec init", func(t *testing.T) { proj := aggProject() @@ -79,14 +136,441 @@ func TestBasicSortExpr(t *testing.T) { if err != nil { t.Fatalf("unexpected error from NewSortExec : %v\n", err) } - sortedBatch, err := sortExec.Next(10) + for { + sortedBatch, err := sortExec.Next(5) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + t.Fatalf("unexpected error from sortExec Next : %v\n", err) + } + fmt.Println(sortedBatch.PrettyPrint()) + } + }) +} +func TestFullSortOverNetwork(t *testing.T) { + t.Run("Full Sort of large file", func(t *testing.T) { + const fileName = "country_full.csv" + nr, err := project.NewStreamReader(fileName) + if err != nil { + t.Fatalf("failed to create s3 object: %v", err) + } + pj, err := project.NewProjectCSVLeaf(nr.Stream()) if err != nil { - t.Fatalf("unexpected error from sortExec Next : %v\n", err) + t.Fatalf("failed to create csv project source from s3 object: %v", err) + } + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + sortExec, err := NewSortExec(pj, CombineSortKeys(nameSK)) + if err != nil { + t.Fatalf("unexpected error %v\n", err) + } + rc, err := sortExec.Next(10) + if err != nil { + t.Fatalf("unexpected error %v\n", err) + } + fmt.Println(rc.PrettyPrint()) + + }) + +} + 
+func TestFullSortExec_Next(t *testing.T) { + t.Parallel() + + t.Run("sort_age_DESC", func(t *testing.T) { + proj := aggProject() + + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) // DESC + + sortExec, err := NewSortExec(proj, CombineSortKeys(ageSK)) + require.NoError(t, err) + + batch, err := sortExec.Next(5) + require.NoError(t, err) + require.Equal(t, uint64(5), batch.RowCount) + + ages := batch.Columns[2].(*array.Int32) + got := []int32{ + ages.Value(0), + ages.Value(1), + ages.Value(2), + ages.Value(3), + ages.Value(4), + } + + expected := []int32{50, 48, 46, 45, 43} + for i, v := range expected { + if got[i] != v { + t.Fatalf("expected %v at index %d, but got %v", v, i, got[i]) + } + } + }) + + t.Run("sort_name_ASC", func(t *testing.T) { + proj := aggProject() + + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + + sortExec, err := NewSortExec(proj, CombineSortKeys(nameSK)) + require.NoError(t, err) + + batch, err := sortExec.Next(3) + require.NoError(t, err) + + names := batch.Columns[1].(*array.String) + got := []string{ + names.Value(0), + names.Value(1), + names.Value(2), + } + + expected := []string{"Alice", "Bob", "Charlie"} + for i, v := range expected { + if got[i] != v { + t.Fatalf("expected %v at index %d, but got %v", v, i, got[i]) + } + } + }) +} + +// ----------------------------------------------------------------------------- +// TEST 2: sortIndexVector() +// ----------------------------------------------------------------------------- + +func TestSortIndexVector(t *testing.T) { + t.Parallel() + + mem := memory.NewGoAllocator() + + t.Run("single_key_int", func(t *testing.T) { + b := array.NewInt32Builder(mem) + b.AppendValues([]int32{30, 10, 20}, nil) + arr := b.NewArray() + defer arr.Release() + + keys := []arrow.Array{arr} + idVec := []uint64{0, 1, 2} + + sks := []SortKey{ + {Expr: nil, Ascending: true}, + } + + sortIndexVector(idVec, keys, sks) + + expected := []uint64{1, 2, 0} 
+ for i, v := range expected { + if idVec[i] != v { + t.Fatalf("expected %v at index %d, but got %v", v, i, idVec[i]) + } + } + }) + + t.Run("single_key_string", func(t *testing.T) { + b := array.NewStringBuilder(mem) + b.AppendValues([]string{"Charlie", "Alice", "Bob"}, nil) + arr := b.NewArray() + defer arr.Release() + + keys := []arrow.Array{arr} + idVec := []uint64{0, 1, 2} + + sks := []SortKey{{Ascending: true}} + + sortIndexVector(idVec, keys, sks) + + expected := []uint64{1, 2, 0} + for i, v := range expected { + if idVec[i] != v { + t.Fatalf("expected %v at index %d, but got %v", v, i, idVec[i]) + } } - fmt.Println(sortedBatch.PrettyPrint()) + }) +} + +// ----------------------------------------------------------------------------- +// TEST 3: compareArrowValues() +// ----------------------------------------------------------------------------- + +func TestCompareArrowValues(t *testing.T) { + t.Parallel() + + mem := memory.NewGoAllocator() + t.Run("int", func(t *testing.T) { + b := array.NewInt32Builder(mem) + b.AppendValues([]int32{10, 20}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + require.Equal(t, 0, compareArrowValues(arr, 0, 0)) }) + + t.Run("uint", func(t *testing.T) { + b := array.NewUint32Builder(mem) + b.AppendValues([]uint32{5, 7}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) + + t.Run("float", func(t *testing.T) { + b := array.NewFloat64Builder(mem) + b.AppendValues([]float64{1.5, 1.7}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) + + t.Run("string", func(t *testing.T) { + b := array.NewStringBuilder(mem) + b.AppendValues([]string{"a", "b"}, nil) + arr := b.NewArray() + defer arr.Release() + + 
require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) + + t.Run("bool", func(t *testing.T) { + b := array.NewBooleanBuilder(mem) + b.AppendValues([]bool{false, true}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) +} +func TestCompareArrowValues_AllTypes(t *testing.T) { + mem := memory.NewGoAllocator() + + // helper to assert cmp result + assert := func(name string, got, want int) { + if got != want { + t.Fatalf("%s: expected %d, got %d", name, want, got) + } + } + + // ---- STRING ---- + strB := array.NewStringBuilder(mem) + strB.Append("apple") + strB.Append("banana") + strArr := strB.NewArray().(*array.String) + + assert("string lt", compareArrowValues(strArr, 0, 1), -1) + assert("string gt", compareArrowValues(strArr, 1, 0), 1) + assert("string eq", compareArrowValues(strArr, 0, 0), 0) + + strArr.Release() + strB.Release() + + // ---- INT TYPES ---- + int8Arr := buildInt8(mem, []int8{1, 3}) + assert("int8 lt", compareArrowValues(int8Arr, 0, 1), -1) + assert("int8 gt", compareArrowValues(int8Arr, 1, 0), 1) + assert("int8 eq", compareArrowValues(int8Arr, 0, 0), 0) + int8Arr.Release() + + int16Arr := buildInt16(mem, []int16{5, 2}) + assert("int16 gt", compareArrowValues(int16Arr, 0, 1), 1) + int16Arr.Release() + + int32Arr := buildInt32(mem, []int32{10, 10}) + assert("int32 eq", compareArrowValues(int32Arr, 0, 1), 0) + int32Arr.Release() + + int64Arr := buildInt64(mem, []int64{-5, 7}) + assert("int64 lt", compareArrowValues(int64Arr, 0, 1), -1) + int64Arr.Release() + + // ---- UINT TYPES ---- + u8Arr := buildUint8(mem, []uint8{9, 3}) + assert("uint8 gt", compareArrowValues(u8Arr, 0, 1), 1) + u8Arr.Release() + + u16Arr := buildUint16(mem, []uint16{3, 3}) + assert("uint16 eq", compareArrowValues(u16Arr, 0, 1), 0) + u16Arr.Release() + + u32Arr := buildUint32(mem, []uint32{3, 10}) + assert("uint32 
lt", compareArrowValues(u32Arr, 0, 1), -1) + u32Arr.Release() + + u64Arr := buildUint64(mem, []uint64{100, 2}) + assert("uint64 gt", compareArrowValues(u64Arr, 0, 1), 1) + u64Arr.Release() + + // ---- FLOAT TYPES ---- + f32Arr := buildFloat32(mem, []float32{1.5, 1.5}) + assert("float32 eq", compareArrowValues(f32Arr, 0, 1), 0) + f32Arr.Release() + + f64Arr := buildFloat64(mem, []float64{-1.0, 2.3}) + assert("float64 lt", compareArrowValues(f64Arr, 0, 1), -1) + f64Arr.Release() + + // ---- BOOLEAN ---- + boolArr := buildBool(mem, []bool{false, true}) + assert("bool lt", compareArrowValues(boolArr, 0, 1), -1) + assert("bool gt", compareArrowValues(boolArr, 1, 0), 1) + assert("bool eq", compareArrowValues(boolArr, 1, 1), 0) + boolArr.Release() + + // ---- NULL CASES ---- + nullB := array.NewInt32Builder(mem) + nullB.AppendNull() + nullB.Append(10) + nullArr := nullB.NewArray().(*array.Int32) + + assert("null < value", compareArrowValues(nullArr, 0, 1), -1) + assert("value > null", compareArrowValues(nullArr, 1, 0), 1) + assert("null == null", compareArrowValues(nullArr, 0, 0), 0) + + nullArr.Release() + nullB.Release() + + // ---- UNSUPPORTED TYPE PANIC ---- + // Build a fixed-size binary array to trigger panic + fsb := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 2}) + fsb.Append([]byte{1, 2}) + fsb.Append([]byte{3, 4}) + fsArr := fsb.NewArray() + + didPanic := false + func() { + defer func() { + if recover() != nil { + didPanic = true + } + }() + _ = compareArrowValues(fsArr, 0, 1) + }() + if !didPanic { + t.Fatalf("expected panic for unsupported Arrow type") + } + + fsArr.Release() + fsb.Release() +} +func buildInt8(mem memory.Allocator, vals []int8) *array.Int8 { + b := array.NewInt8Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Int8) + b.Release() + return arr +} + +func buildInt16(mem memory.Allocator, vals []int16) *array.Int16 { + b := array.NewInt16Builder(mem) + for _, v := range vals { + 
b.Append(v) + } + arr := b.NewArray().(*array.Int16) + b.Release() + return arr +} + +func buildInt32(mem memory.Allocator, vals []int32) *array.Int32 { + b := array.NewInt32Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Int32) + b.Release() + return arr +} + +func buildInt64(mem memory.Allocator, vals []int64) *array.Int64 { + b := array.NewInt64Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Int64) + b.Release() + return arr +} + +func buildUint8(mem memory.Allocator, vals []uint8) *array.Uint8 { + b := array.NewUint8Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint8) + b.Release() + return arr } + +func buildUint16(mem memory.Allocator, vals []uint16) *array.Uint16 { + b := array.NewUint16Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint16) + b.Release() + return arr +} + +func buildUint32(mem memory.Allocator, vals []uint32) *array.Uint32 { + b := array.NewUint32Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint32) + b.Release() + return arr +} + +func buildUint64(mem memory.Allocator, vals []uint64) *array.Uint64 { + b := array.NewUint64Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint64) + b.Release() + return arr +} + +func buildFloat32(mem memory.Allocator, vals []float32) *array.Float32 { + b := array.NewFloat32Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Float32) + b.Release() + return arr +} + +func buildFloat64(mem memory.Allocator, vals []float64) *array.Float64 { + b := array.NewFloat64Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Float64) + b.Release() + return arr +} + +func buildBool(mem memory.Allocator, vals []bool) *array.Boolean { + b := array.NewBooleanBuilder(mem) + for _, v := range vals { + b.Append(v) + } + 
arr := b.NewArray().(*array.Boolean) + b.Release() + return arr +} + func TestBasicTopKSortExpr(t *testing.T) { t.Run("TopK Sort", func(t *testing.T) { proj := aggProject() From 72a6673703036f6e5c9a9c70376bc9f9fce650c6 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Sat, 29 Nov 2025 23:04:36 -0500 Subject: [PATCH 11/21] feat:Implement TopK sort operator --- .../opti-sql-go/operators/aggr/sort.go | 380 +++++++++++++++++- .../opti-sql-go/operators/aggr/sort_test.go | 151 +++---- .../opti-sql-go/operators/filter/limit.go | 10 + 3 files changed, 465 insertions(+), 76 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index 60d0cb5..aa78ca9 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -64,7 +64,6 @@ type SortExec struct { } func NewSortExec(child operators.Operator, sortKeys []SortKey) (*SortExec, error) { - fmt.Printf("sorts Keys %v\n", sortKeys) return &SortExec{ child: child, schema: child.Schema(), @@ -178,18 +177,28 @@ only sort and keep the top k elements in memory type TopKSortExec struct { child operators.Operator schema *arrow.Schema - done bool sortKeys []SortKey // resolves to columns k uint16 // top k + // internal book keeping + sortedColumns []arrow.Array + heap []heapRow // at any one point this will only hold k elements + totalRows uint64 + consumedOffset uint64 + consumed bool // did we finish reading all of the child record batches? 
+ done bool } func NewTopKSortExec(child operators.Operator, sortKeys []SortKey, k uint16) (*TopKSortExec, error) { - fmt.Printf("sort keys %v\n", sortKeys) + fmt.Printf("k:%v\n", k) + size := len(child.Schema().Fields()) return &TopKSortExec{ child: child, schema: child.Schema(), sortKeys: sortKeys, k: k, + /// + sortedColumns: make([]arrow.Array, size), + heap: make([]heapRow, 0, k), }, nil } @@ -198,7 +207,48 @@ func (t *TopKSortExec) Next(n uint16) (*operators.RecordBatch, error) { if t.done { return nil, io.EOF } - return nil, nil + mem := memory.NewGoAllocator() + if !t.consumed { + for { + childBatch, err := t.child.Next(math.MaxUint16) + if err != nil { + if errors.Is(err, io.EOF) { + t.consumed = true + if len(t.sortedColumns) != 0 { + t.totalRows = uint64(t.sortedColumns[0].Len()) + } + break + } + return nil, err + } + // after the update, run take, and then update the sorted columns we store internally + // handle input validation here + err = t.UpdateTopKSorted(childBatch, t.sortKeys, mem) + if err != nil { + return nil, err + } + } + } + var readSize uint64 + remaining := t.totalRows - t.consumedOffset + if remaining < uint64(n) { + // if n is more than we have left just read up to remaining + readSize = uint64(remaining) + t.done = true + } else { + // remaining > n or remaining = n then just read n and return + readSize = uint64(n) + } + sortedArr, err := t.consumeSortedBatch(readSize, memory.NewGoAllocator()) + if err != nil { + return nil, err + } + return &operators.RecordBatch{ + Schema: t.schema, + Columns: sortedArr, + RowCount: readSize, + }, nil + } func (t *TopKSortExec) Schema() *arrow.Schema { return t.schema @@ -207,6 +257,107 @@ func (t *TopKSortExec) Close() error { return t.child.Close() } +type heapRow struct { + rowIdx uint64 + keys []interface{} // colummns +} + +/* +evaluate key cols +then iterate through all of the key columns and generate their key represenation +*/ +func (t *TopKSortExec) UpdateTopKSorted(newBatch 
*operators.RecordBatch, sortKeys []SortKey, mem memory.Allocator) error { + // 1. Evaluate key columns + keyCols := make([]arrow.Array, len(sortKeys)) + for i, sk := range sortKeys { + arr, err := Expr.EvalExpression(sk.Expr, newBatch) + if err != nil { + panic(err) + } + keyCols[i] = arr + } + allColumns, err := joinArrays(newBatch.Columns, t.sortedColumns, mem) + if err != nil { + return err + } + + rowCount := int(allColumns[0].Len()) + tmpBuff := make([]heapRow, 0, rowCount) + for i := 0; i < rowCount; i++ { + keys := make([]interface{}, len(sortKeys)) + for k, col := range keyCols { + keys[k] = extractValue(col, i) + } + row := heapRow{ + rowIdx: uint64(i), + keys: keys, + } + tmpBuff = append(tmpBuff, row) + + } + sortBySortKeys(tmpBuff, sortKeys) + fmt.Printf("buff: %v\nTopK:%v\n", tmpBuff, t.k) + tk := min(int(t.k), len(tmpBuff)) // in case k > len(tmpBuff) + topK := tmpBuff[:tk] + var idxArr []uint64 + for i := range topK { + idxArr = append(idxArr, topK[i].rowIdx) + } + takeArray := idxToArrowArray(idxArr, mem) + count := newBatch.Schema.NumFields() + for i := range count { + sc, err := compute.TakeArray(context.Background(), allColumns[i], takeArray) + if err != nil { + return err + } + t.sortedColumns[i] = sc + } + return nil +} + +func joinArrays(existing, newarrs []arrow.Array, mem memory.Allocator) ([]arrow.Array, error) { + if len(existing) == 0 { + return newarrs, nil + } + if len(newarrs) == 0 { + return existing, nil + } + result := make([]arrow.Array, len(existing)) + for i := range existing { + v1, v2 := existing[i], newarrs[i] + if v1 == nil { + result[i] = v2 + continue + } else if v2 == nil { + result[i] = v1 + continue + } + combined, err := array.Concatenate([]arrow.Array{v1, v2}, mem) + if err != nil { + return nil, err + } + result[i] = combined + } + return result, nil +} + +func (t *TopKSortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([]arrow.Array, error) { + ctx := context.TODO() + resultColumns := 
make([]arrow.Array, len(t.schema.Fields())) + offsetArray := genoffsetTakeIdx(t.consumedOffset, readsize, mem) + for i := range t.sortedColumns { + sortArr := t.sortedColumns[i] + arr, err := compute.TakeArray(ctx, sortArr, offsetArray) + if err != nil { + return nil, err + } + resultColumns[i] = arr + + } + t.consumedOffset += readsize + return resultColumns, nil +} + /* shared functions */ @@ -357,6 +508,227 @@ func compareFloat[T float32 | float64](a, b T) int { return 0 } } +func extractValue(col arrow.Array, idx int) interface{} { + switch arr := col.(type) { + + case *array.String: + return arr.Value(idx) + + case *array.Int8: + return int64(arr.Value(idx)) + case *array.Int16: + return int64(arr.Value(idx)) + case *array.Int32: + return int64(arr.Value(idx)) + case *array.Int64: + return arr.Value(idx) + + case *array.Uint8: + return uint64(arr.Value(idx)) + case *array.Uint16: + return uint64(arr.Value(idx)) + case *array.Uint32: + return uint64(arr.Value(idx)) + case *array.Uint64: + return arr.Value(idx) + + case *array.Float32: + return float64(arr.Value(idx)) + case *array.Float64: + return arr.Value(idx) + + case *array.Boolean: + return arr.Value(idx) + + default: + panic("unsupported type") + } +} + +func sortBySortKeys(rows []heapRow, sortKeys []SortKey) { + sort.Slice(rows, func(i, j int) bool { + ri := rows[i] + rj := rows[j] + + for k, sk := range sortKeys { + cmp := comparePrimitive(ri.keys[k], rj.keys[k]) + + if cmp == 0 { + continue // move to next key + } + + if sk.Ascending { + return cmp < 0 + } else { + return cmp > 0 + } + } + + return false + }) +} + +func comparePrimitive(a, b any) int { + switch va := a.(type) { + + case int: + vb := b.(int) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case int8: + vb := b.(int8) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case int16: + vb := b.(int16) + switch { + case va < vb: + return -1 + case va 
> vb: + return 1 + default: + return 0 + } + + case int32: + vb := b.(int32) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case int64: + vb := b.(int64) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case uint: + vb := b.(uint) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case uint8: + vb := b.(uint8) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case uint16: + vb := b.(uint16) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case uint32: + vb := b.(uint32) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case uint64: + vb := b.(uint64) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case float32: + vb := b.(float32) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case float64: + vb := b.(float64) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case string: + vb := b.(string) + switch { + case va < vb: + return -1 + case va > vb: + return 1 + default: + return 0 + } + + case bool: + vb := b.(bool) + if va == vb { + return 0 + } + if !va && vb { + return -1 + } + return 1 + + default: + panic("unsupported primitive type") + } +} + func idxToArrowArray(v []uint64, mem memory.Allocator) arrow.Array { // turn to array first b := array.NewUint64Builder(mem) diff --git a/src/Backend/opti-sql-go/operators/aggr/sort_test.go b/src/Backend/opti-sql-go/operators/aggr/sort_test.go index 95754c8..7e77e4f 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort_test.go @@ -1,64 +1,19 @@ package aggr import ( - "context" "errors" "fmt" "io" "opti-sql-go/Expr" - "opti-sql-go/operators" 
"opti-sql-go/operators/project" "testing" "github.com/apache/arrow/go/v17/arrow" "github.com/apache/arrow/go/v17/arrow/array" - "github.com/apache/arrow/go/v17/arrow/compute" "github.com/apache/arrow/go/v17/arrow/memory" "github.com/go-jose/go-jose/v4/testutils/require" ) -func buildAggTestRecordBatch(t *testing.T) *operators.RecordBatch { - names, cols := generateAggTestColumns() - mem := memory.NewGoAllocator() - - arrowCols := make([]arrow.Array, len(cols)) - fields := make([]arrow.Field, len(cols)) - - for i, col := range cols { - switch v := col.(type) { - - case []int32: - b := array.NewInt32Builder(mem) - defer b.Release() - b.AppendValues(v, nil) - arrowCols[i] = b.NewArray() - - case []string: - b := array.NewStringBuilder(mem) - defer b.Release() - b.AppendValues(v, nil) - arrowCols[i] = b.NewArray() - - case []float64: - b := array.NewFloat64Builder(mem) - defer b.Release() - b.AppendValues(v, nil) - arrowCols[i] = b.NewArray() - - default: - t.Fatalf("unsupported type in generateAggTestColumns") - } - - fields[i] = arrow.Field{Name: names[i], Type: arrowCols[i].DataType()} - } - - return &operators.RecordBatch{ - Schema: arrow.NewSchema(fields, nil), - Columns: arrowCols, - RowCount: uint64(len(cols[0].([]int32))), - } -} - func TestSortInit(t *testing.T) { // Simple passing test t.Run("sort Exec init", func(t *testing.T) { @@ -461,6 +416,8 @@ func TestCompareArrowValues_AllTypes(t *testing.T) { fsArr.Release() fsb.Release() } + +// Top-K sort tests kept simple and grouped into two test functions func buildInt8(mem memory.Allocator, vals []int8) *array.Int8 { b := array.NewInt8Builder(mem) for _, v := range vals { @@ -571,43 +528,93 @@ func buildBool(mem memory.Allocator, vals []bool) *array.Boolean { return arr } -func TestBasicTopKSortExpr(t *testing.T) { - t.Run("TopK Sort", func(t *testing.T) { +// Consolidated TopK tests: two functions with multiple subtests, placed at file bottom. 
+func TestTopKSort_BasicAndValues(t *testing.T) { + t.Run("AgeDesc_Top5", func(t *testing.T) { proj := aggProject() - nameExpr := Expr.NewColumnResolve("name") - nameSK := NewSortKey(nameExpr, true) ageExpr := Expr.NewColumnResolve("age") ageSK := NewSortKey(ageExpr, false) - sortExec, err := NewTopKSortExec(proj, CombineSortKeys(nameSK, ageSK), 5) + + sortExec, err := NewTopKSortExec(proj, CombineSortKeys(ageSK), 5) if err != nil { - t.Fatalf("unexpected error from NewTopKSortExec : %v\n", err) + t.Fatalf("NewTopKSortExec error: %v", err) + } + rb, err := sortExec.Next(5) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + if rb.RowCount != 5 { + t.Fatalf("expected 5 rows, got %d", rb.RowCount) + } + ages := rb.Columns[2].(*array.Int32) + expected := []int32{50, 48, 46, 45, 43} + for i := range expected { + if ages.Value(i) != expected[i] { + t.Fatalf("age mismatch at %d: expected %v got %v", i, expected[i], ages.Value(i)) + } } - t.Logf("%v\n", sortExec) + for _, c := range rb.Columns { + c.Release() + } + if err := sortExec.Close(); err != nil { + t.Fatalf("close error: %v", err) + } + }) + t.Run("KGreaterThanRows_ReturnsAll", func(t *testing.T) { + proj := aggProject() + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + sortExec, err := NewTopKSortExec(proj, CombineSortKeys(ageSK), 100) + if err != nil { + t.Fatalf("NewTopKSortExec error: %v", err) + } + rb, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("Next error: %v", err) + } + if rb.RowCount == 0 { + t.Fatalf("expected rows when K > total rows") + } + for _, c := range rb.Columns { + c.Release() + } + if err := sortExec.Close(); err != nil { + t.Fatalf("close error: %v", err) + } }) } -func TestOne(t *testing.T) { - v := compute.GetExecCtx(context.Background()) - names := v.Registry.GetFunctionNames() - for i, name := range names { - fmt.Printf("%d: %v\n", i, name) - } - /* - mem := memory.NewGoAllocator() - floatB := 
array.NewFloat64Builder(mem) - floatB.AppendValues([]float64{10.5, 20.3, 30.1, 40.7, 50.2}, []bool{true, true, true, true, true}) - pos := array.NewInt32Builder(mem) - pos.AppendValues([]int32{1, 3, 4}, []bool{true, true, true}) - - dat, err := compute.Take(context.TODO(), *compute.DefaultTakeOptions(), compute.NewDatum(floatB.NewArray()), compute.NewDatum(pos.NewArray())) +func TestTopKSort_CombinedAndPagination(t *testing.T) { + t.Run("CombinedKeys_Pagination_TotalMatchesK", func(t *testing.T) { + proj := aggProject() + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + sortExec, err := NewTopKSortExec(proj, CombineSortKeys(ageSK, nameSK), 7) if err != nil { - t.Fatalf("Take failed: %v", err) + t.Fatalf("NewTopKSortExec error: %v", err) } - array, ok := dat.(*compute.ArrayDatum) - if !ok { - t.Logf("expected an array to be returned but got something else %T\n", dat) + total := uint64(0) + for _, sz := range []uint16{3, 3, 3} { + rb, err := sortExec.Next(sz) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("Next error: %v", err) + } + total += rb.RowCount + for _, c := range rb.Columns { + c.Release() + } + if errors.Is(err, io.EOF) { + break + } } - t.Logf("data: %v\n", array.MakeArray()) - */ + if total != 7 { + t.Fatalf("expected total 7 rows, got %d", total) + } + if err := sortExec.Close(); err != nil { + t.Fatalf("close error: %v", err) + } + }) } diff --git a/src/Backend/opti-sql-go/operators/filter/limit.go b/src/Backend/opti-sql-go/operators/filter/limit.go index e4c93a5..6161c1b 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit.go +++ b/src/Backend/opti-sql-go/operators/filter/limit.go @@ -69,3 +69,13 @@ func (l *LimitExec) Schema() *arrow.Schema { func (l *LimitExec) Close() error { return l.input.Close() } + +/* +type Distinct struct { + child operators.Operator + schema *arrow.Schema + colExpr Expr.Expression // resolves to 
column that we want distinct values of + seenValues map[string]struct{} // arrow.Array.value(i) (stored and compared as string , structs occupie no space + done bool +} +*/ From cb72c104cd108658e6793d00f537e1b91707996c Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Sun, 30 Nov 2025 00:37:44 -0500 Subject: [PATCH 12/21] Fix:PR comments --- .../opti-sql-go/operators/Join/nested.go | 1 - .../opti-sql-go/operators/Join/nested_test.go | 7 ---- .../opti-sql-go/operators/aggr/sort.go | 39 +++++++++++-------- .../opti-sql-go/operators/aggr/sort_test.go | 9 ++--- 4 files changed, 27 insertions(+), 29 deletions(-) delete mode 100644 src/Backend/opti-sql-go/operators/Join/nested.go delete mode 100644 src/Backend/opti-sql-go/operators/Join/nested_test.go diff --git a/src/Backend/opti-sql-go/operators/Join/nested.go b/src/Backend/opti-sql-go/operators/Join/nested.go deleted file mode 100644 index 2502e5b..0000000 --- a/src/Backend/opti-sql-go/operators/Join/nested.go +++ /dev/null @@ -1 +0,0 @@ -package join diff --git a/src/Backend/opti-sql-go/operators/Join/nested_test.go b/src/Backend/opti-sql-go/operators/Join/nested_test.go deleted file mode 100644 index 0428968..0000000 --- a/src/Backend/opti-sql-go/operators/Join/nested_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package join - -import "testing" - -func TestNested(t *testing.T) { - // Simple passing test -} diff --git a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index aa78ca9..18ca64a 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -52,7 +52,7 @@ func CombineSortKeys(sk ...*SortKey) []SortKey { } type SortExec struct { - child operators.Operator + input operators.Operator schema *arrow.Schema sortKeys []SortKey // resolves to columns // internal book keeping @@ -65,7 +65,7 @@ type SortExec struct { func NewSortExec(child operators.Operator, sortKeys []SortKey) (*SortExec, error) { return &SortExec{ - 
child: child, + input: child, schema: child.Schema(), sortKeys: sortKeys, }, nil @@ -84,7 +84,7 @@ func (s *SortExec) Next(n uint16) (*operators.RecordBatch, error) { mem := memory.NewGoAllocator() var count uint64 for { - childBatch, err := s.child.Next(math.MaxUint16) + childBatch, err := s.input.Next(math.MaxUint16) if err != nil { if errors.Is(err, io.EOF) { break @@ -116,8 +116,10 @@ func (s *SortExec) Next(n uint16) (*operators.RecordBatch, error) { return nil, err } // now update all mappings + idxArray := idxToArrowArray(idx, mem) + defer idxArray.Release() for i := range len(allColumns) { - arr, err := compute.TakeArray(context.TODO(), allColumns[i], idxToArrowArray(idx, mem)) + arr, err := compute.TakeArray(context.TODO(), allColumns[i], idxArray) if err != nil { return nil, err } @@ -128,6 +130,9 @@ func (s *SortExec) Next(n uint16) (*operators.RecordBatch, error) { } var readSize uint64 remaining := s.totalRows - s.consumedOffset + if remaining == 0 { + return nil, io.EOF + } if remaining < uint64(n) { // if n is more than we have left just read up to remaining readSize = uint64(remaining) @@ -152,12 +157,13 @@ func (s *SortExec) Schema() *arrow.Schema { return s.schema } func (s *SortExec) Close() error { - return s.child.Close() + return s.input.Close() } func (s *SortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([]arrow.Array, error) { ctx := context.TODO() resultColumns := make([]arrow.Array, len(s.schema.Fields())) offsetArray := genoffsetTakeIdx(s.consumedOffset, readsize, mem) + defer offsetArray.Release() for i := range s.totalColumns { sortArr := s.totalColumns[i] arr, err := compute.TakeArray(ctx, sortArr, offsetArray) @@ -175,7 +181,7 @@ func (s *SortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([] only sort and keep the top k elements in memory */ type TopKSortExec struct { - child operators.Operator + input operators.Operator schema *arrow.Schema sortKeys []SortKey // resolves to columns k uint16 // top 
k @@ -184,15 +190,14 @@ type TopKSortExec struct { heap []heapRow // at any one point this will only hold k elements totalRows uint64 consumedOffset uint64 - consumed bool // did we finish reading all of the child record batches? + consumed bool // did we finish reading all of the input record batches? done bool } func NewTopKSortExec(child operators.Operator, sortKeys []SortKey, k uint16) (*TopKSortExec, error) { - fmt.Printf("k:%v\n", k) size := len(child.Schema().Fields()) return &TopKSortExec{ - child: child, + input: child, schema: child.Schema(), sortKeys: sortKeys, k: k, @@ -210,7 +215,7 @@ func (t *TopKSortExec) Next(n uint16) (*operators.RecordBatch, error) { mem := memory.NewGoAllocator() if !t.consumed { for { - childBatch, err := t.child.Next(math.MaxUint16) + childBatch, err := t.input.Next(math.MaxUint16) if err != nil { if errors.Is(err, io.EOF) { t.consumed = true @@ -254,12 +259,12 @@ func (t *TopKSortExec) Schema() *arrow.Schema { return t.schema } func (t *TopKSortExec) Close() error { - return t.child.Close() + return t.input.Close() } type heapRow struct { rowIdx uint64 - keys []interface{} // colummns + keys []interface{} // columns } /* @@ -272,7 +277,7 @@ func (t *TopKSortExec) UpdateTopKSorted(newBatch *operators.RecordBatch, sortKey for i, sk := range sortKeys { arr, err := Expr.EvalExpression(sk.Expr, newBatch) if err != nil { - panic(err) + return err } keyCols[i] = arr } @@ -296,7 +301,6 @@ func (t *TopKSortExec) UpdateTopKSorted(newBatch *operators.RecordBatch, sortKey } sortBySortKeys(tmpBuff, sortKeys) - fmt.Printf("buff: %v\nTopK:%v\n", tmpBuff, t.k) tk := min(int(t.k), len(tmpBuff)) // in case k > len(tmpBuff) topK := tmpBuff[:tk] var idxArr []uint64 @@ -304,6 +308,7 @@ func (t *TopKSortExec) UpdateTopKSorted(newBatch *operators.RecordBatch, sortKey idxArr = append(idxArr, topK[i].rowIdx) } takeArray := idxToArrowArray(idxArr, mem) + defer takeArray.Release() count := newBatch.Schema.NumFields() for i := range count { sc, err := 
compute.TakeArray(context.Background(), allColumns[i], takeArray) @@ -345,6 +350,7 @@ func (t *TopKSortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ctx := context.TODO() resultColumns := make([]arrow.Array, len(t.schema.Fields())) offsetArray := genoffsetTakeIdx(t.consumedOffset, readsize, mem) + defer offsetArray.Release() for i := range t.sortedColumns { sortArr := t.sortedColumns[i] arr, err := compute.TakeArray(ctx, sortArr, offsetArray) @@ -732,6 +738,7 @@ func comparePrimitive(a, b any) int { func idxToArrowArray(v []uint64, mem memory.Allocator) arrow.Array { // turn to array first b := array.NewUint64Builder(mem) + defer b.Release() for _, val := range v { b.Append(val) } @@ -740,9 +747,9 @@ func idxToArrowArray(v []uint64, mem memory.Allocator) arrow.Array { } func genoffsetTakeIdx(offset, size uint64, mem memory.Allocator) arrow.Array { b := array.NewUint64Builder(mem) + defer b.Release() for i := range size { b.Append(offset + i) } - arr := b.NewArray() - return arr + return b.NewArray() } diff --git a/src/Backend/opti-sql-go/operators/aggr/sort_test.go b/src/Backend/opti-sql-go/operators/aggr/sort_test.go index 7e77e4f..20b8afe 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort_test.go @@ -2,7 +2,6 @@ package aggr import ( "errors" - "fmt" "io" "opti-sql-go/Expr" "opti-sql-go/operators/project" @@ -37,13 +36,13 @@ func TestSortInit(t *testing.T) { }) t.Run("SortKey options", func(t *testing.T) { proj := aggProject() - _, err := NewSortExec(proj, []SortKey{*NewSortKey(col("-"), false, false)}) + _, err := NewSortExec(proj, []SortKey{*NewSortKey(col("id"), false, false)}) if err != nil { t.Fatal(err) } }) - t.Run("tok k sort exec init", func(t *testing.T) { + t.Run("top k sort exec init", func(t *testing.T) { proj := aggProject() topKVal := 5 topK, err := NewTopKSortExec(proj, nil, uint16(topKVal)) @@ -99,7 +98,7 @@ func TestBasicSortExpr(t *testing.T) { } 
t.Fatalf("unexpected error from sortExec Next : %v\n", err) } - fmt.Println(sortedBatch.PrettyPrint()) + t.Logf("%v\n", sortedBatch.PrettyPrint()) } }) } @@ -124,7 +123,7 @@ func TestFullSortOverNetwork(t *testing.T) { if err != nil { t.Fatalf("unexpected error %v\n", err) } - fmt.Println(rc.PrettyPrint()) + t.Logf("%v\n", rc.PrettyPrint()) }) From 1b40587af88ae321ed59d3c9c2aeef582d64979d Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Sun, 30 Nov 2025 02:23:48 -0500 Subject: [PATCH 13/21] feat:implement Distinct operator |test included --- .../opti-sql-go/operators/filter/limit.go | 180 ++++++++++- .../operators/filter/limit_test.go | 281 ++++++++++++++++++ 2 files changed, 454 insertions(+), 7 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/filter/limit.go b/src/Backend/opti-sql-go/operators/filter/limit.go index 6161c1b..b1e28a6 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit.go +++ b/src/Backend/opti-sql-go/operators/filter/limit.go @@ -1,14 +1,23 @@ package filter import ( + "context" + "errors" "io" + "math" + "opti-sql-go/Expr" "opti-sql-go/operators" + "strings" "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" ) var ( _ = (operators.Operator)(&LimitExec{}) + _ = (operators.Operator)(&Distinct{}) ) type LimitExec struct { @@ -70,12 +79,169 @@ func (l *LimitExec) Close() error { return l.input.Close() } -/* type Distinct struct { - child operators.Operator - schema *arrow.Schema - colExpr Expr.Expression // resolves to column that we want distinct values of - seenValues map[string]struct{} // arrow.Array.value(i) (stored and compared as string , structs occupie no space - done bool + input operators.Operator + schema *arrow.Schema + colExpr []Expr.Expression // resolves to column that we want distinct values of + seenValues map[string]struct{} // arrow.Array.value(i) (stored and compared as string , 
structs occupie no space + distinctValuesArray []arrow.Array // hold arrays of distinct values + consumedOffset uint64 // where did we leave off at when returning the distinct arrays to the caller + consumedInput bool // did we consume all the input record batches? + totalRows uint64 + done bool +} + +func NewDistinctExec(input operators.Operator, colExpr []Expr.Expression) (*Distinct, error) { + return &Distinct{ + input: input, + schema: input.Schema(), + colExpr: colExpr, + seenValues: make(map[string]struct{}), + distinctValuesArray: make([]arrow.Array, len(input.Schema().Fields())), + }, nil +} + +// pipeline breaker. consume all, if row combonation is already seen, dont include in output +func (d *Distinct) Next(n uint16) (*operators.RecordBatch, error) { + if d.done { + return nil, io.EOF + } + mem := memory.NewGoAllocator() + if !d.consumedInput { + for { + childBatch, err := d.input.Next(math.MaxUint16) + if err != nil { + if errors.Is(err, io.EOF) { + d.consumedInput = true + if d.distinctValuesArray[0] != nil { // nill check in case of no distict elements being found or even just input operator doesnt return anything + d.totalRows = uint64(d.distinctValuesArray[0].Len()) + } + break + } + return nil, err + } + // resolve the columns we care about + evaluatedArrays := make([]arrow.Array, len(d.colExpr)) + for i := range d.colExpr { + arr, err := Expr.EvalExpression(d.colExpr[i], childBatch) + if err != nil { + return nil, err + } + evaluatedArrays[i] = arr + } + var idxTracker []int32 + // Now iterate through each row in the batch + numRows := int(childBatch.RowCount) + for rowIdx := 0; rowIdx < numRows; rowIdx++ { + // Build a key from the combination of values in this row + var keyBuilder strings.Builder + for colIdx, arr := range evaluatedArrays { + if colIdx > 0 { + keyBuilder.WriteString("|") // separator between columns + } + // Check if value is null + if arr.IsNull(rowIdx) { + keyBuilder.WriteString("NULL") + } else { + 
keyBuilder.WriteString(arr.ValueStr(rowIdx)) + } + } + + key := keyBuilder.String() + if _, seen := d.seenValues[key]; !seen { + d.seenValues[key] = struct{}{} + idxTracker = append(idxTracker, int32(rowIdx)) + // check if its been seen, if it hasnt been add it to the table, + // and keep track of the index so we can grab the value from the array + } + } + takeArray := idxToArrowArray(idxTracker, mem) + for i := range len(childBatch.Columns) { + largeArray := childBatch.Columns[i] + uniqueElements, err := compute.TakeArray(context.TODO(), largeArray, takeArray) + if err != nil { + return nil, err + } + joinedArray, err := joinArrays(d.distinctValuesArray[i], uniqueElements, mem) + if err != nil { + return nil, err + } + d.distinctValuesArray[i] = joinedArray + } + } + } + var readsize uint64 + remaining := d.totalRows - d.consumedOffset + if remaining == 0 { // we've consumed all the distinct arrays, operator is finished + d.done = true + return nil, io.EOF + } + // If remaining >= n, read n. Otherwise read what's left. 
+ if remaining >= uint64(n) { + readsize = uint64(n) + } else { + readsize = remaining + } + distinctArraySlice, err := d.consumeDistinctArrays(readsize, mem) + if err != nil { + return nil, err + } + + var rc uint64 + if len(distinctArraySlice) == 0 { + rc = 0 + } else { + rc = uint64(distinctArraySlice[0].Len()) + } + return &operators.RecordBatch{ + Schema: d.schema, + Columns: distinctArraySlice, + RowCount: rc, + }, nil +} +func (d *Distinct) Schema() *arrow.Schema { return d.schema } +func (d *Distinct) Close() error { return d.input.Close() } +func (d *Distinct) consumeDistinctArrays(readSize uint64, mem memory.Allocator) ([]arrow.Array, error) { + ctx := context.TODO() + resultColumns := make([]arrow.Array, len(d.schema.Fields())) + offsetArray := genoffsetTakeIdx(d.consumedOffset, readSize, mem) + defer offsetArray.Release() + for i := range d.distinctValuesArray { + col := d.distinctValuesArray[i] + slice, err := compute.TakeArray(ctx, col, offsetArray) + if err != nil { + return nil, err + } + resultColumns[i] = slice + } + d.consumedOffset += readSize + return resultColumns, nil +} + +func idxToArrowArray(v []int32, mem memory.Allocator) arrow.Array { + // turn to array first + b := array.NewInt32Builder(mem) + defer b.Release() + for _, val := range v { + b.Append(val) + } + arr := b.NewArray() + return arr +} +func joinArrays(a1, a2 arrow.Array, mem memory.Allocator) (arrow.Array, error) { + if a1 == nil || a1.Len() == 0 { + return a2, nil + } + if a2 == nil || a2.Len() == 0 { + return a1, nil + } + return array.Concatenate([]arrow.Array{a1, a2}, mem) +} +func genoffsetTakeIdx(offset, size uint64, mem memory.Allocator) arrow.Array { + b := array.NewUint64Builder(mem) + defer b.Release() + for i := range size { + b.Append(offset + i) + } + return b.NewArray() } -*/ diff --git a/src/Backend/opti-sql-go/operators/filter/limit_test.go b/src/Backend/opti-sql-go/operators/filter/limit_test.go index 64cd006..ef5aaba 100644 --- 
a/src/Backend/opti-sql-go/operators/filter/limit_test.go +++ b/src/Backend/opti-sql-go/operators/filter/limit_test.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v17/arrow" "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" ) func generateTestColumns() ([]string, []any) { @@ -45,11 +46,44 @@ func generateTestColumns() ([]string, []any) { return names, columns } +func generateTestColumnsDistinct() ([]string, []any) { + names := []string{ + "city", + "state", + "product", + } + columns := []any{ + // city - lots of repeated values + []string{ + "Boston", "Boston", "New York", "Boston", "Chicago", + "New York", "Boston", "Chicago", "New York", "Boston", + "Chicago", "Boston", "New York", "Chicago", "Boston", + }, + // state - corresponds to cities + []string{ + "MA", "MA", "NY", "MA", "IL", + "NY", "MA", "IL", "NY", "MA", + "IL", "MA", "NY", "IL", "MA", + }, + // product - repeated products + []string{ + "Laptop", "Phone", "Laptop", "Mouse", "Laptop", + "Phone", "Laptop", "Phone", "Tablet", "Mouse", + "Laptop", "Phone", "Laptop", "Tablet", "Mouse", + }, + } + return names, columns +} func basicProject() *project.InMemorySource { names, col := generateTestColumns() v, _ := project.NewInMemoryProjectExec(names, col) return v } +func distinctProject() *project.InMemorySource { + names, col := generateTestColumnsDistinct() + v, _ := project.NewInMemoryProjectExec(names, col) + return v +} func maskAny(t *testing.T, src *project.InMemorySource, expr Expr.Expression, expected []bool) { t.Helper() @@ -536,3 +570,250 @@ func TestLikeEdgeCases(t *testing.T) { maskAny(t, src, expr, expected) }) } + +// Distinct test cases + +func TestDistinctInit(t *testing.T) { + t.Run("distinct init and interface check", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating 
new distinct operator") + } + s := distinctExec.Schema() + if !s.Equal(proj.Schema()) { + t.Fatalf("distinct schema should be the exact same as input but recieved %v instead of %v", s, proj.Schema()) + } + t.Logf("distinct operator %v\n", distinctExec) + if err := distinctExec.Close(); err != nil { + t.Fatalf("unexpected error occured closing operator %v\n", err) + } + distinctExec.done = true + _, err = distinctExec.Next(3) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF but got %v\n", err) + } + }) + t.Run("Basic Next operator test", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + rc, err := distinctExec.Next(5) + if err != nil { + t.Fatalf("error occured grabbing next values from distinct operator %v", err) + } + t.Logf("rc:\t%v\n", rc.PrettyPrint()) + + }) + t.Run("Basic Next operator test | several distinct columns", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + Expr.NewColumnResolve("state"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + rc, err := distinctExec.Next(5) + if err != nil { + t.Fatalf("error occured grabbing next values from distinct operator %v", err) + } + t.Logf("rc:\t%v\n", rc.PrettyPrint()) + + }) +} +func TestDistinctNext(t *testing.T) { + t.Run("return limited columns", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + Expr.NewColumnResolve("state"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + batchsize := 1 + count := 0 + for { + rc, err := distinctExec.Next(uint16(batchsize)) + if err != nil { + if 
errors.Is(err, io.EOF) { + break + } + t.Fatalf("error occured grabbing next values from distinct operator %v", err) + } + t.Logf("\t%v\n", rc.PrettyPrint()) + if rc.RowCount != uint64(batchsize) { + t.Fatalf("expected record batch of size %d but got %d", batchsize, rc.RowCount) + } + count += int(rc.RowCount) + } + // distinctProject has 3 distinct (city,state) combinations + if count != 3 { + t.Fatalf("expected total distinct rows 3, got %d", count) + } + }) + + t.Run("single column distinct returns expected order", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + // request all in one go + rc, err := distinctExec.Next(10) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + if rc.RowCount != 3 { + t.Fatalf("expected 3 distinct cities, got %d", rc.RowCount) + } + // Expect first-seen order: Boston, New York, Chicago + cityArr := rc.Columns[0].(*array.String) + expect := []string{"Boston", "New York", "Chicago"} + for i := 0; i < int(rc.RowCount); i++ { + if cityArr.Value(i) != expect[i] { + t.Fatalf("expected city %s at idx %d, got %s", expect[i], i, cityArr.Value(i)) + } + } + for _, c := range rc.Columns { + c.Release() + } + }) + + t.Run("Next returns EOF after consumption and Close works", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + Expr.NewColumnResolve("state"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + // consume all + _, err = distinctExec.Next(10) + if err != nil && !errors.Is(err, io.EOF) { + print(1) + // it's ok if we got results; call Next again until EOF + } + // subsequent Next should return EOF + _, err = distinctExec.Next(1) + if !errors.Is(err, io.EOF) { + 
t.Fatalf("expected EOF after consuming distinct results, got %v", err) + } + if err := distinctExec.Close(); err != nil { + t.Fatalf("unexpected error on Close: %v", err) + } + }) +} + +func TestJoinArrays(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("first array nil or empty - returns second", func(t *testing.T) { + builder := array.NewInt32Builder(mem) + defer builder.Release() + builder.AppendValues([]int32{1, 2, 3}, nil) + a2 := builder.NewArray() + defer a2.Release() + + // Test with nil + result, err := joinArrays(nil, a2, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + + // Test with empty array + emptyBuilder := array.NewInt32Builder(mem) + defer emptyBuilder.Release() + a1Empty := emptyBuilder.NewArray() + defer a1Empty.Release() + + result, err = joinArrays(a1Empty, a2, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + }) + + t.Run("second array nil or empty - returns first", func(t *testing.T) { + builder := array.NewInt32Builder(mem) + defer builder.Release() + builder.AppendValues([]int32{4, 5, 6}, nil) + a1 := builder.NewArray() + defer a1.Release() + + // Test with nil + result, err := joinArrays(a1, nil, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + + // Test with empty array + emptyBuilder := array.NewInt32Builder(mem) + defer emptyBuilder.Release() + a2Empty := emptyBuilder.NewArray() + defer a2Empty.Release() + + result, err = joinArrays(a1, a2Empty, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + }) + + t.Run("both arrays have data - concatenates", func(t *testing.T) { + builder1 := array.NewInt32Builder(mem) + defer 
builder1.Release() + builder1.AppendValues([]int32{1, 2, 3}, nil) + a1 := builder1.NewArray() + defer a1.Release() + + builder2 := array.NewInt32Builder(mem) + defer builder2.Release() + builder2.AppendValues([]int32{4, 5, 6}, nil) + a2 := builder2.NewArray() + defer a2.Release() + + result, err := joinArrays(a1, a2, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 6 { + t.Fatalf("expected length 6, got %d", result.Len()) + } + + // Verify concatenated values + int32Result := result.(*array.Int32) + expectedValues := []int32{1, 2, 3, 4, 5, 6} + for i := 0; i < int32Result.Len(); i++ { + if int32Result.Value(i) != expectedValues[i] { + t.Fatalf("at index %d: expected %d, got %d", i, expectedValues[i], int32Result.Value(i)) + } + } + }) +} From deb94a909272c631c909db3eabda706f5585c8fc Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Sun, 30 Nov 2025 02:59:34 -0500 Subject: [PATCH 14/21] fix:PR comments --- .../opti-sql-go/operators/filter/limit.go | 25 +++++++++++++------ .../operators/filter/limit_test.go | 13 ++++++++-- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/filter/limit.go b/src/Backend/opti-sql-go/operators/filter/limit.go index b1e28a6..57b77d6 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit.go +++ b/src/Backend/opti-sql-go/operators/filter/limit.go @@ -3,6 +3,7 @@ package filter import ( "context" "errors" + "fmt" "io" "math" "opti-sql-go/Expr" @@ -17,7 +18,7 @@ import ( var ( _ = (operators.Operator)(&LimitExec{}) - _ = (operators.Operator)(&Distinct{}) + _ = (operators.Operator)(&DistinctExec{}) ) type LimitExec struct { @@ -79,7 +80,7 @@ func (l *LimitExec) Close() error { return l.input.Close() } -type Distinct struct { +type DistinctExec struct { input operators.Operator schema *arrow.Schema colExpr []Expr.Expression // resolves to column that we want distinct values of @@ -91,8 +92,11 @@ type Distinct struct { done bool } -func 
NewDistinctExec(input operators.Operator, colExpr []Expr.Expression) (*Distinct, error) { - return &Distinct{ +func NewDistinctExec(input operators.Operator, colExpr []Expr.Expression) (*DistinctExec, error) { + if len(colExpr) == 0 { + return nil, errors.New("Distinct operator requires at least one column expression") + } + return &DistinctExec{ input: input, schema: input.Schema(), colExpr: colExpr, @@ -102,7 +106,7 @@ func NewDistinctExec(input operators.Operator, colExpr []Expr.Expression) (*Dist } // pipeline breaker. consume all, if row combonation is already seen, dont include in output -func (d *Distinct) Next(n uint16) (*operators.RecordBatch, error) { +func (d *DistinctExec) Next(n uint16) (*operators.RecordBatch, error) { if d.done { return nil, io.EOF } @@ -113,6 +117,7 @@ func (d *Distinct) Next(n uint16) (*operators.RecordBatch, error) { if err != nil { if errors.Is(err, io.EOF) { d.consumedInput = true + fmt.Printf("distinctArray: \t%v\n", d.distinctValuesArray) if d.distinctValuesArray[0] != nil { // nill check in case of no distict elements being found or even just input operator doesnt return anything d.totalRows = uint64(d.distinctValuesArray[0].Len()) } @@ -166,6 +171,7 @@ func (d *Distinct) Next(n uint16) (*operators.RecordBatch, error) { if err != nil { return nil, err } + // uniqueElements.Release() d.distinctValuesArray[i] = joinedArray } } @@ -199,9 +205,12 @@ func (d *Distinct) Next(n uint16) (*operators.RecordBatch, error) { RowCount: rc, }, nil } -func (d *Distinct) Schema() *arrow.Schema { return d.schema } -func (d *Distinct) Close() error { return d.input.Close() } -func (d *Distinct) consumeDistinctArrays(readSize uint64, mem memory.Allocator) ([]arrow.Array, error) { +func (d *DistinctExec) Schema() *arrow.Schema { return d.schema } +func (d *DistinctExec) Close() error { + operators.ReleaseArrays(d.distinctValuesArray) + return d.input.Close() +} +func (d *DistinctExec) consumeDistinctArrays(readSize uint64, mem memory.Allocator) 
([]arrow.Array, error) { ctx := context.TODO() resultColumns := make([]arrow.Array, len(d.schema.Fields())) offsetArray := genoffsetTakeIdx(d.consumedOffset, readSize, mem) diff --git a/src/Backend/opti-sql-go/operators/filter/limit_test.go b/src/Backend/opti-sql-go/operators/filter/limit_test.go index ef5aaba..9516a89 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit_test.go +++ b/src/Backend/opti-sql-go/operators/filter/limit_test.go @@ -574,6 +574,15 @@ func TestLikeEdgeCases(t *testing.T) { // Distinct test cases func TestDistinctInit(t *testing.T) { + t.Run("distinct no expressions", func(t *testing.T) { + + proj := distinctProject() + exprs := []Expr.Expression{} + _, err := NewDistinctExec(proj, exprs) + if err == nil { + t.Fatalf("expected error from passing in no expressions to distinct operator but got nil") + } + }) t.Run("distinct init and interface check", func(t *testing.T) { proj := distinctProject() exprs := []Expr.Expression{ @@ -613,7 +622,7 @@ func TestDistinctInit(t *testing.T) { t.Logf("rc:\t%v\n", rc.PrettyPrint()) }) - t.Run("Basic Next operator test | several distinct columns", func(t *testing.T) { + t.Run("BasicNextOperatorWithMultipleDistinctColumns", func(t *testing.T) { proj := distinctProject() exprs := []Expr.Expression{ Expr.NewColumnResolve("city"), @@ -707,7 +716,7 @@ func TestDistinctNext(t *testing.T) { // consume all _, err = distinctExec.Next(10) if err != nil && !errors.Is(err, io.EOF) { - print(1) + t.Fatalf("unexpected error while consuming distinct results: %v", err) // it's ok if we got results; call Next again until EOF } // subsequent Next should return EOF From 9d82ced59c7523afa85c22ec782efc16e49cfa84 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Wed, 3 Dec 2025 13:44:28 -0500 Subject: [PATCH 15/21] feat:Implement hash join --- .../opti-sql-go/operators/Join/hashJoin.go | 497 +++++++++++++++ .../operators/Join/hashJoin_test.go | 579 +++++++++++++++++- .../opti-sql-go/operators/filter/limit.go | 2 +- 3 files 
changed, 1074 insertions(+), 4 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/Join/hashJoin.go b/src/Backend/opti-sql-go/operators/Join/hashJoin.go index 2502e5b..1671a14 100644 --- a/src/Backend/opti-sql-go/operators/Join/hashJoin.go +++ b/src/Backend/opti-sql-go/operators/Join/hashJoin.go @@ -1 +1,498 @@ package join + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "math" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "opti-sql-go/operators/aggr" + "strings" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" +) + +var ( + ErrInvalidJoinClauseCount = func(l, r int) error { + return fmt.Errorf("mismatched number of join expressions between left and right, left: %d vs right: %d", l, r) + } +) + +var ( + _ = (operators.Operator)(&SortMergeJoinExec{}) +) + +type JoinType int + +const ( + InnerJoin JoinType = iota + LeftJoin + RightJoin +) + +func (j JoinType) String() string { + switch j { + case InnerJoin: + return "INNER JOIN" + case LeftJoin: + return "LEFT JOIN" + case RightJoin: + return "RIGHT JOIN" + default: + return "UNKNOWN JOIN TYPE" + } +} + +// taking in arrays of expressions allows for multiple join clauses +// Example: JOIN t2 ON t1.region = t2.region AND t1.city = t2.city +type JoinClause struct { + leftS []Expr.Expression + rightS []Expr.Expression +} + +func (j *JoinClause) String() string { + var b bytes.Buffer + + // defensive: if lengths differ, print whatever pairs exist + n := len(j.leftS) + if len(j.rightS) < n { + n = len(j.rightS) + } + + for i := 0; i < n; i++ { + b.WriteString(j.leftS[i].String()) + b.WriteString(" = ") + b.WriteString(j.rightS[i].String()) + + // add separator between pairs + if i < n-1 { + b.WriteString(" AND ") + } + } + + return b.String() +} + +func NewJoinClause(leftS, rightS []Expr.Expression) JoinClause { + return JoinClause{ + leftS: leftS, + rightS: 
rightS, + } +} + +// use sort merge join when the output needs to be sorted on the join keys +type SortMergeJoinExec struct { + leftSource operators.Operator + rightSource operators.Operator + clause JoinClause + joinType JoinType + filters []Expr.Expression //TODO: incorpoarte + schema *arrow.Schema + done bool + // internalState + outputBatch []arrow.Array // intermediate storage for output arrays + +} + +func NewSortMergeJoinExec(left operators.Operator, right operators.Operator, clause JoinClause, joinType JoinType, filters []Expr.Expression) (*SortMergeJoinExec, error) { + fmt.Printf("join clause: \t%v\njoin Type: \t%v\n", clause.String(), joinType) + schema, err := joinSchemas(left.Schema(), right.Schema()) + if err != nil { + return nil, err + } + // handle sorting this here. so the .Next function has less logic + if len(clause.leftS) != len(clause.rightS) { + return nil, ErrInvalidJoinClauseCount(len(clause.leftS), len(clause.rightS)) + } + var Lsk []aggr.SortKey + for i := 0; i < len(clause.leftS); i++ { + Lsk = append(Lsk, aggr.SortKey{ + Expr: clause.leftS[i], + Ascending: true, + }) + } + var Rsk []aggr.SortKey + for i := 0; i < len(clause.rightS); i++ { + Rsk = append(Rsk, aggr.SortKey{ + Expr: clause.rightS[i], + Ascending: true, + }) + } + ls, err := aggr.NewSortExec(left, Lsk) + if err != nil { + return nil, err + } + rs, err := aggr.NewSortExec(right, Rsk) + if err != nil { + return nil, err + } + + return &SortMergeJoinExec{ + leftSource: rs, + rightSource: ls, + clause: clause, + joinType: joinType, + filters: filters, + schema: schema, + outputBatch: make([]arrow.Array, schema.NumFields()), + }, nil +} + +// TODO: + +func (smj *SortMergeJoinExec) Next(n uint16) (*operators.RecordBatch, error) { + if smj.done { + return nil, io.EOF + } + return nil, nil +} +func (smj *SortMergeJoinExec) Schema() *arrow.Schema { return smj.schema } +func (smj *SortMergeJoinExec) Close() error { + // do other clean up but for now just pass down to child + err1 := 
smj.leftSource.Close() + err2 := smj.rightSource.Close() + if err1 != nil { + return err1 + } + if err2 != nil { + return err2 + } + return nil +} + +// left schema + right schema, if left and right have same column name, prefix with left_ and right_ +func joinSchemas(left, right *arrow.Schema) (*arrow.Schema, error) { + // table1 : id , name , age + // table2 : id , dept , region + fields := []arrow.Field{} + + leftNames := map[string]bool{} + rightNames := map[string]bool{} + + for i := 0; i < left.NumFields(); i++ { + leftNames[left.Field(i).Name] = true + } + for i := 0; i < right.NumFields(); i++ { + rightNames[right.Field(i).Name] = true + } + // left side + for i := 0; i < left.NumFields(); i++ { + f := left.Field(i) + name := f.Name + if rightNames[name] { + name = "left_" + name + } + fields = append(fields, arrow.Field{ + Name: name, + Type: f.Type, + Nullable: f.Nullable, + Metadata: f.Metadata, + }) + } + + // right side + for i := 0; i < right.NumFields(); i++ { + f := right.Field(i) + name := f.Name + if leftNames[name] { + name = "right_" + name + } + fields = append(fields, arrow.Field{ + Name: name, + Type: f.Type, + Nullable: f.Nullable, + Metadata: f.Metadata, + }) + } + + return arrow.NewSchema(fields, nil), nil + // produces + // left_id ,name,age, right_id,dept,region +} + +// otherwise go with hash joins +type HashJoinExec struct { + leftSource operators.Operator + rightSource operators.Operator + clause JoinClause + joinType JoinType + filters []Expr.Expression //TODO: incorpoarte + schema *arrow.Schema + done bool + // internalState + outputBatch []arrow.Array // intermediate storage for output arrays + +} +type hashEntry struct { + row int +} +type joinPair struct { + leftRow int + rightRow int +} + +func NewHashJoinExec(left operators.Operator, right operators.Operator, clause JoinClause, joinType JoinType, filters []Expr.Expression) (*HashJoinExec, error) { + fmt.Printf("join clause: \t%v\njoin Type: \t%v\n", clause.String(), joinType) + 
schema, err := joinSchemas(left.Schema(), right.Schema()) + if err != nil { + return nil, err + } + if len(clause.leftS) != len(clause.rightS) { + return nil, ErrInvalidJoinClauseCount(len(clause.leftS), len(clause.rightS)) + } + return &HashJoinExec{ + leftSource: left, + rightSource: right, + clause: clause, + joinType: joinType, + filters: filters, + schema: schema, + outputBatch: make([]arrow.Array, schema.NumFields()), + }, nil +} + +func (hj *HashJoinExec) Next(_ uint16) (*operators.RecordBatch, error) { + if hj.done { + return nil, io.EOF + } + mem := memory.NewGoAllocator() + leftArr, err := consumeOperator(hj.leftSource, mem) + if err != nil { + return nil, err + } + rightArr, err := consumeOperator(hj.rightSource, mem) + if err != nil { + return nil, err + } + if len(leftArr) == 0 || len(rightArr) == 0 { + hj.done = true + return &operators.RecordBatch{ + Schema: hj.Schema(), + RowCount: uint64(0), + }, nil + } + leftRowCount := leftArr[0].Len() + rightRowCount := rightArr[0].Len() + //fmt.Printf("left:\t%v\nright:\t%v\n", leftArr, rightArr) + leftComp, err := buildComptables(hj.clause.leftS, leftArr, hj.leftSource.Schema()) + if err != nil { + return nil, err + } + + rightComp, err := buildComptables(hj.clause.rightS, rightArr, hj.rightSource.Schema()) + if err != nil { + return nil, err + } + fmt.Printf("left Comparission arrays:\t%v\nright Comparrission arrays:\t%v\n", leftComp, rightComp) + ht := buildRightHashTable(rightComp, rightRowCount) + pairs := probeJoin(leftComp, ht, leftRowCount) + if len(pairs) == 0 { + hj.done = true + return &operators.RecordBatch{ + Schema: hj.Schema(), + Columns: []arrow.Array{}, + RowCount: 0, + }, nil + } + fmt.Printf("ht:\t%v\npairs:\t%v\n", ht, pairs) + leftIdxArr, rightIdxArr, err := buildIndexArrays(mem, pairs) + if err != nil { + return nil, err + } + + fmt.Printf("leftIDX:\t%v\nrightIDX:\t%v\n", leftIdxArr, rightIdxArr) + outArr, err := hj.buildOutputArrays(mem, leftArr, rightArr, leftIdxArr, rightIdxArr) + if 
err != nil { + return nil, err + } + hj.done = true + return &operators.RecordBatch{ + Schema: hj.schema, + Columns: outArr, + RowCount: uint64(outArr[0].Len()), + }, nil +} +func (hj *HashJoinExec) Schema() *arrow.Schema { return hj.schema } +func (hj *HashJoinExec) Close() error { + // do other clean up but for now just pass down to child + err1 := hj.leftSource.Close() + err2 := hj.rightSource.Close() + if err1 != nil { + return err1 + } + if err2 != nil { + return err2 + } + return nil +} + +func consumeOperator(o operators.Operator, mem memory.Allocator) ([]arrow.Array, error) { + + AllArrays := make([]arrow.Array, o.Schema().NumFields()) // concated columns + for { + childRecordBatch, err := o.Next(math.MaxUint16) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + for i := range childRecordBatch.Columns { + if AllArrays[i] == nil { + AllArrays[i] = childRecordBatch.Columns[i] + continue + } + largerArray, err := array.Concatenate([]arrow.Array{AllArrays[i], childRecordBatch.Columns[i]}, mem) + if err != nil { + return nil, err + } + AllArrays[i] = largerArray + } + } + return AllArrays, nil +} + +func buildComptables(exprs []Expr.Expression, cols []arrow.Array, schema *arrow.Schema) ([]arrow.Array, error) { + compArr := make([]arrow.Array, len(exprs)) + for i, expr := range exprs { + arr, err := Expr.EvalExpression(expr, &operators.RecordBatch{ + Schema: schema, + Columns: cols, + RowCount: uint64(cols[0].Len()), + }) + if err != nil { + return nil, err + } + compArr[i] = arr + } + return compArr, nil + +} + +func buildRowKey(cols []arrow.Array, row int) string { + var b strings.Builder + hasNull := false + + for i, col := range cols { + if i > 0 { + b.WriteByte('|') // separator between cols + } + + if col.IsNull(row) { + hasNull = true + // Keep a placeholder so non-null rows can’t collide with “all-null” rows. 
+ b.WriteString("NULL") + continue + } + + b.WriteString(col.ValueStr(row)) + } + + // If there were no NULLs in this row, we’re done. + // Equal non-NULL rows on left/right will produce identical keys → join behaves as usual. + if !hasNull { + return b.String() + } + + // SQL semantics: any NULL in the join key means this row should not match + // anything from the other side. We “salt” the key with the identity of the + // `cols` slice so left and right sides will produce *different* full keys. + // + // This still lets rows *on the same side* share a bucket (doesn’t hurt), + // but probe from the other side will never see them as equal. + b.WriteByte('#') + b.WriteString(fmt.Sprintf("%p", cols)) + + return b.String() +} + +func buildRightHashTable(rightComp []arrow.Array, rowCount int) map[string][]hashEntry { + ht := make(map[string][]hashEntry, rowCount) + + for r := 0; r < rowCount; r++ { + key := buildRowKey(rightComp, r) + ht[key] = append(ht[key], hashEntry{row: r}) + } + return ht +} +func probeJoin( + leftComp []arrow.Array, + rightHT map[string][]hashEntry, + leftRowCount int, +) []joinPair { + var pairs []joinPair + + for l := 0; l < leftRowCount; l++ { + key := buildRowKey(leftComp, l) + matches := rightHT[key] + if len(matches) == 0 { + // inner join: skip if no matching right row + continue + } + // emit all combinations + for _, m := range matches { + pairs = append(pairs, joinPair{ + leftRow: l, + rightRow: m.row, + }) + } + } + + return pairs +} + +func buildIndexArrays( + mem memory.Allocator, + pairs []joinPair, +) (arrow.Array, arrow.Array, error) { + // use int32 indexes (Arrow Take supports that) + lb := array.NewInt32Builder(mem) + rb := array.NewInt32Builder(mem) + + for _, p := range pairs { + lb.Append(int32(p.leftRow)) + rb.Append(int32(p.rightRow)) + } + + leftIdxArr := lb.NewArray() + rightIdxArr := rb.NewArray() + lb.Release() + rb.Release() + + return leftIdxArr, rightIdxArr, nil +} + +func (hj *HashJoinExec) buildOutputArrays( + 
mem memory.Allocator, + leftCols []arrow.Array, + rightCols []arrow.Array, + leftIdxArr arrow.Array, + rightIdxArr arrow.Array, +) ([]arrow.Array, error) { + ctx := context.TODO() + + output := make([]arrow.Array, hj.schema.NumFields()) + for i := range len(leftCols) { + col := leftCols[i] + slice, err := compute.TakeArray(ctx, col, leftIdxArr) + if err != nil { + return nil, err + } + output[i] = slice + } + for i := range len(rightCols) { + col := rightCols[i] + slice, err := compute.TakeArray(ctx, col, rightIdxArr) + if err != nil { + return nil, err + } + output[i+len(leftCols)] = slice + } + return output, nil +} diff --git a/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go b/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go index 363da9e..d0fd1ec 100644 --- a/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go +++ b/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go @@ -1,7 +1,580 @@ package join -import "testing" +import ( + "errors" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "opti-sql-go/operators/project" + "strings" + "testing" -func TestHashJoin(t *testing.T) { - // Simple passing test + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +func generateDataset1WithNulls(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "name", "age", "salary"} + + // ----- id (int32) ----- + idB := array.NewInt32Builder(mem) + idVals := []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + idValid := []bool{ + true, true, false, true, true, + false, true, true, true, false, + } + idB.AppendValues(idVals, idValid) + idArr := idB.NewArray() + + // ----- name (string) ----- + nameB := array.NewStringBuilder(mem) + nameVals := []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + } + nameValid := []bool{ + true, true, true, false, true, + true, true, true, false, true, + } + 
nameB.AppendValues(nameVals, nameValid) + nameArr := nameB.NewArray() + + // ----- age (int32) ----- + ageB := array.NewInt32Builder(mem) + ageVals := []int32{28, 34, 45, 22, 31, 29, 40, 36, 50, 26} + ageValid := []bool{ + true, false, true, true, true, + true, false, true, true, true, + } + ageB.AppendValues(ageVals, ageValid) + ageArr := ageB.NewArray() + + // ----- salary (float64) ----- + salB := array.NewFloat64Builder(mem) + salVals := []float64{ + 70000, 82000, 54000, 91000, 60000, + 75000, 66000, 0, 45000, 99000, + } + salaryValid := []bool{ + true, true, true, true, true, + true, true, false, true, true, + } + salB.AppendValues(salVals, salaryValid) + salaryArr := salB.NewArray() + + return names, []arrow.Array{idArr, nameArr, ageArr, salaryArr} +} +func generateJoinDataset2(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "department", "region"} + + // ---- id (int32) ---- + // overlap on: 1,2,4,5 + // unique to dataset2: 11,12,13,14 + // and one null + idB := array.NewInt32Builder(mem) + idB.AppendValues( + []int32{1, 2, 4, 5, 11, 12, 13, 14, 3, 0}, + []bool{true, true, true, true, true, true, true, true, false, false}, // null at idx 8 and 9 + ) + idArr := idB.NewArray() + + // ---- department (string) ---- + deptB := array.NewStringBuilder(mem) + deptB.AppendValues( + []string{"HR", "Engineering", "Sales", "Finance", "Marketing", + "Support", "Research", "Security", "Unknown", "Unknown"}, + []bool{true, true, true, true, true, true, true, false, true, false}, // some nulls + ) + deptArr := deptB.NewArray() + + // ---- region (string) ---- + regionB := array.NewStringBuilder(mem) + regionB.AppendValues( + []string{"US", "EU", "EU", "APAC", "US", "US", "LATAM", "EU", "N/A", "N/A"}, + []bool{true, true, true, true, true, true, false, true, true, false}, + ) + regionArr := regionB.NewArray() + + return names, []arrow.Array{idArr, deptArr, regionArr} +} +func newSources() (*project.InMemorySource, *project.InMemorySource) { + mem 
:= memory.NewGoAllocator() + leftNames, leftCols := generateDataset1WithNulls(mem) + rightNames, rightCols := generateJoinDataset2(mem) + + leftSource, _ := project.NewInMemoryProjectExecFromArrays(leftNames, leftCols) + rightSource, _ := project.NewInMemoryProjectExecFromArrays(rightNames, rightCols) + return leftSource, rightSource +} + +func TestJoinSchemas(t *testing.T) { + + makeField := func(name string, dt arrow.DataType) arrow.Field { + return arrow.Field{Name: name, Type: dt, Nullable: true} + } + + tests := []struct { + name string + left *arrow.Schema + right *arrow.Schema + wantFields []string + }{ + { + name: "No duplicate fields", + left: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + }, nil), + right: arrow.NewSchema([]arrow.Field{ + makeField("dept", arrow.BinaryTypes.String), + makeField("region", arrow.BinaryTypes.String), + }, nil), + wantFields: []string{"id", "name", "dept", "region"}, + }, + { + name: "Single duplicate (id)", + left: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + makeField("age", arrow.PrimitiveTypes.Int32), + }, nil), + right: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("dept", arrow.BinaryTypes.String), + }, nil), + wantFields: []string{"left_id", "name", "age", "right_id", "dept"}, + }, + { + name: "Multiple duplicates", + left: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + }, nil), + right: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + makeField("salary", arrow.PrimitiveTypes.Float64), + }, nil), + wantFields: []string{"left_id", "left_name", "right_id", "right_name", "salary"}, + }, + { + name: "Nullable metadata preserved", + left: 
arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: false}, + }, nil), + right: arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + }, nil), + wantFields: []string{"left_id", "right_id"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := joinSchemas(tt.left, tt.right) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got.NumFields() != len(tt.wantFields) { + t.Fatalf("wrong number of fields: got %d want %d", + got.NumFields(), len(tt.wantFields)) + } + + for i, wantName := range tt.wantFields { + gotName := got.Field(i).Name + if gotName != wantName { + t.Fatalf("field %d mismatch: got %s want %s", i, gotName, wantName) + } + } + }) + } +} + +func TestHashJoin1(t *testing.T) { + t.Run("playground", func(t *testing.T) { + left, right := newSources() + joinPred := NewJoinClause(Expr.NewExpressions(Expr.NewColumnResolve("id")), Expr.NewExpressions(Expr.NewColumnResolve("id"))) + smjExec, err := NewHashJoinExec(left, right, joinPred, InnerJoin, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + v, _ := smjExec.Next(5) + t.Logf("expected schema:\t\n%v\n\n", smjExec.Schema()) + t.Logf("recieved schema:\t\n%v\n\n", v.Schema) + t.Logf("\t\n\n\t%+v\n", v.PrettyPrint()) + + }) +} + +// collectAllRows drains an operator into a slice of *operators.RecordBatch. +func collectAllRows(t *testing.T, op operators.Operator) []*operators.RecordBatch { + t.Helper() + + var batches []*operators.RecordBatch + for { + b, err := op.Next(1024) + if errors.Is(err, io.EOF) { + break + } + if err != nil { + t.Fatalf("unexpected error from Next: %v", err) + } + if b == nil || b.RowCount == 0 { + continue + } + batches = append(batches, b) + } + return batches +} + +// flattenRowCount sums total rows across all batches. 
+func flattenRowCount(batches []*operators.RecordBatch) int { + total := 0 + for _, b := range batches { + total += int(b.RowCount) + } + return total +} + +// evalInt32Slice evaluates an expression to an Int32 array and returns values + validity bitmap. +func evalInt32Slice(t *testing.T, expr Expr.Expression, batch *operators.RecordBatch) ([]int32, []bool) { + t.Helper() + + arr, err := Expr.EvalExpression(expr, batch) + if err != nil { + t.Fatalf("EvalExpression failed: %v", err) + } + defer arr.Release() + + intArr, ok := arr.(*array.Int32) + if !ok { + t.Fatalf("expected Int32 array, got %T", arr) + } + + n := intArr.Len() + values := make([]int32, n) + valid := make([]bool, n) + for i := 0; i < n; i++ { + if intArr.IsNull(i) { + valid[i] = false + continue + } + valid[i] = true + values[i] = intArr.Value(i) + } + return values, valid +} + +// +// Simple helpers to get your left/right sources for the id-join dataset. +// + +// +// Multi-attribute dataset: first_name + last_name join. 
+// + +func generateMultiAttrLeft(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"first_name", "last_name", "emp_id"} + + fnB := array.NewStringBuilder(mem) + fnB.AppendValues( + []string{"Alice", "Bob", "Charlie", "Diana"}, + []bool{true, true, true, true}, + ) + fnArr := fnB.NewArray() + + lnB := array.NewStringBuilder(mem) + lnB.AppendValues( + []string{"Smith", "Jones", "Stone", "Lopez"}, + []bool{true, true, true, true}, + ) + lnArr := lnB.NewArray() + + empB := array.NewInt32Builder(mem) + empB.AppendValues( + []int32{1, 2, 3, 4}, + []bool{true, true, true, true}, + ) + empArr := empB.NewArray() + + return names, []arrow.Array{fnArr, lnArr, empArr} +} + +func generateMultiAttrRight(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"first_name", "last_name", "department"} + + fnB := array.NewStringBuilder(mem) + fnB.AppendValues( + []string{"Alice", "Charlie", "Evan"}, + []bool{true, true, true}, + ) + fnArr := fnB.NewArray() + + lnB := array.NewStringBuilder(mem) + lnB.AppendValues( + []string{"Smith", "Stone", "Miller"}, + []bool{true, true, true}, + ) + lnArr := lnB.NewArray() + + deptB := array.NewStringBuilder(mem) + deptB.AppendValues( + []string{"HR", "Engineering", "Sales"}, + []bool{true, true, true}, + ) + deptArr := deptB.NewArray() + + return names, []arrow.Array{fnArr, lnArr, deptArr} +} + +// +// "Computed" key dataset: we simulate a computed join key by precomputing a normalized field. 
+// + +func generateEmailLeft(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "email_lower"} + + idB := array.NewInt32Builder(mem) + idB.AppendValues([]int32{1, 2, 3}, []bool{true, true, true}) + idArr := idB.NewArray() + + emailB := array.NewStringBuilder(mem) + emailB.AppendValues( + []string{"alice@example.com", "bob@example.com", "charlie@example.com"}, + []bool{true, true, true}, + ) + emailArr := emailB.NewArray() + + return names, []arrow.Array{idArr, emailArr} +} + +func generateEmailRight(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "email_lower", "group"} + + idB := array.NewInt32Builder(mem) + idB.AppendValues([]int32{10, 20, 30}, []bool{true, true, true}) + idArr := idB.NewArray() + + emailB := array.NewStringBuilder(mem) + emailB.AppendValues( + []string{"alice@example.com", "notused@example.com", "charlie@example.com"}, + []bool{true, true, true}, + ) + emailArr := emailB.NewArray() + + groupB := array.NewStringBuilder(mem) + groupB.AppendValues([]string{"A", "B", "C"}, []bool{true, true, true}) + groupArr := groupB.NewArray() + + return names, []arrow.Array{idArr, emailArr, groupArr} +} + +// +// ---------- (1) Simple id join tests ---------- +// + +func TestHashJoin_OnSimpleKey(t *testing.T) { + t.Run("inner join on id with SQL NULL semantics", func(t *testing.T) { + left, right := newSources() + + leftExpr := Expr.NewExpressions(Expr.NewColumnResolve("id")) + rightExpr := Expr.NewExpressions(Expr.NewColumnResolve("id")) + clause := NewJoinClause(leftExpr, rightExpr) + + hj, err := NewHashJoinExec(left, right, clause, InnerJoin, nil) + if err != nil { + t.Fatalf("NewHashJoinExec failed: %v", err) + } + defer func() { + if err := hj.Close(); err != nil { + t.Fatalf("HashJoinExec Close failed: %v", err) + } + + }() + + batches := collectAllRows(t, hj) + totalRows := flattenRowCount(batches) + + // Overlap on non-NULL ids is: 1, 2, 4, 5 => 4 rows for inner join. 
+ if totalRows != 4 { + t.Fatalf("expected 4 joined rows, got %d", totalRows) + } + + if len(batches) == 0 { + t.Fatal("expected at least one output batch") + } + first := batches[0] + + leftIDExpr := Expr.NewColumnResolve("left_id") + rightIDExpr := Expr.NewColumnResolve("right_id") + + leftVals, leftValid := evalInt32Slice(t, leftIDExpr, first) + rightVals, rightValid := evalInt32Slice(t, rightIDExpr, first) + + for i := range leftVals { + if !leftValid[i] || !rightValid[i] { + t.Fatalf("unexpected NULL id in joined row %d", i) + } + if leftVals[i] != rightVals[i] { + t.Fatalf("mismatched ids at row %d: left=%d right=%d", + i, leftVals[i], rightVals[i]) + } + } + }) + + t.Run("constructor error on mismatched join clause length", func(t *testing.T) { + left, right := newSources() + + // left has 1 expression, right has 2 → must error + leftExpr := Expr.NewExpressions(Expr.NewColumnResolve("id")) + rightExpr := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("department"), + ) + + clause := NewJoinClause(leftExpr, rightExpr) + _, err := NewHashJoinExec(left, right, clause, InnerJoin, nil) + if err == nil { + t.Fatal("expected error due to mismatched join expression counts, got nil") + } + if !strings.Contains(err.Error(), "mismatched number of join expressions") { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +// +// ---------- (2) Multi-attribute join tests ---------- +// + +func TestHashJoin_MultiAttributeKey(t *testing.T) { + mem := memory.NewGoAllocator() + leftNames, leftCols := generateMultiAttrLeft(mem) + rightNames, rightCols := generateMultiAttrRight(mem) + + leftSource, _ := project.NewInMemoryProjectExecFromArrays(leftNames, leftCols) + rightSource, _ := project.NewInMemoryProjectExecFromArrays(rightNames, rightCols) + + leftExprs := Expr.NewExpressions( + Expr.NewColumnResolve("first_name"), + Expr.NewColumnResolve("last_name"), + ) + rightExprs := Expr.NewExpressions( + Expr.NewColumnResolve("first_name"), + 
Expr.NewColumnResolve("last_name"), + ) + clause := NewJoinClause(leftExprs, rightExprs) + + hj, err := NewHashJoinExec(leftSource, rightSource, clause, InnerJoin, nil) + if err != nil { + t.Fatalf("NewHashJoinExec failed: %v", err) + } + + defer func() { + if err := hj.Close(); err != nil { + t.Fatalf("HashJoinExec Close failed: %v", err) + } + + }() + + batches := collectAllRows(t, hj) + totalRows := flattenRowCount(batches) + + // Matches: ("Alice","Smith") and ("Charlie","Stone") => 2 rows. + if totalRows != 2 { + t.Fatalf("expected 2 rows from multi-attribute join, got %d", totalRows) + } + + if len(batches) == 0 { + t.Fatal("expected at least one batch") + } + first := batches[0] + + deptExpr := Expr.NewColumnResolve("department") + arr, err := Expr.EvalExpression(deptExpr, first) + if err != nil { + t.Fatalf("EvalExpression department failed: %v", err) + } + defer arr.Release() + + strArr := arr.(*array.String) + if strArr.Len() != totalRows { + t.Fatalf("expected department array len %d, got %d", totalRows, strArr.Len()) + } + for i := 0; i < strArr.Len(); i++ { + if strArr.IsNull(i) { + t.Fatalf("expected non-null department at row %d", i) + } + } +} + +// +// ---------- (3) "Computed" key join tests ---------- +// + +func TestHashJoin_ComputedKeySimulation(t *testing.T) { + mem := memory.NewGoAllocator() + leftNames, leftCols := generateEmailLeft(mem) + rightNames, rightCols := generateEmailRight(mem) + + leftSource, _ := project.NewInMemoryProjectExecFromArrays(leftNames, leftCols) + rightSource, _ := project.NewInMemoryProjectExecFromArrays(rightNames, rightCols) + + leftExprs := Expr.NewExpressions(Expr.NewColumnResolve("email_lower")) + rightExprs := Expr.NewExpressions(Expr.NewColumnResolve("email_lower")) + clause := NewJoinClause(leftExprs, rightExprs) + + hj, err := NewHashJoinExec(leftSource, rightSource, clause, InnerJoin, nil) + if err != nil { + t.Fatalf("NewHashJoinExec failed: %v", err) + } + + defer func() { + if err := hj.Close(); err != 
nil { + t.Fatalf("HashJoinExec Close failed: %v", err) + } + + }() + batches := collectAllRows(t, hj) + totalRows := flattenRowCount(batches) + + // Overlap on email_lower: alice + charlie => 2 rows. + if totalRows != 2 { + t.Fatalf("expected 2 joined rows on email_lower, got %d", totalRows) + } + + if len(batches) == 0 { + t.Fatal("expected at least one batch") + } + first := batches[0] + + leftEmailExpr := Expr.NewColumnResolve("left_email_lower") + rightEmailExpr := Expr.NewColumnResolve("right_email_lower") + + leftArr, err := Expr.EvalExpression(leftEmailExpr, first) + if err != nil { + t.Fatalf("EvalExpression left_email_lower failed: %v", err) + } + defer leftArr.Release() + + rightArr, err := Expr.EvalExpression(rightEmailExpr, first) + if err != nil { + t.Fatalf("EvalExpression right_email_lower failed: %v", err) + } + defer rightArr.Release() + + lStr := leftArr.(*array.String) + rStr := rightArr.(*array.String) + + if lStr.Len() != rStr.Len() { + t.Fatalf("expected same length for left/right email arrays, got %d vs %d", + lStr.Len(), rStr.Len()) + } + for i := 0; i < lStr.Len(); i++ { + if lStr.IsNull(i) || rStr.IsNull(i) { + t.Fatalf("unexpected NULL email at row %d", i) + } + } } diff --git a/src/Backend/opti-sql-go/operators/filter/limit.go b/src/Backend/opti-sql-go/operators/filter/limit.go index 57b77d6..196160f 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit.go +++ b/src/Backend/opti-sql-go/operators/filter/limit.go @@ -94,7 +94,7 @@ type DistinctExec struct { func NewDistinctExec(input operators.Operator, colExpr []Expr.Expression) (*DistinctExec, error) { if len(colExpr) == 0 { - return nil, errors.New("Distinct operator requires at least one column expression") + return nil, errors.New("distinct operator requires at least one column expression") } return &DistinctExec{ input: input, From 67f654a0c9a14958f002cffee9cd2945323a27d5 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Wed, 3 Dec 2025 17:27:07 -0500 Subject: [PATCH 16/21] 
fix: PR comments --- .../opti-sql-go/operators/Join/hashJoin.go | 100 ++---------------- 1 file changed, 10 insertions(+), 90 deletions(-) diff --git a/src/Backend/opti-sql-go/operators/Join/hashJoin.go b/src/Backend/opti-sql-go/operators/Join/hashJoin.go index 1671a14..e630a70 100644 --- a/src/Backend/opti-sql-go/operators/Join/hashJoin.go +++ b/src/Backend/opti-sql-go/operators/Join/hashJoin.go @@ -9,7 +9,6 @@ import ( "math" "opti-sql-go/Expr" "opti-sql-go/operators" - "opti-sql-go/operators/aggr" "strings" "github.com/apache/arrow/go/v17/arrow" @@ -18,6 +17,11 @@ import ( "github.com/apache/arrow/go/v17/arrow/memory" ) +// TODO: clean up PR and push again +// TODO: write intergration test for operators to work together +// TODO: see ticket #27 +// TODO: take small break from this project to work on inverted index search for a couple days + var ( ErrInvalidJoinClauseCount = func(l, r int) error { return fmt.Errorf("mismatched number of join expressions between left and right, left: %d vs right: %d", l, r) @@ -25,7 +29,7 @@ var ( ) var ( - _ = (operators.Operator)(&SortMergeJoinExec{}) + _ = (operators.Operator)(&HashJoinExec{}) ) type JoinType int @@ -86,86 +90,6 @@ func NewJoinClause(leftS, rightS []Expr.Expression) JoinClause { } } -// use sort merge join when the output needs to be sorted on the join keys -type SortMergeJoinExec struct { - leftSource operators.Operator - rightSource operators.Operator - clause JoinClause - joinType JoinType - filters []Expr.Expression //TODO: incorpoarte - schema *arrow.Schema - done bool - // internalState - outputBatch []arrow.Array // intermediate storage for output arrays - -} - -func NewSortMergeJoinExec(left operators.Operator, right operators.Operator, clause JoinClause, joinType JoinType, filters []Expr.Expression) (*SortMergeJoinExec, error) { - fmt.Printf("join clause: \t%v\njoin Type: \t%v\n", clause.String(), joinType) - schema, err := joinSchemas(left.Schema(), right.Schema()) - if err != nil { - return nil, err 
- } - // handle sorting this here. so the .Next function has less logic - if len(clause.leftS) != len(clause.rightS) { - return nil, ErrInvalidJoinClauseCount(len(clause.leftS), len(clause.rightS)) - } - var Lsk []aggr.SortKey - for i := 0; i < len(clause.leftS); i++ { - Lsk = append(Lsk, aggr.SortKey{ - Expr: clause.leftS[i], - Ascending: true, - }) - } - var Rsk []aggr.SortKey - for i := 0; i < len(clause.rightS); i++ { - Rsk = append(Rsk, aggr.SortKey{ - Expr: clause.rightS[i], - Ascending: true, - }) - } - ls, err := aggr.NewSortExec(left, Lsk) - if err != nil { - return nil, err - } - rs, err := aggr.NewSortExec(right, Rsk) - if err != nil { - return nil, err - } - - return &SortMergeJoinExec{ - leftSource: rs, - rightSource: ls, - clause: clause, - joinType: joinType, - filters: filters, - schema: schema, - outputBatch: make([]arrow.Array, schema.NumFields()), - }, nil -} - -// TODO: - -func (smj *SortMergeJoinExec) Next(n uint16) (*operators.RecordBatch, error) { - if smj.done { - return nil, io.EOF - } - return nil, nil -} -func (smj *SortMergeJoinExec) Schema() *arrow.Schema { return smj.schema } -func (smj *SortMergeJoinExec) Close() error { - // do other clean up but for now just pass down to child - err1 := smj.leftSource.Close() - err2 := smj.rightSource.Close() - if err1 != nil { - return err1 - } - if err2 != nil { - return err2 - } - return nil -} - // left schema + right schema, if left and right have same column name, prefix with left_ and right_ func joinSchemas(left, right *arrow.Schema) (*arrow.Schema, error) { // table1 : id , name , age @@ -238,7 +162,6 @@ type joinPair struct { } func NewHashJoinExec(left operators.Operator, right operators.Operator, clause JoinClause, joinType JoinType, filters []Expr.Expression) (*HashJoinExec, error) { - fmt.Printf("join clause: \t%v\njoin Type: \t%v\n", clause.String(), joinType) schema, err := joinSchemas(left.Schema(), right.Schema()) if err != nil { return nil, err @@ -270,16 +193,17 @@ func (hj 
*HashJoinExec) Next(_ uint16) (*operators.RecordBatch, error) { if err != nil { return nil, err } + emptyCols := make([]arrow.Array, hj.schema.NumFields()) if len(leftArr) == 0 || len(rightArr) == 0 { hj.done = true return &operators.RecordBatch{ Schema: hj.Schema(), + Columns: emptyCols, RowCount: uint64(0), }, nil } leftRowCount := leftArr[0].Len() rightRowCount := rightArr[0].Len() - //fmt.Printf("left:\t%v\nright:\t%v\n", leftArr, rightArr) leftComp, err := buildComptables(hj.clause.leftS, leftArr, hj.leftSource.Schema()) if err != nil { return nil, err @@ -289,25 +213,22 @@ func (hj *HashJoinExec) Next(_ uint16) (*operators.RecordBatch, error) { if err != nil { return nil, err } - fmt.Printf("left Comparission arrays:\t%v\nright Comparrission arrays:\t%v\n", leftComp, rightComp) ht := buildRightHashTable(rightComp, rightRowCount) pairs := probeJoin(leftComp, ht, leftRowCount) if len(pairs) == 0 { hj.done = true return &operators.RecordBatch{ Schema: hj.Schema(), - Columns: []arrow.Array{}, + Columns: emptyCols, RowCount: 0, }, nil } - fmt.Printf("ht:\t%v\npairs:\t%v\n", ht, pairs) leftIdxArr, rightIdxArr, err := buildIndexArrays(mem, pairs) if err != nil { return nil, err } - fmt.Printf("leftIDX:\t%v\nrightIDX:\t%v\n", leftIdxArr, rightIdxArr) - outArr, err := hj.buildOutputArrays(mem, leftArr, rightArr, leftIdxArr, rightIdxArr) + outArr, err := hj.buildOutputArrays(leftArr, rightArr, leftIdxArr, rightIdxArr) if err != nil { return nil, err } @@ -469,7 +390,6 @@ func buildIndexArrays( } func (hj *HashJoinExec) buildOutputArrays( - mem memory.Allocator, leftCols []arrow.Array, rightCols []arrow.Array, leftIdxArr arrow.Array, From c9a12771f5f3b1d14dfd90c609698499bd78c559 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Wed, 3 Dec 2025 19:12:16 -0500 Subject: [PATCH 17/21] implemented project and filter intergration test --- src/Backend/opti-sql-go/Expr/expr.go | 54 ++ src/Backend/opti-sql-go/Expr/expr_test.go | 14 + .../operators/Join/hashJoin_test.go | 16 - 
.../opti-sql-go/operators/filter/filter.go | 10 + .../operators/project/projectExec.go | 11 + .../opti-sql-go/operators/test/t1_test.go | 501 ++++++++++++++++++ 6 files changed, 590 insertions(+), 16 deletions(-) diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index f9d88de..22fc905 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -105,6 +105,8 @@ func EvalExpression(expr Expression, batch *operators.RecordBatch) (arrow.Array, return EvalScalarFunction(e, batch) case *CastExpr: return EvalCast(e, batch) + case *NullCheckExpr: + return EvalNullCheckMask(e.Expr, batch) default: return nil, ErrUnsupportedExpression(expr.String()) } @@ -146,6 +148,8 @@ func ExprDataType(e Expression, inputSchema *arrow.Schema) (arrow.DataType, erro return nil, err } return inferScalarFunctionType(ex.Function, argType), nil + case *NullCheckExpr: + return arrow.FixedWidthTypes.Boolean, nil default: return nil, ErrUnsupportedExpression(ex.String()) @@ -355,6 +359,16 @@ func EvalLiteral(l *LiteralResolve, batch *operators.RecordBatch) (arrow.Array, b.Append(v) } return b.NewArray(), nil + // ------------------------------ + // Nulls + // ------------------------------ + case arrow.NULL: + b := array.NewNullBuilder(memory.DefaultAllocator) + defer b.Release() + for i := 0; i < n; i++ { + b.AppendNull() + } + return b.NewArray(), nil default: return nil, fmt.Errorf("literal type %s not supported", l.Type) @@ -518,6 +532,7 @@ func unpackDatum(d compute.Datum) (arrow.Array, error) { if !ok { return nil, fmt.Errorf("datum %v is not of type array", d) } + fmt.Printf("unpackDatum: array str: \t%v\n", array.String()) return array.MakeArray(), nil } @@ -612,6 +627,45 @@ func (c *CastExpr) String() string { return fmt.Sprintf("Cast(%s AS %s)", c.Expr, c.TargetType) } +type NullCheckExpr struct { + Expr Expression +} + +func NewNullCheckExpr(expr Expression) *NullCheckExpr { + return &NullCheckExpr{Expr: expr} +} 
+func (n *NullCheckExpr) ExprNode() {} +func (n *NullCheckExpr) String() string { + return fmt.Sprintf("NullCheck(%s)", n.Expr.String()) +} +func EvalNullCheckMask(expr Expression, batch *operators.RecordBatch) (arrow.Array, error) { + // Step 1: Evaluate underlying expression → get its array + arr, err := EvalExpression(expr, batch) + if err != nil { + return nil, err + } + length := arr.Len() + + // Step 2: BooleanBuilder for the mask + builder := array.NewBooleanBuilder(memory.DefaultAllocator) + defer builder.Release() + + builder.Resize(length) + + // Step 3: Fill boolean mask (true = NOT NULL, false = NULL) + for i := 0; i < length; i++ { + if arr.IsNull(i) { + builder.Append(false) + } else { + builder.Append(true) + } + } + + // Step 4: produce final boolean array + mask := builder.NewArray() // *array.Boolean + return mask, nil +} + func upperImpl(arr arrow.Array) (arrow.Array, error) { strArr, ok := arr.(*array.String) if !ok { diff --git a/src/Backend/opti-sql-go/Expr/expr_test.go b/src/Backend/opti-sql-go/Expr/expr_test.go index 487e8f2..1c93211 100644 --- a/src/Backend/opti-sql-go/Expr/expr_test.go +++ b/src/Backend/opti-sql-go/Expr/expr_test.go @@ -1673,3 +1673,17 @@ func TestLikeOperatorSQL(t *testing.T) { } }) } + +func TestNullCases(t *testing.T) { + t.Run("null Column literal", func(t *testing.T) { + v := NewLiteralResolve(arrow.Null, nil) + array, err := EvalExpression(v, &operators.RecordBatch{ + RowCount: 10, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + t.Logf("\t%v\n", array) + }) + +} diff --git a/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go b/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go index d0fd1ec..e22872f 100644 --- a/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go +++ b/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go @@ -192,22 +192,6 @@ func TestJoinSchemas(t *testing.T) { } } -func TestHashJoin1(t *testing.T) { - t.Run("playground", func(t *testing.T) { - left, right := 
newSources() - joinPred := NewJoinClause(Expr.NewExpressions(Expr.NewColumnResolve("id")), Expr.NewExpressions(Expr.NewColumnResolve("id"))) - smjExec, err := NewHashJoinExec(left, right, joinPred, InnerJoin, nil) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - v, _ := smjExec.Next(5) - t.Logf("expected schema:\t\n%v\n\n", smjExec.Schema()) - t.Logf("recieved schema:\t\n%v\n\n", v.Schema) - t.Logf("\t\n\n\t%+v\n", v.PrettyPrint()) - - }) -} - // collectAllRows drains an operator into a slice of *operators.RecordBatch. func collectAllRows(t *testing.T, op operators.Operator) []*operators.RecordBatch { t.Helper() diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index 6c30c8f..243f645 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -3,6 +3,7 @@ package filter import ( "context" "errors" + "fmt" "io" "opti-sql-go/Expr" "opti-sql-go/operators" @@ -43,12 +44,17 @@ func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { } childBatch, err := f.input.Next(n) if err != nil { + if errors.Is(err, io.EOF) { + f.done = true + return nil, io.EOF + } return nil, err } booleanMask, err := Expr.EvalExpression(f.predicate, childBatch) if err != nil { return nil, err } + fmt.Printf("boolean mask: %v\n", booleanMask) boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should if !ok { return nil, errors.New("predicate did not evaluate to boolean array") @@ -122,6 +128,8 @@ func validPredicates(pred Expr.Expression, schema *arrow.Schema) bool { if err != nil { return false } + //TODO: allow for nulls to be comparable + fmt.Printf("dt1:\t%v\ndt2:\t%v\n", dt1, dt2) if !arrow.TypeEqual(dt1, dt2) { return false } @@ -132,6 +140,8 @@ func validPredicates(pred Expr.Expression, schema *arrow.Schema) bool { case *Expr.LiteralResolve: return true + 
case *Expr.NullCheckExpr: + return validPredicates(p.Expr, schema) default: return false } diff --git a/src/Backend/opti-sql-go/operators/project/projectExec.go b/src/Backend/opti-sql-go/operators/project/projectExec.go index 033a58c..abd3da8 100644 --- a/src/Backend/opti-sql-go/operators/project/projectExec.go +++ b/src/Backend/opti-sql-go/operators/project/projectExec.go @@ -41,6 +41,17 @@ func NewProjectExec(input operators.Operator, exprs []Expr.Expression) (*Project Type: tp, Nullable: true, } + case *Expr.ColumnResolve: + tp, err := Expr.ExprDataType(ex, input.Schema()) + if err != nil { + return nil, fmt.Errorf("project exec: failed to get expression data type for expr %d: %w", i, err) + } + fields[i] = arrow.Field{ + Name: ex.Name, + Type: tp, + Nullable: true, + } + default: name := fmt.Sprintf("col_%d", i) Type, err := Expr.ExprDataType(e, input.Schema()) diff --git a/src/Backend/opti-sql-go/operators/test/t1_test.go b/src/Backend/opti-sql-go/operators/test/t1_test.go index a571421..ec99f8d 100644 --- a/src/Backend/opti-sql-go/operators/test/t1_test.go +++ b/src/Backend/opti-sql-go/operators/test/t1_test.go @@ -1,3 +1,504 @@ package test +import ( + "errors" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "opti-sql-go/operators/filter" + "opti-sql-go/operators/project" + "strings" + "testing" + + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + // test for all operators together +// using in memory format at first +func generateIntegrationDataset1(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{ + "id", "first_name", "last_name", "age", "salary", "department", "region", + } + + // id + idB := array.NewInt32Builder(mem) + idB.AppendValues( + []int32{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + }, + []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, 
true, true, true, true, + }, + ) + idArr := idB.NewArray() + + // first_name + fnB := array.NewStringBuilder(mem) + fnB.AppendValues([]string{ + "Alice", "Bob", "Charlie", "Diana", "Eve", + "Frank", "Grace", "Hank", "Ivy", "Jake", + "Karen", "Leo", "Mona", "Nate", "Olivia", + "Paul", "Quinn", "Ruth", "Steve", "Tina", + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + fnArr := fnB.NewArray() + + // last_name + lnB := array.NewStringBuilder(mem) + lnB.AppendValues([]string{ + "Smith", "Jones", "Stone", "Lopez", "King", + "Hall", "Young", "Wright", "Hill", "Green", + "Adams", "Clark", "Allen", "Baker", "Cox", + "Diaz", "Evans", "Ford", "Gray", "Hart", + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + lnArr := lnB.NewArray() + + // age + ageB := array.NewInt32Builder(mem) + ageB.AppendValues([]int32{ + 29, 34, 41, 26, 33, + 45, 38, 28, 52, 31, + 27, 49, 36, 42, 30, + 40, 50, 39, 55, 25, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + ageArr := ageB.NewArray() + + // salary + salB := array.NewFloat64Builder(mem) + salB.AppendValues([]float64{ + 70000, 80000, 65000, 72000, 59000, + 82000, 91000, 54000, 68000, 60000, + 75000, 88000, 56000, 69000, 62000, + 93000, 97000, 58000, 89000, 61000, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + salArr := salB.NewArray() + + // department (some NULLs) + deptB := array.NewStringBuilder(mem) + deptB.AppendValues([]string{ + "HR", "Engineering", "Sales", "Finance", "HR", + "Engineering", "Sales", "Finance", "HR", "Engineering", + "Sales", "Finance", "HR", "Engineering", "Sales", + "Finance", "HR", "Engineering", "Sales", "Finance", + }, []bool{ + true, true, 
true, false, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + }) + deptArr := deptB.NewArray() + + // region (with NULLs) + regB := array.NewStringBuilder(mem) + regB.AppendValues([]string{ + "US", "EU", "US", "APAC", "LATAM", + "US", "EU", "APAC", "LATAM", "US", + "EU", "US", "LATAM", "EU", "APAC", + "US", "EU", "LATAM", "US", "EU", + }, []bool{ + true, true, true, true, true, + true, true, false, true, true, + true, true, true, true, true, + true, true, true, true, false, + }) + regArr := regB.NewArray() + + return names, []arrow.Array{idArr, fnArr, lnArr, ageArr, salArr, deptArr, regArr} +} + +func generateIntegrationDataset2(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"dept_id", "department", "region", "budget", "manager"} + + // dept_id + idB := array.NewInt32Builder(mem) + idB.AppendValues([]int32{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + idArr := idB.NewArray() + + // department + deptB := array.NewStringBuilder(mem) + deptB.AppendValues([]string{ + "HR", "Engineering", "Sales", "Finance", "Marketing", + "Support", "Research", "Security", "Legal", "Operations", + "HR", "Engineering", "Sales", "Finance", "Marketing", + "Support", "Research", "Security", "Legal", "Operations", + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + deptArr := deptB.NewArray() + + // region (with NULLs) + regB := array.NewStringBuilder(mem) + regB.AppendValues([]string{ + "US", "EU", "LATAM", "APAC", "US", + "EU", "LATAM", "APAC", "US", "EU", + "LATAM", "US", "EU", "APAC", "US", + "LATAM", "US", "EU", "APAC", "US", + }, []bool{ + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, false, + true, true, 
true, true, true, + }) + regArr := regB.NewArray() + + // budget + budB := array.NewFloat64Builder(mem) + budB.AppendValues([]float64{ + 1e6, 2e6, 3e6, 1.5e6, 1.2e6, + 900000, 850000, 780000, 950000, 1100000, + 1e6, 2e6, 3e6, 1.5e6, 1.2e6, + 900000, 850000, 780000, 950000, 1100000, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + budArr := budB.NewArray() + + // manager (with NULLs) + manB := array.NewStringBuilder(mem) + manB.AppendValues([]string{ + "Anna", "Ben", "Chris", "Dana", "Eli", + "Faye", "George", "Holly", "Ian", "Jane", + "Karl", "Lilly", "Mason", "Nora", "Owen", + "Pam", "Quinn", "Rose", "Sam", "Tara", + }, []bool{ + true, true, true, true, true, + true, true, true, false, true, + true, true, true, true, true, + true, true, true, true, true, + }) + manArr := manB.NewArray() + + return names, []arrow.Array{idArr, deptArr, regArr, budArr, manArr} +} +func NewIntegrationSource1(mem memory.Allocator) (*project.InMemorySource, error) { + names, cols := generateIntegrationDataset1(mem) + return project.NewInMemoryProjectExecFromArrays(names, cols) +} + +func NewIntegrationSource2(mem memory.Allocator) (*project.InMemorySource, error) { + names, cols := generateIntegrationDataset2(mem) + return project.NewInMemoryProjectExecFromArrays(names, cols) +} +func runAll(t *testing.T, op operators.Operator) *operators.RecordBatch { + t.Helper() + + b, err := op.Next(1000) + if errors.Is(err, io.EOF) { + return nil + } + if err != nil { + t.Fatalf("unexpected err from Next: %v", err) + } + return b +} + +/* +============================================================================ +Project tests +============================================================================ +*/ +func TestIntegrationProjectExec(t *testing.T) { + t.Run("integration_project_exec", func(t *testing.T) { + mem := memory.NewGoAllocator() + + src, err := NewIntegrationSource1(mem) + if err != nil 
{ + t.Fatalf("failed to create integration source: %v", err) + } + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias(Expr.NewColumnResolve("age"), "age"), + Expr.NewColumnResolve("salary"), + Expr.NewColumnResolve("department"), + ) + basicProj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("unexpected error\t%v\n", basicProj) + } + //t.Logf("%v\n", basicProj.Schema()) + rc, err := basicProj.Next(100) + if err != nil { + if !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error %v\n", err) + } + } + if rc.RowCount != 20 { + t.Fatalf("expected 20 rows, got %d", rc.RowCount) + } + }) + t.Run("projection_with_alias", func(t *testing.T) { + mem := memory.NewGoAllocator() + src, _ := NewIntegrationSource1(mem) + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias(Expr.NewColumnResolve("salary"), "emp_salary"), + ) + + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("error: %v", err) + } + + batch, _ := proj.Next(50) + + // verify alias appears in schema + if batch.Schema.Fields()[1].Name != "emp_salary" { + t.Fatalf("expected alias emp_salary, got %s", batch.Schema.Fields()[1].Name) + } + }) + t.Run("projection_expression_math", func(t *testing.T) { + mem := memory.NewGoAllocator() + src, _ := NewIntegrationSource1(mem) + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias( + Expr.NewBinaryExpr( + Expr.NewColumnResolve("salary"), + Expr.Multiplication, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(1.10)), + ), + "adjusted_salary", + ), + ) + + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("error: %v", err) + } + + batch, _ := proj.Next(50) + + adjCol := batch.Columns[1].(*array.Float64) + _, origin := generateIntegrationDataset1(mem) + sal := origin[4].(*array.Float64) + // check: for a non-null salary (row 0 = 50000) + if adjCol.Len() != sal.Len() { + t.Fatalf("expected adjusted salary 
length %d, got %d", sal.Len(), adjCol.Len()) + } + for i := 0; i < adjCol.Len(); i++ { + if !sal.IsNull(i) { + expected := sal.Value(i) * 1.10 + if adjCol.Value(i) != expected { + t.Fatalf("row %d: expected adjusted salary %f, got %f", i, expected, adjCol.Value(i)) + } + } + } + }) + t.Run("projection_upper_first_name", func(t *testing.T) { + mem := memory.NewGoAllocator() + + src, err := NewIntegrationSource1(mem) + if err != nil { + t.Fatalf("failed to create integration source: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewAlias( + Expr.NewScalarFunction(Expr.Upper, Expr.NewColumnResolve("first_name")), + "first_name_upper", + ), + ) + + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("unexpected project exec error: %v", err) + } + + batch, err := proj.Next(100) // pull all rows at once + if err != nil { + t.Fatalf("unexpected error on Next: %v", err) + } + if batch == nil { + t.Fatalf("expected a batch but got nil") + } + + // ---- get projected column (index 0) ---- + upperCol := batch.Columns[0].(*array.String) + + // ---- get original dataset to compare ---- + _, originCols := generateIntegrationDataset1(mem) + firstNameCol := originCols[1].(*array.String) // index 1 is first_name + + if upperCol.Len() != firstNameCol.Len() { + t.Fatalf("length mismatch: expected %d got %d", + firstNameCol.Len(), upperCol.Len()) + } + + // ---- validate uppercase projection ---- + for i := 0; i < upperCol.Len(); i++ { + if firstNameCol.IsNull(i) { + if !upperCol.IsNull(i) { + t.Fatalf("row %d: expected NULL but got value", i) + } + continue + } + + expected := strings.ToUpper(firstNameCol.Value(i)) + got := upperCol.Value(i) + + if expected != got { + t.Fatalf("row %d: expected %q, got %q", i, expected, got) + } + } + }) + +} +func TestIntegrationFilterExec(t *testing.T) { + mem := memory.NewGoAllocator() + + // ----- load dataset ----- + + // convenience handles to original cols for expected-value validation + //ageArr := 
cols[3].(*array.Int32) + //salaryArr := cols[4].(*array.Float64) + //deptArr := cols[5].(*array.String) + //regionArr := cols[6].(*array.String) + + // ---------------------------------------------------------------------- + t.Run("filter_age_gt_30", func(t *testing.T) { + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create in-memory source: %v", err) + } + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + batch, err := filt.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Fatalf("expected rows, got nil batch") + } + ageCol, _ := batch.ColumnByName("age") + for i := 0; i < ageCol.Len(); i++ { + ageValue := ageCol.(*array.Int32).Value(i) + if ageValue <= 30 { + t.Fatalf("expected age > 30, got %d", ageValue) + } + } + + }) + + // ---------------------------------------------------------------------- + t.Run("filter_engineering_and_salary_gt_70000", func(t *testing.T) { + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create in-memory source: %v", err) + } + pred := Expr.NewBinaryExpr( + Expr.NewBinaryExpr( + Expr.NewColumnResolve("department"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "Engineering"), + ), + Expr.And, + Expr.NewBinaryExpr( + Expr.NewColumnResolve("salary"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(70000)), + ), + ) + // department = 'Engineering' AND salary > 70000 + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + 
batch, err := filt.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Fatalf("expected non-nil batch") + } + + // validate + deptCol, _ := batch.ColumnByName("department") + salCol, _ := batch.ColumnByName("salary") + depColumn, _ := deptCol.(*array.String) + salColumn, _ := salCol.(*array.Float64) + for i := 0; i < int(batch.RowCount); i++ { + if depColumn.Value(i) != "Engineering" { + t.Fatalf("expected department 'Engineering', got %s", depColumn.Value(i)) + } + if salColumn.Value(i) <= 70000 { + t.Fatalf("expected salary > 70000, got %f", salColumn.Value(i)) + } + } + }) + + // ---------------------------------------------------------------------- + t.Run("filter_region_is_null", func(t *testing.T) { + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create in-memory source: %v", err) + } + // We're filtering region IS NULL + pred := Expr.NewNullCheckExpr(Expr.NewColumnResolve("region")) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + batch, err := filt.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + // possible: no NULLS + t.Fatalf("expected atleast one null") + return + } + t.Logf("batch: \t%v\n", batch.PrettyPrint()) + // validate + regionCol, _ := batch.ColumnByName("region") + regionArr := regionCol.(*array.String) + for i := 0; i < int(batch.RowCount); i++ { + if regionArr.IsNull(i) { + t.Fatalf("expected NULL region but got value=%s", regionArr.Value(i)) + } + } + }) + +} From 7d7590669088dd1b3c2405e6b08e885675b3a865 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Thu, 4 Dec 2025 00:51:08 -0500 Subject: [PATCH 18/21] Documentation: added operator test,Need intergration test --- src/Backend/opti-sql-go/Expr/expr.go | 20 +- 
src/Backend/opti-sql-go/Expr/expr_test.go | 131 ++ .../opti-sql-go/operators/filter/filter.go | 1 - .../opti-sql-go/operators/test/t1_test.go | 1095 ++++++++++++++++- 4 files changed, 1211 insertions(+), 36 deletions(-) diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index 22fc905..8b6fb03 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -639,30 +639,24 @@ func (n *NullCheckExpr) String() string { return fmt.Sprintf("NullCheck(%s)", n.Expr.String()) } func EvalNullCheckMask(expr Expression, batch *operators.RecordBatch) (arrow.Array, error) { - // Step 1: Evaluate underlying expression → get its array + // Step 1: Evaluate underlying expression arr, err := EvalExpression(expr, batch) if err != nil { return nil, err } + length := arr.Len() - // Step 2: BooleanBuilder for the mask + // Step 2: Build boolean mask builder := array.NewBooleanBuilder(memory.DefaultAllocator) - defer builder.Release() - builder.Resize(length) - // Step 3: Fill boolean mask (true = NOT NULL, false = NULL) for i := 0; i < length; i++ { - if arr.IsNull(i) { - builder.Append(false) - } else { - builder.Append(true) - } + builder.Append(!arr.IsNull(i)) // true = not null } - - // Step 4: produce final boolean array - mask := builder.NewArray() // *array.Boolean + // Step 3: produce final Boolean array + mask := builder.NewArray() + builder.Release() return mask, nil } diff --git a/src/Backend/opti-sql-go/Expr/expr_test.go b/src/Backend/opti-sql-go/Expr/expr_test.go index 1c93211..8eee24e 100644 --- a/src/Backend/opti-sql-go/Expr/expr_test.go +++ b/src/Backend/opti-sql-go/Expr/expr_test.go @@ -1,6 +1,7 @@ package Expr import ( + "fmt" "log" "opti-sql-go/operators" "testing" @@ -1687,3 +1688,133 @@ func TestNullCases(t *testing.T) { }) } +func makeBatch(schema *arrow.Schema, cols []arrow.Array) *operators.RecordBatch { + return &operators.RecordBatch{ + Schema: schema, + Columns: cols, + RowCount: 
uint64(cols[0].Len()), + } +} + +func TestNullCheckExpr(t *testing.T) { + + t.Run("int32_some_nulls_mask", func(t *testing.T) { + mem := memory.NewGoAllocator() + // col = [10, null, 30, null, 50] + b := array.NewInt32Builder(mem) + b.AppendValues( + []int32{10, 20, 30, 40, 50}, + []bool{true, false, true, false, true}, + ) + arr := b.NewArray() + b.Release() + defer arr.Release() + + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "col", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + }, + nil, + ) + batch := makeBatch(schema, []arrow.Array{arr}) + t.Logf("%v\n", batch.PrettyPrint()) + expr := NewColumnResolve("col") + + maskArr, err := EvalNullCheckMask(expr, batch) + if err != nil { + t.Fatalf("EvalNullCheckMask failed: %v", err) + } + defer maskArr.Release() + + boolMask := maskArr.(*array.Boolean) + fmt.Printf("boolean mask:\t%v\n", boolMask) + if boolMask.Len() != 5 { + t.Fatalf("expected length 5 mask, got %d", boolMask.Len()) + } + + // expected mask: [true, false, true, false, true] + want := []bool{true, false, true, false, true} + + for i := 0; i < 5; i++ { + if boolMask.Value(i) != want[i] { + t.Fatalf("mask[%d]: expected %v, got %v", i, want[i], boolMask.Value(i)) + } + } + }) + + // ─────────────────────────────────────────────── + + t.Run("string_all_nulls_mask", func(t *testing.T) { + mem := memory.NewGoAllocator() + + b := array.NewStringBuilder(mem) + b.AppendValues([]string{"A", "B", "C"}, []bool{false, false, false}) + arr := b.NewArray() + b.Release() + defer arr.Release() + + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "name", Type: arrow.BinaryTypes.String, Nullable: true}, + }, + nil, + ) + + batch := makeBatch(schema, []arrow.Array{arr}) + + expr := NewColumnResolve("name") + + maskArr, err := EvalNullCheckMask(expr, batch) + if err != nil { + t.Fatalf("EvalNullCheckMask failed: %v", err) + } + defer maskArr.Release() + + boolMask := maskArr.(*array.Boolean) + + // expected: [false, false, false] + for i := 0; i < 
boolMask.Len(); i++ { + if boolMask.Value(i) != false { + t.Fatalf("expected all false, got true at row %d", i) + } + } + }) + + // ─────────────────────────────────────────────── + + t.Run("no_nulls_all_true_mask", func(t *testing.T) { + mem := memory.NewGoAllocator() + + b := array.NewFloat64Builder(mem) + b.AppendValues([]float64{1.1, 2.2, 3.3}, []bool{true, true, true}) + arr := b.NewArray() + b.Release() + defer arr.Release() + + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "val", Type: arrow.PrimitiveTypes.Float64, Nullable: false}, + }, + nil, + ) + + batch := makeBatch(schema, []arrow.Array{arr}) + + expr := NewColumnResolve("val") + + maskArr, err := EvalNullCheckMask(expr, batch) + if err != nil { + t.Fatalf("EvalNullCheckMask failed: %v", err) + } + defer maskArr.Release() + + boolMask := maskArr.(*array.Boolean) + + // expected mask = [true, true, true] + for i := 0; i < boolMask.Len(); i++ { + if !boolMask.Value(i) { + t.Fatalf("expected true at %d, got false", i) + } + } + }) +} diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index 243f645..8beb80f 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -54,7 +54,6 @@ func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { if err != nil { return nil, err } - fmt.Printf("boolean mask: %v\n", booleanMask) boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should if !ok { return nil, errors.New("predicate did not evaluate to boolean array") diff --git a/src/Backend/opti-sql-go/operators/test/t1_test.go b/src/Backend/opti-sql-go/operators/test/t1_test.go index ec99f8d..f2e6e33 100644 --- a/src/Backend/opti-sql-go/operators/test/t1_test.go +++ b/src/Backend/opti-sql-go/operators/test/t1_test.go @@ -2,9 +2,13 @@ package test import ( "errors" + "fmt" "io" + "math" 
"opti-sql-go/Expr" "opti-sql-go/operators" + join "opti-sql-go/operators/Join" + "opti-sql-go/operators/aggr" "opti-sql-go/operators/filter" "opti-sql-go/operators/project" "strings" @@ -202,25 +206,13 @@ func NewIntegrationSource2(mem memory.Allocator) (*project.InMemorySource, error names, cols := generateIntegrationDataset2(mem) return project.NewInMemoryProjectExecFromArrays(names, cols) } -func runAll(t *testing.T, op operators.Operator) *operators.RecordBatch { - t.Helper() - - b, err := op.Next(1000) - if errors.Is(err, io.EOF) { - return nil - } - if err != nil { - t.Fatalf("unexpected err from Next: %v", err) - } - return b -} /* ============================================================================ Project tests ============================================================================ */ -func TestIntegrationProjectExec(t *testing.T) { +func TestProjectExec(t *testing.T) { t.Run("integration_project_exec", func(t *testing.T) { mem := memory.NewGoAllocator() @@ -368,16 +360,14 @@ func TestIntegrationProjectExec(t *testing.T) { }) } -func TestIntegrationFilterExec(t *testing.T) { - mem := memory.NewGoAllocator() - // ----- load dataset ----- - - // convenience handles to original cols for expected-value validation - //ageArr := cols[3].(*array.Int32) - //salaryArr := cols[4].(*array.Float64) - //deptArr := cols[5].(*array.String) - //regionArr := cols[6].(*array.String) +/* +============================================================================ +Filter tests +============================================================================ +*/ +func TestFilterExec(t *testing.T) { + mem := memory.NewGoAllocator() // ---------------------------------------------------------------------- t.Run("filter_age_gt_30", func(t *testing.T) { @@ -502,3 +492,1064 @@ func TestIntegrationFilterExec(t *testing.T) { }) } + +/* +============================================================================ +Sort tests 
+============================================================================ +*/ +func TestSortTest(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("sort_salary_ascending", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + sortKeys := []aggr.SortKey{ + {Expr: Expr.NewColumnResolve("salary"), Ascending: true}, + } + + sortExec, err := aggr.NewSortExec(src, sortKeys) + if err != nil { + t.Fatalf("failed to create sort exec: %v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + salaryArr := batch.Columns[4].(*array.Float64) + + for i := 1; i < salaryArr.Len(); i++ { + if salaryArr.IsNull(i-1) || salaryArr.IsNull(i) { + continue + } + if salaryArr.Value(i) < salaryArr.Value(i-1) { + t.Fatalf("salary not sorted ASC at row %d: %f < %f", + i, salaryArr.Value(i), salaryArr.Value(i-1)) + } + } + + }) + + // ───────────────────────────────────────────────────────────── + + t.Run("sort_lastname_descending", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + sortKeys := []aggr.SortKey{ + {Expr: Expr.NewColumnResolve("last_name"), Ascending: false}, + } + + sortExec, err := aggr.NewSortExec(src, sortKeys) + if err != nil { + t.Fatalf("failed to create sort exec: %v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + lastArr := batch.Columns[2].(*array.String) + + for i := 1; i < lastArr.Len(); i++ { + if lastArr.IsNull(i-1) || lastArr.IsNull(i) { + continue + } + + // descending → current <= previous + if lastArr.Value(i) > lastArr.Value(i-1) { + t.Fatalf("last_name not sorted DESC at %d: %s > %s", + i, lastArr.Value(i), lastArr.Value(i-1)) + } + } + }) + + // ───────────────────────────────────────────────────────────── + + t.Run("sort_department_then_salary_desc", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + sortKeys := []aggr.SortKey{ + 
{Expr: Expr.NewColumnResolve("department"), Ascending: true}, // asc + {Expr: Expr.NewColumnResolve("salary"), Ascending: false}, // desc + } + + sortExec, err := aggr.NewSortExec(src, sortKeys) + if err != nil { + t.Fatalf("failed to create sort exec: %v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + deptArr := batch.Columns[5].(*array.String) + salaryArr := batch.Columns[4].(*array.Float64) + + for i := 1; i < deptArr.Len(); i++ { + if deptArr.IsNull(i) || deptArr.IsNull(i-1) { + continue + } + + prevDept := deptArr.Value(i - 1) + currDept := deptArr.Value(i) + + // department ascending grouping + if currDept < prevDept { + t.Fatalf("department not sorted ASC at %d: %s < %s", + i, currDept, prevDept) + } + + // if same department → salary must be descending + if currDept == prevDept { + if !salaryArr.IsNull(i) && !salaryArr.IsNull(i-1) { + if salaryArr.Value(i) > salaryArr.Value(i-1) { + t.Fatalf("salary not DESC within department '%s' at row %d", + currDept, i) + } + } + } + } + }) +} + +/* +============================================================================ +Aggregations tests +============================================================================ +*/ +func TestIntegrationAggregations(t *testing.T) { + t.Run("sum_avg_min_max_salary", func(t *testing.T) { + mem := memory.NewGoAllocator() + + // Load integration dataset + _, cols := generateIntegrationDataset1(mem) + salaryArr := cols[4].(*array.Float64) + + // Expected values + var sum float64 + min := math.MaxFloat64 + max := -math.MaxFloat64 + count := 0 + + for i := 0; i < salaryArr.Len(); i++ { + if salaryArr.IsNull(i) { + continue + } + v := salaryArr.Value(i) + sum += v + count++ + if v < min { + min = v + } + if v > max { + max = v + } + } + avg := sum / float64(count) + + // Build aggregation operator + src, _ := NewIntegrationSource1(mem) + + salCol := Expr.NewColumnResolve("salary") + + agg, 
err := aggr.NewGlobalAggrExec(src, + []aggr.AggregateFunctions{aggr.NewAggregateFunctions(aggr.Sum, salCol), + aggr.NewAggregateFunctions(aggr.Avg, salCol), + aggr.NewAggregateFunctions(aggr.Min, salCol), + aggr.NewAggregateFunctions(aggr.Max, salCol)}) + if err != nil { + t.Fatalf("aggregation init failed: %v", err) + } + + batch, err := agg.Next(100) + if err != nil { + t.Fatalf("aggregation next failed: %v", err) + } + + // Extract columns from result + sumArr := batch.Columns[0].(*array.Float64) + avgArr := batch.Columns[1].(*array.Float64) + minArr := batch.Columns[2].(*array.Float64) + maxArr := batch.Columns[3].(*array.Float64) + + if sumArr.Value(0) != sum { + t.Fatalf("SUM mismatch: expected %f, got %f", sum, sumArr.Value(0)) + } + if avgArr.Value(0) != avg { + t.Fatalf("AVG mismatch: expected %f, got %f", avg, avgArr.Value(0)) + } + if minArr.Value(0) != min { + t.Fatalf("MIN mismatch: expected %f, got %f", min, minArr.Value(0)) + } + if maxArr.Value(0) != max { + t.Fatalf("MAX mismatch: expected %f, got %f", max, maxArr.Value(0)) + } + }) + + // ───────────────────────────────────────────────────────────── + + t.Run("sum_age", func(t *testing.T) { + mem := memory.NewGoAllocator() + _, cols := generateIntegrationDataset1(mem) + ageArr := cols[3].(*array.Int32) + + // Expected SUM(age) + var sum int32 + for i := 0; i < ageArr.Len(); i++ { + if !ageArr.IsNull(i) { + sum += ageArr.Value(i) + } + } + + src, _ := NewIntegrationSource1(mem) + + agg, err := aggr.NewGlobalAggrExec( + src, + []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions( + aggr.Sum, Expr.NewColumnResolve("age")), + }, + ) + if err != nil { + t.Fatalf("agg init failed: %v", err) + } + + batch, _ := agg.Next(100) + sumArr := batch.Columns[0].(*array.Float64) // SUM(int32) -> int64 + + if sumArr.Value(0) != float64(sum) { + t.Fatalf("SUM(age) mismatch: expected %v, got %v", sum, sumArr.Value(0)) + } + }) + + // ───────────────────────────────────────────────────────────── + + 
t.Run("min_max_age", func(t *testing.T) { + mem := memory.NewGoAllocator() + _, cols := generateIntegrationDataset1(mem) + ageArr := cols[3].(*array.Int32) + + min := int32(math.MaxInt32) + max := int32(math.MinInt32) + + for i := 0; i < ageArr.Len(); i++ { + if ageArr.IsNull(i) { + continue + } + v := ageArr.Value(i) + if v < min { + min = v + } + if v > max { + max = v + } + } + + src, _ := NewIntegrationSource1(mem) + + agg, err := aggr.NewGlobalAggrExec(src, + []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions(aggr.Min, Expr.NewColumnResolve("age")), + aggr.NewAggregateFunctions(aggr.Max, Expr.NewColumnResolve("age")), + }) + if err != nil { + t.Fatalf("agg init failed: %v", err) + } + + batch, _ := agg.Next(100) + + minArr := batch.Columns[0].(*array.Float64) + maxArr := batch.Columns[1].(*array.Float64) + + if minArr.Value(0) != float64(min) { + t.Fatalf("MIN(age) mismatch: expected %v, got %v", min, minArr.Value(0)) + } + if maxArr.Value(0) != float64(max) { + t.Fatalf("MAX(age) mismatch: expected %v, got %v", max, maxArr.Value(0)) + } + }) +} + +/* +============================================================================ +Group-by tests +============================================================================ +*/ + +func TestGroupByExec(t *testing.T) { + mem := memory.NewGoAllocator() + + // Utility helper to get origin dataset quickly + _, originCols := generateIntegrationDataset1(mem) + + // ------------------------------------------------------------ + t.Run("group_by_department_count", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + dept := Expr.NewColumnResolve("department") + + groupByExpr := []Expr.Expression{dept} + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupByExpr) + if err != nil { + t.Fatalf("gb init failed: %v", err) + } + + batch, err := gb.Next(1024) + if err != nil { + t.Fatalf("group by Next failed: %v", 
err) + } + + deptCol := batch.Columns[0].(*array.String) + countCol := batch.Columns[1].(*array.Float64) // count returns float64 in your impl + + // Validate counts by manually counting departments + origDept := originCols[5].(*array.String) + expected := make(map[string]int) + + for i := 0; i < origDept.Len(); i++ { + if origDept.IsNull(i) { + expected["NULL"]++ + } else { + expected[origDept.Value(i)]++ + } + } + + for i := 0; i < int(batch.RowCount); i++ { + key := "NULL" + if !deptCol.IsNull(i) { + key = deptCol.Value(i) + } + got := int(countCol.Value(i)) + want := expected[key] + + if got != want { + t.Fatalf("group %s: expected %d, got %d", key, want, got) + } + } + }) + + // ------------------------------------------------------------ + t.Run("group_by_department_region_sum_salary", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + dept := Expr.NewColumnResolve("department") + region := Expr.NewColumnResolve("region") + + groupByExpr := []Expr.Expression{dept, region} + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Sum, Child: Expr.NewColumnResolve("salary")}, + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupByExpr) + if err != nil { + t.Fatalf("init failed: %v", err) + } + + batch, err := gb.Next(1024) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + + deptCol := batch.Columns[0].(*array.String) + regionCol := batch.Columns[1].(*array.String) + sumCol := batch.Columns[2].(*array.Float64) + + origDept := originCols[5].(*array.String) + origRegion := originCols[6].(*array.String) + origSalary := originCols[4].(*array.Float64) + + expected := make(map[string]float64) + + for i := 0; i < origSalary.Len(); i++ { + d := "NULL" + if !origDept.IsNull(i) { + d = origDept.Value(i) + } + + r := "NULL" + if !origRegion.IsNull(i) { + r = origRegion.Value(i) + } + + key := d + "|" + r + expected[key] += origSalary.Value(i) + } + + for i := 0; i < int(batch.RowCount); i++ { + d := "NULL" + if !deptCol.IsNull(i) { + d = 
deptCol.Value(i) + } + + r := "NULL" + if !regionCol.IsNull(i) { + r = regionCol.Value(i) + } + + key := d + "|" + r + got := sumCol.Value(i) + want := expected[key] + + if got != want { + t.Fatalf("(%s,%s): expected sum=%f, got %f", d, r, want, got) + } + } + }) + + // ------------------------------------------------------------ + t.Run("group_by_with_null_keys", func(t *testing.T) { + mem := memory.NewGoAllocator() + src, _ := NewIntegrationSource1(mem) + + region := Expr.NewColumnResolve("region") + + groupByExpr := []Expr.Expression{region} + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, _ := aggr.NewGroupByExec(src, aggs, groupByExpr) + + batch, err := gb.Next(1024) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + + regionCol := batch.Columns[0].(*array.String) + countCol := batch.Columns[1].(*array.Float64) + + origRegion := originCols[6].(*array.String) + expected := make(map[string]int) + + for i := 0; i < origRegion.Len(); i++ { + key := "NULL" + if !origRegion.IsNull(i) { + key = origRegion.Value(i) + } + expected[key]++ + } + + for i := 0; i < int(batch.RowCount); i++ { + k := "NULL" + if !regionCol.IsNull(i) { + k = regionCol.Value(i) + } + + got := int(countCol.Value(i)) + want := expected[k] + + if got != want { + t.Fatalf("region=%s expected %d got %d", k, want, got) + } + } + }) +} + +/* +============================================================================ +Having tests +============================================================================ +*/ +func TestHavingExec(t *testing.T) { + mem := memory.NewGoAllocator() + + // helper — build group by department avg salary + buildDeptAvg := func() operators.Operator { + src, _ := NewIntegrationSource1(mem) + + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Avg, Child: Expr.NewColumnResolve("salary")}, + } + + gb, _ := aggr.NewGroupByExec(src, aggs, + []Expr.Expression{Expr.NewColumnResolve("department")}, + ) + 
return gb + } + + // ------------------------------------------------------------ + t.Run("having_avg_salary_gt_75000", func(t *testing.T) { + gb := buildDeptAvg() + + having := Expr.NewBinaryExpr( + Expr.NewColumnResolve("avg_Column(salary)"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(75000)), + ) + + hv, _ := aggr.NewHavingExec(gb, having) + fmt.Printf("\t%v\n", hv.Schema()) + batch, err := hv.Next(500) + if err != nil { + t.Fatalf("having next failed: %v", err) + } + t.Logf("batch:\t%v\n", batch.PrettyPrint()) + + deptCol := batch.Columns[0].(*array.String) + avgCol := batch.Columns[1].(*array.Float64) + + for i := 0; i < int(batch.RowCount); i++ { + if avgCol.Value(i) <= 75000 { + t.Fatalf("expected avg > 75k, got %f for dept %s", + avgCol.Value(i), deptCol.Value(i)) + } + } + }) + + // ------------------------------------------------------------ + t.Run("having_no_group_passes", func(t *testing.T) { + gb := buildDeptAvg() + + having := Expr.NewBinaryExpr( + Expr.NewColumnResolve("avg_Column(salary)"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(999999)), + ) + + hv, _ := aggr.NewHavingExec(gb, having) + batch, _ := hv.Next(100) + + if batch.RowCount != 0 { + t.Fatalf("expected empty result") + } + }) + + // ------------------------------------------------------------ + t.Run("having_everything_passes", func(t *testing.T) { + gb := buildDeptAvg() + + having := Expr.NewBinaryExpr( + Expr.NewColumnResolve("avg_Column(salary)"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(0)), + ) + + hv, _ := aggr.NewHavingExec(gb, having) + batch, _ := hv.Next(1000) + + if batch.RowCount == 0 { + t.Fatalf("expected some rows") + } + }) +} + +/* +============================================================================ +Distinct tests +============================================================================ +*/ +func TestDistinctExec(t *testing.T) { + mem 
:= memory.NewGoAllocator() + + // Utility: load dataset + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create source: %v", err) + } + + // ------------------------------- + // 1) DISTINCT on department + // ------------------------------- + t.Run("distinct_department", func(t *testing.T) { + expr := Expr.NewExpressions( + Expr.NewColumnResolve("department"), + ) + + de, err := filter.NewDistinctExec(src, expr) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + batch, err := de.Next(100) + if err != nil { + t.Fatalf("distinct next failed: %v", err) + } + + //deptArr := batch.Columns[5].(*array.String) + + // get expected unique departments from original dataset + origDept := cols[5].(*array.String) + expected := make(map[string]struct{}) + for i := 0; i < origDept.Len(); i++ { + if origDept.IsNull(i) { + expected["NULL"] = struct{}{} + } else { + expected[origDept.Value(i)] = struct{}{} + } + } + + if int(batch.RowCount) != len(expected) { + t.Fatalf("expected %d distinct departments, got %d", + len(expected), batch.RowCount) + } + }) + + // ------------------------------- + // 2) DISTINCT on region + // ------------------------------- + t.Run("distinct_region", func(t *testing.T) { + // reload source (distinct consumes input) + src2, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + expr := Expr.NewExpressions( + Expr.NewColumnResolve("region"), + ) + + de, err := filter.NewDistinctExec(src2, expr) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + batch, err := de.Next(100) + if err != nil { + t.Fatalf("distinct next failed: %v", err) + } + + regionArr := batch.Columns[6].(*array.String) + + orig := cols[6].(*array.String) + expected := make(map[string]struct{}) + for i := 0; i < orig.Len(); i++ { + if orig.IsNull(i) { + expected["NULL"] = struct{}{} + } else { + expected[orig.Value(i)] = struct{}{} + } + 
} + + if int(regionArr.Len()) != len(expected) { + t.Fatalf("expected %d distinct regions, got %d", + len(expected), regionArr.Len()) + } + }) + + // ------------------------------- + // 3) DISTINCT(id) → should return all 20 rows + // ------------------------------- + t.Run("distinct_id_all_unique", func(t *testing.T) { + src3, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + expr := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + ) + + de, err := filter.NewDistinctExec(src3, expr) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + batch, err := de.Next(100) + if err != nil { + t.Fatalf("distinct next failed: %v", err) + } + + if batch.RowCount != 20 { + t.Fatalf("expected 20 distinct id rows, got %d", batch.RowCount) + } + }) +} + +/* +============================================================================ +Limit tests +============================================================================ +*/ +func TestLimitExec(t *testing.T) { + mem := memory.NewGoAllocator() + names, cols := generateIntegrationDataset1(mem) + + // ---------------------------------- + // 1) LIMIT 5 + // ---------------------------------- + t.Run("limit_5", func(t *testing.T) { + src, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + lim, err := filter.NewLimitExec(src, 5) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil { + t.Fatalf("limit next error: %v", err) + } + + if batch.RowCount != 5 { + t.Fatalf("expected 5 rows, got %d", batch.RowCount) + } + + // verify first 5 IDs match original dataset + idArr := batch.Columns[0].(*array.Int32) + origID := cols[0].(*array.Int32) + + for i := 0; i < 5; i++ { + if idArr.Value(i) != origID.Value(i) { + t.Fatalf("row %d: expected id=%d, got id=%d", + i, origID.Value(i), idArr.Value(i)) + } + } + }) + + // ---------------------------------- + // 2) LIMIT EXACT = 20 + // ---------------------------------- + t.Run("limit_exact", 
func(t *testing.T) { + src, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + lim, err := filter.NewLimitExec(src, 20) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil { + t.Fatalf("limit error: %v", err) + } + + if batch.RowCount != 20 { + t.Fatalf("expected 20 rows, got %d", batch.RowCount) + } + }) + + // ---------------------------------- + // 3) LIMIT larger than dataset + // ---------------------------------- + t.Run("limit_too_large", func(t *testing.T) { + src, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + lim, err := filter.NewLimitExec(src, 50) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil { + t.Fatalf("limit next failed: %v", err) + } + + if batch.RowCount != 20 { + t.Fatalf("expected 20 rows when limit > dataset size, got %d", batch.RowCount) + } + }) +} + +/* +============================================================================ +Scalar function tests +============================================================================ +*/ +func TestScalarStringFunctions(t *testing.T) { + mem := memory.NewGoAllocator() + + // We will run: SELECT department, UPPER(department), LOWER(department) + // Using ScalarFunction(Upper, col("department")) + // And ScalarFunction(Lower, col("department")) + + t.Run("UpperFunction", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + colDept := Expr.NewColumnResolve("department") + + upperExpr := Expr.NewScalarFunction(Expr.Upper, colDept) + + // Evaluate: UPPER(department) + batch, err := src.Next(100) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + arr, err := Expr.EvalScalarFunction(upperExpr, batch) + if err != nil { + t.Fatalf("upper eval failed: %v", err) + } + + out := arr.(*array.String) + + // Compare with strings.ToUpper + deptCol, _ := Expr.EvalExpression(colDept, batch) + deptArr := deptCol.(*array.String) + + for i := 0; i < 
int(out.Len()); i++ { + if deptArr.IsNull(i) { + if !out.IsNull(i) { + t.Fatalf("expected null at %d", i) + } + continue + } + expected := strings.ToUpper(deptArr.Value(i)) + if out.Value(i) != expected { + t.Fatalf("UPPER mismatch at row %d: got %s, expected %s", + i, out.Value(i), expected) + } + } + }) + + t.Run("LowerFunction", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + colDept := Expr.NewColumnResolve("department") + + lowerExpr := Expr.NewScalarFunction(Expr.Lower, colDept) + + // Evaluate: LOWER(department) + batch, err := src.Next(100) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + arr, err := Expr.EvalScalarFunction(lowerExpr, batch) + if err != nil { + t.Fatalf("lower eval failed: %v", err) + } + + out := arr.(*array.String) + + deptCol, _ := Expr.EvalExpression(colDept, batch) + deptArr := deptCol.(*array.String) + + for i := 0; i < int(out.Len()); i++ { + if deptArr.IsNull(i) { + if !out.IsNull(i) { + t.Fatalf("expected null at %d", i) + } + continue + } + expected := strings.ToLower(deptArr.Value(i)) + if out.Value(i) != expected { + t.Fatalf("LOWER mismatch at row %d: got %s, expected %s", + i, out.Value(i), expected) + } + } + }) + t.Run("Abs", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + fn := Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("salary")) + exec, err := project.NewProjectExec(src, []Expr.Expression{fn}) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := exec.Next(50) + if err != nil { + t.Fatalf("exec failed: %v", err) + } + + out := batch.Columns[0].(*array.Float64) + + for i := 0; i < out.Len(); i++ { + val := out.Value(i) + if val < 0 { + t.Fatalf("abs result should never be negative, got %v", val) + } + } + }) + + // ───────────────────────────────────────────── + // ROUND(salary) + // ───────────────────────────────────────────── + t.Run("Round", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + _, col := 
generateIntegrationDataset1(mem) + + fn := Expr.NewScalarFunction(Expr.Round, Expr.NewColumnResolve("salary")) + exec, err := project.NewProjectExec(src, []Expr.Expression{fn}) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := exec.Next(50) + if err != nil { + t.Fatalf("exec failed: %v", err) + } + + out := batch.Columns[0].(*array.Float64) + orig := col[4].(*array.Float64) // salary column + + for i := 0; i < out.Len(); i++ { + expected := math.Round(orig.Value(i)) + got := out.Value(i) + + if expected != got { + t.Fatalf("round mismatch at %d: expected=%v got=%v", i, expected, got) + } + } + }) +} + +/* +============================================================================ +Hash join tests +============================================================================ +*/ +func TestHashJoinExec(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("InnerJoin_SimpleDept", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("department")}, + []Expr.Expression{Expr.NewColumnResolve("department")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("inner join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount == 0 { + t.Fatalf("inner join returned zero rows (expected matches)") + } + }) + + t.Run("LeftJoin_AllLeftPreserved", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("region")}, + []Expr.Expression{Expr.NewColumnResolve("region")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.LeftJoin, nil) + if err != nil { + t.Fatalf("left join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + 
t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount < 20 { + t.Fatalf("left join should preserve all 20 left rows, got %d", batch.RowCount) + } + }) + + t.Run("RightJoin_AllRightPreserved", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("region")}, + []Expr.Expression{Expr.NewColumnResolve("region")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.RightJoin, nil) + if err != nil { + t.Fatalf("right join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount < 20 { + t.Fatalf("right join should preserve all 20 right rows, got %d", batch.RowCount) + } + }) + + t.Run("InnerJoin_NoMatches", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + // Join on unrelated keys → expect zero matches + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("age")}, + []Expr.Expression{Expr.NewColumnResolve("dept_id")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("inner join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount != 0 { + t.Fatalf("expected zero matches, got %d", batch.RowCount) + } + }) + + t.Run("MultiColumnJoin", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{ + Expr.NewColumnResolve("department"), + Expr.NewColumnResolve("region"), + }, + []Expr.Expression{ + Expr.NewColumnResolve("department"), + Expr.NewColumnResolve("region"), + }, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("multi-col join init failed: %v", err) + } + + batch, err := j.Next(1000) 
+ if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount == 0 { + t.Fatalf("multi-column join should match some rows") + } + }) + + t.Run("InnerJoin_CheckSchemaPrefixed", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("department")}, + []Expr.Expression{Expr.NewColumnResolve("department")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + + schema := j.Schema() + + // Check prefixing (department exists on both sides) + foundLeft := false + foundRight := false + + for _, f := range schema.Fields() { + if f.Name == "left_department" { + foundLeft = true + } + if f.Name == "right_department" { + foundRight = true + } + } + + if !foundLeft || !foundRight { + t.Fatalf("schema prefixing failed: left_department=%v right_department=%v", foundLeft, foundRight) + } + }) +} From 158f3512eb8bb5c3df6d338bd3a9ff66677e2c76 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Thu, 4 Dec 2025 21:42:37 -0500 Subject: [PATCH 19/21] Documentation: Implement first wave of intergration test --- .gitignore | 2 + src/Backend/opti-sql-go/Expr/expr.go | 48 +- src/Backend/opti-sql-go/Expr/expr_test.go | 82 +- .../opti-sql-go/operators/aggr/having_test.go | 8 +- .../opti-sql-go/operators/filter/filter.go | 164 ++- .../operators/filter/filter_test.go | 61 +- .../opti-sql-go/operators/filter/limit.go | 2 - .../operators/project/projectExecExpr_test.go | 24 +- .../operators/test/intergration_test.go | 340 ++++++ .../opti-sql-go/operators/test/t1_test.go | 17 +- .../csv/intergration_test_data_1.csv | 1001 +++++++++++++++++ .../csv/intergration_test_data_2.csv | 1001 +++++++++++++++++ 12 files changed, 2657 insertions(+), 93 deletions(-) create mode 100644 src/Backend/opti-sql-go/operators/test/intergration_test.go create mode 100644 
src/Backend/test_data/csv/intergration_test_data_1.csv create mode 100644 src/Backend/test_data/csv/intergration_test_data_2.csv diff --git a/.gitignore b/.gitignore index 3970ccd..6ea7b8b 100644 --- a/.gitignore +++ b/.gitignore @@ -107,6 +107,8 @@ src/Backend/test_data/json # Allow a specific CSV dataset that we want tracked despite the general csv ignores !src/Backend/test_data/csv/ !src/Backend/test_data/csv/Mental_Health_and_Social_Media_Balance_Dataset.csv +!src/Backend/test_data/csv/intergration_test_data_1.csv +!src/Backend/test_data/csv/intergration_test_data_2.csv # allow parquet file !src/Backend/test_data/parquet/ !src/Backend/test_data/parquet/capitals_clean.parquet \ No newline at end of file diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index 8b6fb03..665990d 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -219,7 +219,52 @@ type LiteralResolve struct { } func NewLiteralResolve(Type arrow.DataType, Value any) *LiteralResolve { - return &LiteralResolve{Type: Type, Value: Value} + var castVal any + + switch v := Value.(type) { + + // ------------------------------------------------------ + // INT → cast based on Arrow integer type + // ------------------------------------------------------ + case int: + switch Type.ID() { + case arrow.INT8: + castVal = int8(v) + case arrow.INT16: + castVal = int16(v) + case arrow.INT32: + castVal = int32(v) + case arrow.INT64: + castVal = int64(v) + case arrow.UINT8: + castVal = uint8(v) + case arrow.UINT16: + castVal = uint16(v) + case arrow.UINT32: + castVal = uint32(v) + case arrow.UINT64: + castVal = uint64(v) + default: + // not an integer Arrow type → store original + castVal = v + } + case string: + castVal = string(v) + case bool: + castVal = bool(v) + case float64: + switch Type.ID() { + case arrow.FLOAT32: + castVal = float32(v) + case arrow.FLOAT64: + castVal = float64(v) + } + default: + fmt.Printf("%v did not match 
any case, of type %T\n", v, v) + castVal = Value + } + fmt.Printf("sotred as -> %v\t%v\n", Type, castVal) + return &LiteralResolve{Type: Type, Value: castVal} } func EvalLiteral(l *LiteralResolve, batch *operators.RecordBatch) (arrow.Array, error) { n := int(batch.RowCount) @@ -532,7 +577,6 @@ func unpackDatum(d compute.Datum) (arrow.Array, error) { if !ok { return nil, fmt.Errorf("datum %v is not of type array", d) } - fmt.Printf("unpackDatum: array str: \t%v\n", array.String()) return array.MakeArray(), nil } diff --git a/src/Backend/opti-sql-go/Expr/expr_test.go b/src/Backend/opti-sql-go/Expr/expr_test.go index 8eee24e..7f839bc 100644 --- a/src/Backend/opti-sql-go/Expr/expr_test.go +++ b/src/Backend/opti-sql-go/Expr/expr_test.go @@ -1125,7 +1125,7 @@ func TestInferScalarFunctionType(t *testing.T) { // test constructor methods for expressions func TestExprInitMethods(t *testing.T) { t.Run("New Alias", func(t *testing.T) { - literal := NewLiteralResolve(arrow.BinaryTypes.String, string("the golfer")) + literal := NewLiteralResolve(arrow.BinaryTypes.String, "the golfer") a := NewAlias(literal, "nickname") if a == nil { t.Fatalf("failed to create Alias expression") @@ -1138,35 +1138,35 @@ func TestExprInitMethods(t *testing.T) { } }) t.Run("New LiteralResolve", func(t *testing.T) { - lit := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(42)) + lit := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 42) if lit == nil { t.Fatalf("failed to create LiteralResolve expression") } }) t.Run("New BinaryExpr", func(t *testing.T) { - left := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(10)) - right := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(5)) + left := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 10) + right := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 5) be := NewBinaryExpr(left, Addition, right) if be == nil { t.Fatalf("failed to create BinaryExpr expression") } }) t.Run("New ScalarFunc", func(t *testing.T) { - arg := 
NewLiteralResolve(arrow.BinaryTypes.String, string("hello")) + arg := NewLiteralResolve(arrow.BinaryTypes.String, "hello") sf := NewScalarFunction(Upper, arg) if sf == nil { t.Fatalf("failed to create ScalarFunction expression") } }) t.Run("New CastExpr", func(t *testing.T) { - expr := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(100)) + expr := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 100) ce := NewCastExpr(expr, arrow.PrimitiveTypes.Float64) if ce == nil { t.Fatalf("failed to create CastExpr expression") } }) t.Run("New Expressions", func(t *testing.T) { - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(7)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 7) cr := NewColumnResolve("score") left := NewBinaryExpr(literal, Multiplication, cr) sf := NewScalarFunction(Abs, left) @@ -1183,7 +1183,7 @@ func TestExprInitMethods(t *testing.T) { func TestFilterBinaryExpr(t *testing.T) { t.Run("age == 22", func(t *testing.T) { rc := generateTestColumns() //4 - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(22)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (22)) col := NewColumnResolve("age") be := NewBinaryExpr(col, Equal, literal) arr, err := EvalExpression(be, rc) @@ -1204,7 +1204,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age != 22", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(22)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (22)) col := NewColumnResolve("age") be := NewBinaryExpr(col, NotEqual, literal) @@ -1227,7 +1227,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age < 34", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(34)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (34)) col := NewColumnResolve("age") be := NewBinaryExpr(col, LessThan, literal) @@ -1250,7 +1250,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) 
t.Run("age <= 34", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(34)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (34)) col := NewColumnResolve("age") be := NewBinaryExpr(col, LessThanOrEqual, literal) @@ -1273,7 +1273,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age > 30", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (30)) col := NewColumnResolve("age") be := NewBinaryExpr(col, GreaterThan, literal) @@ -1296,7 +1296,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age >= 34", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(34)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (34)) col := NewColumnResolve("age") be := NewBinaryExpr(col, GreaterThanOrEqual, literal) @@ -1323,7 +1323,7 @@ func TestFilterBinaryExpr(t *testing.T) { left := NewBinaryExpr( NewColumnResolve("age"), GreaterThan, - NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + NewLiteralResolve(arrow.PrimitiveTypes.Int32, (30)), ) right := NewBinaryExpr( @@ -1353,7 +1353,7 @@ func TestFilterBinaryExpr(t *testing.T) { left := NewBinaryExpr( NewColumnResolve("age"), LessThan, - NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + NewLiteralResolve(arrow.PrimitiveTypes.Int32, (30)), ) right := NewBinaryExpr( @@ -1598,7 +1598,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name ends with d", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "%d" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, (sqlStatment))) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { @@ 
-1818,3 +1818,53 @@ func TestNullCheckExpr(t *testing.T) { } }) } + +func TestLiteralCast(t *testing.T) { + + tests := []struct { + name string + dtype arrow.DataType + value any + rowCount uint64 + }{ + // ---- INT CASTS ---- + {"Int8 literal", arrow.PrimitiveTypes.Int8, 5, 3}, + {"Int16 literal", arrow.PrimitiveTypes.Int16, 5, 3}, + {"Int32 literal", arrow.PrimitiveTypes.Int32, 5, 3}, + {"Int64 literal", arrow.PrimitiveTypes.Int64, 5, 3}, + {"Uint8 literal", arrow.PrimitiveTypes.Uint8, 5, 3}, + {"Uint16 literal", arrow.PrimitiveTypes.Uint16, 5, 3}, + {"Uint32 literal", arrow.PrimitiveTypes.Uint32, 5, 3}, + {"Uint64 literal", arrow.PrimitiveTypes.Uint64, 5, 3}, + + // ---- FLOAT CASTS ---- + {"Float32 literal", arrow.PrimitiveTypes.Float32, 23.5, 4}, + {"Float64 literal", arrow.PrimitiveTypes.Float64, 23.5, 4}, + + // ---- STRING ---- + {"String literal", arrow.BinaryTypes.String, "hello", 2}, + + // ---- BOOL ---- + {"Bool literal", arrow.FixedWidthTypes.Boolean, true, 5}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + lit := NewLiteralResolve(tt.dtype, tt.value) + + batch := &operators.RecordBatch{ + RowCount: tt.rowCount, + } + + arr, err := EvalExpression(lit, batch) + if err != nil { + t.Fatalf("EvalExpression failed: %v", err) + } + defer arr.Release() + + if !arrow.TypeEqual(arr.DataType(), tt.dtype) { + t.Fatalf("expected Arrow type %v but got %v", tt.dtype, arr.DataType()) + } + }) + } +} diff --git a/src/Backend/opti-sql-go/operators/aggr/having_test.go b/src/Backend/opti-sql-go/operators/aggr/having_test.go index 9321639..45275b2 100644 --- a/src/Backend/opti-sql-go/operators/aggr/having_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/having_test.go @@ -37,7 +37,7 @@ func TestHavingExec_OnGroupBy(t *testing.T) { havingExpr := Expr.NewBinaryExpr( Expr.NewColumnResolve(sumCol), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(600000)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 
600000.0), ) having, err := NewHavingExec(gb, havingExpr) @@ -81,7 +81,7 @@ func TestHavingExec_OnGroupBy(t *testing.T) { havingExpr := Expr.NewBinaryExpr( Expr.NewColumnResolve(countCol), Expr.GreaterThanOrEqual, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(10)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 10.0), ) having, err := NewHavingExec(gb, havingExpr) @@ -119,7 +119,7 @@ func TestHavingExec_OnGroupBy(t *testing.T) { havingExpr := Expr.NewBinaryExpr( Expr.NewColumnResolve(sumCol), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(1_000_000_000)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 1_000_000_000.0), ) having, _ := NewHavingExec(gb, havingExpr) @@ -180,7 +180,7 @@ func TestHavingExec_OnGroupBy(t *testing.T) { havingExpr := Expr.NewBinaryExpr( Expr.NewColumnResolve(countCol), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(0)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 0.0), ) h, _ := NewHavingExec(gb, havingExpr) diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index 8beb80f..a476ac8 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -11,6 +11,7 @@ import ( "github.com/apache/arrow/go/v17/arrow" "github.com/apache/arrow/go/v17/arrow/array" "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" ) var ( @@ -23,6 +24,9 @@ type FilterExec struct { schema *arrow.Schema predicate Expr.Expression done bool + // + bufferedCols []arrow.Array // not yet returned + bufferedSize int64 } func NewFilterExec(input operators.Operator, pred Expr.Expression) (*FilterExec, error) { @@ -30,52 +34,84 @@ func NewFilterExec(input operators.Operator, pred Expr.Expression) (*FilterExec, return nil, errors.New("predicates passed to FilterExec are invalid") } return &FilterExec{ - 
input: input, - predicate: pred, - schema: input.Schema(), + input: input, + predicate: pred, + schema: input.Schema(), + bufferedCols: make([]arrow.Array, input.Schema().NumFields()), }, nil } func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { - if n == 0 { - return nil, errors.New("must pass in wanted batch size > 0") - } - if f.done { + if f.done && f.bufferedSize == 0 { return nil, io.EOF } - childBatch, err := f.input.Next(n) - if err != nil { - if errors.Is(err, io.EOF) { - f.done = true - return nil, io.EOF + mem := memory.NewGoAllocator() + for f.bufferedSize < int64(n) && !f.done { + childBatch, err := f.input.Next(n) + if err != nil { + if errors.Is(err, io.EOF) { + f.done = true + break // might be some in the buffer still + } + return nil, err } - return nil, err - } - booleanMask, err := Expr.EvalExpression(f.predicate, childBatch) - if err != nil { - return nil, err - } - boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should - if !ok { - return nil, errors.New("predicate did not evaluate to boolean array") - } - filteredCol := make([]arrow.Array, len(childBatch.Columns)) - for i, col := range childBatch.Columns { - filteredCol[i], err = ApplyBooleanMask(col, boolArr) + booleanMask, err := Expr.EvalExpression(f.predicate, childBatch) if err != nil { return nil, err } + boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should + if !ok { + return nil, errors.New("predicate did not evaluate to boolean array") + } + filteredCol := make([]arrow.Array, len(childBatch.Columns)) + for i, col := range childBatch.Columns { + filteredCol[i], err = ApplyBooleanMask(col, boolArr) + if err != nil { + return nil, err + } + } + booleanMask.Release() + // combine with buffered columns + for i, col := range f.bufferedCols { + if col == nil { + f.bufferedCols[i] = filteredCol[i] + continue + 
} + // otherwise concate old + new + combined, err := array.Concatenate([]arrow.Array{col, filteredCol[i]}, mem) + if err != nil { + return nil, err + } + + // Release old buffer column + col.Release() + + f.bufferedCols[i] = combined + } + if len(childBatch.Columns) > 0 { + size := int64(filteredCol[0].Len()) + f.bufferedSize += int64(size) + } + } + if f.bufferedSize == 0 { + return nil, io.EOF + } + toEmit := min(int64(n), f.bufferedSize) + out, err := f.sliceFilterCols(toEmit, mem) + if err != nil { + return nil, err } - booleanMask.Release() - // release old columns - operators.ReleaseArrays(childBatch.Columns) - size := uint64(filteredCol[0].Len()) - - return &operators.RecordBatch{ - Schema: childBatch.Schema, - Columns: filteredCol, + // subtract emitted rows from buffer; guard against accidental negative values + + size := uint64(out[0].Len()) + + rc := &operators.RecordBatch{ + Schema: f.schema, + Columns: out, RowCount: size, - }, nil + } + return rc, nil } + func (f *FilterExec) Schema() *arrow.Schema { return f.schema } @@ -145,3 +181,63 @@ func validPredicates(pred Expr.Expression, schema *arrow.Schema) bool { return false } } + +func (f *FilterExec) sliceFilterCols(n int64, mem memory.Allocator) ([]arrow.Array, error) { + out := make([]arrow.Array, len(f.bufferedCols)) + + // Build index arrays for: + // 1) rows to emit: 0 .. n-1 + // 2) rows to keep: n .. 
f.bufferedSize-1 + emitIdx := array.NewInt64Builder(mem) + keepIdx := array.NewInt64Builder(mem) + + total := f.bufferedSize + limit := n + if limit > total { + limit = total + } + + // emit rows [0 , limit) + for i := int64(0); i < limit; i++ { + emitIdx.Append(i) + } + + // keep rows [limit , total) + for i := limit; i < total; i++ { + keepIdx.Append(i) + } + + emitArr := emitIdx.NewArray() + keepArr := keepIdx.NewArray() + emitIdx.Release() + keepIdx.Release() + defer emitArr.Release() + defer keepArr.Release() + + // For each column: materialize output slice + update buffer + for i, col := range f.bufferedCols { + // emit slice + sliceOut, err := compute.TakeArray(context.TODO(), col, emitArr) + if err != nil { + return nil, err + } + out[i] = sliceOut + + // keep remaining slice + keepSlice, err := compute.TakeArray(context.TODO(), col, keepArr) + if err != nil { + return nil, err + } + + // release old buffer column + col.Release() + + // store updated buffer + f.bufferedCols[i] = keepSlice + } + + // update size + f.bufferedSize = total - limit + + return out, nil +} diff --git a/src/Backend/opti-sql-go/operators/filter/filter_test.go b/src/Backend/opti-sql-go/operators/filter/filter_test.go index 8e531c9..9facb8c 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter_test.go +++ b/src/Backend/opti-sql-go/operators/filter/filter_test.go @@ -2,6 +2,7 @@ package filter import ( "errors" + "fmt" "io" "opti-sql-go/Expr" "testing" @@ -24,7 +25,7 @@ func TestFilterInit_1(t *testing.T) { predicate := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), ) _, err := NewFilterExec(proj, predicate) if err != nil { @@ -50,7 +51,7 @@ func TestFilterInit_1(t *testing.T) { predicate := Expr.NewBinaryExpr( Expr.NewColumnResolve("does_not_exist"), Expr.Equal, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(1)), + 
Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 1), ) _, err := NewFilterExec(proj, predicate) if err == nil { @@ -75,7 +76,7 @@ func TestFilterExec_BasicPredicates(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), ) f, _ := NewFilterExec(proj, pred) @@ -136,7 +137,7 @@ func TestFilterExec_BasicPredicates(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("salary"), Expr.LessThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(60000)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(60000.0)), ) f, _ := NewFilterExec(proj, pred) @@ -199,7 +200,7 @@ func TestFilterExec_EdgeCases(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(20)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 20), ) f, _ := NewFilterExec(proj, pred) @@ -216,7 +217,7 @@ func TestFilterExec_EdgeCases(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(0)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 0), ) f, _ := NewFilterExec(proj, pred) @@ -235,18 +236,17 @@ func TestFilterExec_EdgeCases(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.Equal, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(-1)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, -1), ) f, _ := NewFilterExec(proj, pred) - rb, err := f.Next(20) - if err != nil { - t.Fatalf("unexpected: %v", err) + _, err := f.Next(20) + if err == nil { + t.Fatalf("expected EOF error but got nil") } - - if rb.RowCount != 0 { - t.Fatalf("expected 0 rows, got %d", rb.RowCount) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF error but got %v", err) } }) @@ -269,7 +269,7 @@ func 
TestFilterExec_EdgeCases(t *testing.T) { func TestFilterExecVariantCase(t *testing.T) { t.Run("filter done", func(t *testing.T) { proj := basicProject() - predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) f, _ := NewFilterExec(proj, predicate) _, err := f.Next(1) if err != nil { @@ -284,7 +284,7 @@ func TestFilterExecVariantCase(t *testing.T) { }) t.Run("filter schema ", func(t *testing.T) { proj := basicProject() - predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) f, _ := NewFilterExec(proj, predicate) t.Logf("%s", f.Schema()) if !f.schema.Equal(proj.Schema()) { @@ -294,7 +294,7 @@ func TestFilterExecVariantCase(t *testing.T) { }) t.Run("filter close ", func(t *testing.T) { proj := basicProject() - predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) f, _ := NewFilterExec(proj, predicate) if f.Close() != nil { t.Fatalf("expected nil error on close") @@ -302,7 +302,7 @@ func TestFilterExecVariantCase(t *testing.T) { }) t.Run("filter unsupported binary operator ", func(t *testing.T) { proj := basicProject() - predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.Addition, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.Addition, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) _, err := 
NewFilterExec(proj, predicate) if err == nil { t.Fatalf("expected error for unsupported binary operator") @@ -319,3 +319,30 @@ func TestFilterExecVariantCase(t *testing.T) { }) } + +func TestFilterBuffer(t *testing.T) { + t.Run("test", func(t *testing.T) { + + proj := basicProject() + predicate := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), + ) + f, err := NewFilterExec(proj, predicate) + if err != nil { + t.Fatalf("failed to create filter exec: %v", err) + } + rc, err := f.Next(5) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + fmt.Printf("First Batch:\t%v\n", rc.PrettyPrint()) + rc, err = f.Next(5) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + fmt.Printf("second Batch:\t%v\n", rc.PrettyPrint()) + + }) +} diff --git a/src/Backend/opti-sql-go/operators/filter/limit.go b/src/Backend/opti-sql-go/operators/filter/limit.go index 196160f..d25b848 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit.go +++ b/src/Backend/opti-sql-go/operators/filter/limit.go @@ -3,7 +3,6 @@ package filter import ( "context" "errors" - "fmt" "io" "math" "opti-sql-go/Expr" @@ -117,7 +116,6 @@ func (d *DistinctExec) Next(n uint16) (*operators.RecordBatch, error) { if err != nil { if errors.Is(err, io.EOF) { d.consumedInput = true - fmt.Printf("distinctArray: \t%v\n", d.distinctValuesArray) if d.distinctValuesArray[0] != nil { // nill check in case of no distict elements being found or even just input operator doesnt return anything d.totalRows = uint64(d.distinctValuesArray[0].Len()) } diff --git a/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go b/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go index 354db56..47435b7 100644 --- a/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go +++ b/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go @@ -420,7 +420,7 @@ func TestProjectExec_CastLiteral_Column(t 
*testing.T) { exprs := []Expr.Expression{ Expr.NewCastExpr( - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, int64(4)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 4), arrow.PrimitiveTypes.Float64, ), } @@ -486,7 +486,7 @@ func TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.Addition, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, int8(10)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, 10), ), ) @@ -511,7 +511,7 @@ func TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("score"), Expr.Subtraction, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(5.0)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, 5.0), ), ) @@ -536,7 +536,7 @@ func TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("id"), Expr.Multiplication, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, int64(2)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 2), ), ) @@ -558,7 +558,7 @@ func TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("score"), Expr.Division, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(2)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, 2.0), ), ) @@ -607,7 +607,7 @@ func TestProjectExec_AliasExpr(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.Addition, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, int8(10)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, 10), ), "boosted_age", ), @@ -635,7 +635,7 @@ func TestProjectExec_AliasExpr(t *testing.T) { exprs := Expr.NewExpressions( Expr.NewAlias( - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(7)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 7), "constant_value", ), ) @@ -659,9 +659,9 @@ func TestProjectExec_AliasExpr(t *testing.T) { memSrc, _ := NewInMemoryProjectExec(names, cols) inner := Expr.NewBinaryExpr( - 
Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(2)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 2), Expr.Addition, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(3)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 3), ) exprs := Expr.NewExpressions( @@ -741,7 +741,7 @@ func TestProjectExec_FunctionExpr(t *testing.T) { t.Run("LOWER('MonKey_x')", func(t *testing.T) { memSrc, _ := NewInMemoryProjectExec(names, cols) - expr := Expr.NewLiteralResolve(arrow.BinaryTypes.String, string("MoNKey_X")) + expr := Expr.NewLiteralResolve(arrow.BinaryTypes.String, "MoNKey_X") exprs := Expr.NewExpressions( Expr.NewScalarFunction( @@ -779,7 +779,7 @@ func TestProjectExec_FunctionExpr(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("score"), Expr.Subtraction, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(100)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(100.0)), ), ) @@ -811,7 +811,7 @@ func TestProjectExec_FunctionExpr(t *testing.T) { expr := Expr.NewScalarFunction( Expr.Round, Expr.NewBinaryExpr( - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(2.5)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 2.5), Expr.Multiplication, Expr.NewColumnResolve("score"), ), diff --git a/src/Backend/opti-sql-go/operators/test/intergration_test.go b/src/Backend/opti-sql-go/operators/test/intergration_test.go new file mode 100644 index 0000000..af51020 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/test/intergration_test.go @@ -0,0 +1,340 @@ +package test + +import ( + "errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "opti-sql-go/operators/filter" + "opti-sql-go/operators/project" + "os" + "testing" + + "github.com/apache/arrow/go/v17/arrow" +) + +/* +composes individual operators into one another to test multiple together +*/ +const ( + source1Path = "../../../test_data/csv/intergration_test_data_1.csv" + source2Path = 
"../../../test_data/csv/intergration_test_data_2.csv" +) + +/* +column names: +id,username,email_address,is_active,age_years,account_balance_usd,average_session_minutes,favorite_color +*/ +func source1Project() operators.Operator { + f, err := os.Open(source1Path) + if err != nil { + panic(fmt.Sprintf("failed to open source file: %v", err)) + } + p, _ := project.NewProjectCSVLeaf(f) + return p +} + +/* +colunn names: +id,department_name,manager_name,manager_email +*/ +func source2Project() operators.Operator { + f, err := os.Open(source2Path) + if err != nil { + panic(fmt.Sprintf("failed to open source file: %v", err)) + } + p, _ := project.NewProjectCSVLeaf(f) + return p +} +func TestPrettyPrintSources(t *testing.T) { + p1, p2 := source1Project(), source2Project() + rc1, _ := p1.Next(5) + rc2, _ := p2.Next(5) + + t.Logf("source 1 batch: %v\n", rc1.PrettyPrint()) + t.Logf("source 2 batch: %v\n", rc2.PrettyPrint()) +} + +// TestSelectFilterLimit contains two subtests that build pipelines +// combining Select (project), Filter, and Limit for source1 CSV. +// Each subtest constructs the pipeline, calls Next once, and prints the +// resulting batch via PrettyPrint. 
+/* +(1) +Operators : Select, Filter, Limit +sql query: +(1.A)SELECT id, username, age_years FROM source1 WHERE age_years > 30 LIMIT 10; +(1.B)SELECT username, age_years +FROM source1 +WHERE is_active = true AND age_years < 25 +LIMIT 3; +(1.C)SELECT id, favorite_color +FROM source1 +WHERE favorite_color = 'Red' +LIMIT 7; +*/ + +func TestSelectFilterLimit(t *testing.T) { + // (1.A) SELECT id, username, age_years FROM source1 WHERE age_years > 30 LIMIT 10; + t.Run("1A", func(t *testing.T) { + // (1.A) SELECT id, username, age_years FROM source1 WHERE age_years > 30 LIMIT 10; + src := source1Project() + t.Logf("\t%v\n", src.Schema()) + + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age_years"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 30), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + projExprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(filt, projExprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 10) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(10) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Logf("(1.A) got nil batch (possibly EOF)") + return + } + + t.Logf("(1.A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (1.B) SELECT username, age_years FROM source1 WHERE is_active = true AND age_years < 25 LIMIT 3; + t.Run("1B", func(t *testing.T) { + src := source1Project() + + left := Expr.NewBinaryExpr( + Expr.NewColumnResolve("is_active"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.FixedWidthTypes.Boolean, true), + ) + right := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age_years"), + Expr.LessThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 
25), + ) + pred := Expr.NewBinaryExpr(left, Expr.And, right) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + projExprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(filt, projExprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 3) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Logf("(1.B) got nil batch (possibly EOF)") + return + } + + t.Logf("(1.B) batch:\n%v\n", batch.PrettyPrint()) + }) + // (1.C) SELECT id, favorite_color FROM source1 WHERE favorite_color = 'Red' LIMIT 7; + t.Run("(1.C)", func(t *testing.T) { + src := source1Project() + + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("favorite_color"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "Red"), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + projExprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("favorite_color"), + ) + proj, err := project.NewProjectExec(filt, projExprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 7) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Logf("(1.C) got nil batch (possibly EOF)") + return + } + + t.Logf("(1.C) batch:\n%v\n", batch.PrettyPrint()) + }) + +} + +/* +(2) +Operators: Filter, Scalar functions +sql query: +(2.A)SELECT id, username, LOWER(favorite_color) as fav_color_lower FROM source1 WHERE UPPER(favorite_color) = 'BLUE'; 
+(2.B)SELECT username, LOWER(email_address) AS email_lower +FROM source1 +WHERE UPPER(username) = 'ALICE'; +*/ + +/* +(3) +Operators: select, Sort +sql query: +(3.A)SELECT id, account_balance_usd, username +FROM source1 +ORDER BY account_balance_usd ASC +(3.B)SELECT id, favorite_color +FROM source1 +ORDER BY favorite_color ASC; +*/ + +/* +(4) +Operators: Join(INNER), Select +SQL: +(4.A)SELECT s1.id, s1.username, s2.department_name +FROM source1 AS s1 +INNER JOIN source2 AS s2 +ON s1.favorite_color = s2.manager_name; +(4.B)SELECT s1.id, s1.email_address, s2.department_name +FROM source1 AS s1 +INNER JOIN source2 AS s2 +ON s1.favorite_color = s2.manager_name; +*/ + +/* +(5) +Operators: GroupBy, Aggregation(SUM, AVG), Select +SQL: +(5.A)SELECT favorite_color, AVG(age_years) AS avg_age, SUM(account_balance_usd) AS total_balance +FROM source1 +GROUP BY favorite_color; +(5.B)SELECT is_active, COUNT(*) AS active_count, AVG(age_years) AS avg_age +FROM source1 +GROUP BY is_active; + +*/ + +/* +(6) +Operators: Distinct, Sort(DESC) +SQL: +(6.A)SELECT DISTINCT favorite_color +FROM source1 +ORDER BY favorite_color DESC; +(6.B)SELECT DISTINCT is_active +FROM source1 +ORDER BY is_active DESC; + +*/ + +/* +(7) +Operators: Join(INNER), Filter, Projection, Limit + +SQL: +(7.A)SELECT s1.id, s1.username, s2.department_name +FROM source1 AS s1 +INNER JOIN source2 AS s2 +ON s1.favorite_color = s2.manager_name +WHERE s1.age_years > 30 +LIMIT 5; +(7.B)SELECT s1.username, s2.manager_email +FROM source1 AS s1 +JOIN source2 AS s2 +ON s1.favorite_color = s2.manager_name +WHERE s2.department_name = 'Engineering' +LIMIT 3; +(7.C)SELECT s1.id, s2.manager_name +FROM source1 s1 +JOIN source2 s2 +ON s1.favorite_color = s2.manager_name +WHERE s1.account_balance_usd > 10000 +LIMIT 2; +*/ + +/* +(8) +Operators: ScalarFunction(ABS, ROUND), Filter, Projection + +SQL: +(8.A)SELECT id, ROUND(ABS(average_session_minutes)) AS rounded_session +FROM source1 +WHERE ABS(average_session_minutes) > 5; 
+(8.B)SELECT username, ROUND(account_balance_usd) AS rounded_balance +FROM source1 +WHERE ABS(account_balance_usd) > 5000; +*/ + +/* +(9) +Operators: Sort (multiple columns), Select + +SQL: +(9.A)SELECT id, username, age_years +FROM source1 +ORDER BY age_years DESC, username ASC; +(9.B)SELECT id, email_address, age_years +FROM source1 +ORDER BY age_years ASC, email_address DESC; + +*/ + +/* +(10) +Operators: Join (INNER, multiple conditions), Select, Sort (multiple columns) + +(10.A)SELECT s1.id, s1.username, s2.manager_name, s2.budget +FROM source1 AS s1 +INNER JOIN source2 AS s2 + ON s1.favorite_color = s2.manager_name + AND s1.region = s2.region +ORDER BY s2.budget DESC, s1.username ASC; + +*/ diff --git a/src/Backend/opti-sql-go/operators/test/t1_test.go b/src/Backend/opti-sql-go/operators/test/t1_test.go index f2e6e33..71bf2b1 100644 --- a/src/Backend/opti-sql-go/operators/test/t1_test.go +++ b/src/Backend/opti-sql-go/operators/test/t1_test.go @@ -19,6 +19,11 @@ import ( "github.com/apache/arrow/go/v17/arrow/array" ) +/* +indivdial unit test for each operator +serves as documentation as to how to use each operator +*/ + // test for all operators together // using in memory format at first func generateIntegrationDataset1(mem memory.Allocator) ([]string, []arrow.Array) { @@ -272,7 +277,7 @@ func TestProjectExec(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("salary"), Expr.Multiplication, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(1.10)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 1.10), ), "adjusted_salary", ), @@ -379,7 +384,7 @@ func TestFilterExec(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), ) filt, err := filter.NewFilterExec(src, pred) @@ -422,7 +427,7 @@ func TestFilterExec(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("salary"), 
Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(70000)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 70000.0), ), ) // department = 'Engineering' AND salary > 70000 @@ -982,7 +987,7 @@ func TestHavingExec(t *testing.T) { having := Expr.NewBinaryExpr( Expr.NewColumnResolve("avg_Column(salary)"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(75000)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 75000.0), ) hv, _ := aggr.NewHavingExec(gb, having) @@ -1011,7 +1016,7 @@ func TestHavingExec(t *testing.T) { having := Expr.NewBinaryExpr( Expr.NewColumnResolve("avg_Column(salary)"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(999999)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 999999.0), ) hv, _ := aggr.NewHavingExec(gb, having) @@ -1029,7 +1034,7 @@ func TestHavingExec(t *testing.T) { having := Expr.NewBinaryExpr( Expr.NewColumnResolve("avg_Column(salary)"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(0)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(0.0)), ) hv, _ := aggr.NewHavingExec(gb, having) diff --git a/src/Backend/test_data/csv/intergration_test_data_1.csv b/src/Backend/test_data/csv/intergration_test_data_1.csv new file mode 100644 index 0000000..a5e8985 --- /dev/null +++ b/src/Backend/test_data/csv/intergration_test_data_1.csv @@ -0,0 +1,1001 @@ +id,username,email_address,is_active,age_years,account_balance_usd,average_session_minutes,favorite_color +1,kabrahmer0,jbranson0@joomla.org,false,23,78568.75,108.7,Blue +2,rackred1,eskinn1@usa.gov,true,66,21933.55,19.7,Violet +3,hdeacon2,cmccomish2@xing.com,true,73,69730.89,9.2,Maroon +4,mdany3,cvreede3@pagesperso-orange.fr,true,45,80371.32,19.0,Maroon +5,mgile4,cdumbrall4@pinterest.com,true,63,82536.72,57.3,Indigo +6,ypedlow5,hmelchior5@domainmarket.com,false,96,2276.7,187.8,Aquamarine 
+7,bpotier6,ibaseke6@state.tx.us,false,77,32239.63,75.7,Maroon +8,tglenn7,tpawley7@github.io,true,42,33917.35,106.2,Teal +9,naleksankin8,cwarstall8@washington.edu,false,20,61450.41,231.2,Orange +10,ktrevain9,apinchen9@mapquest.com,false,58,29065.08,226.7,Turquoise +11,tfuttya,etwigginsa@uiuc.edu,true,28,76633.92,157.7,Turquoise +12,bodoranb,tmadineb@wix.com,false,46,38205.06,151.9,Puce +13,lmacqueenc,hriccardc@mayoclinic.com,true,28,13588.26,105.1,Teal +14,lkwietakd,emalesd@123-reg.co.uk,false,28,51587.99,67.8,Purple +15,dbethoe,ccricke@mit.edu,false,33,43569.4,236.0,Red +16,gcochranf,bbartoszekf@washington.edu,false,61,55799.32,168.6,Khaki +17,ekeoghaneg,charleg@scientificamerican.com,true,14,4585.2,228.0,Goldenrod +18,rlillieh,gskaseh@tripadvisor.com,true,58,46285.12,101.5,Violet +19,pblondellei,sjosskovitzi@csmonitor.com,true,33,90800.0,16.2,Blue +20,kchappellj,bmetcalfej@hatena.ne.jp,false,74,96359.33,82.1,Goldenrod +21,mgolbyk,plangcasterk@t-online.de,false,80,21171.21,55.5,Indigo +22,aebornl,bwaliszewskil@utexas.edu,false,64,2323.61,129.2,Indigo +23,bkimblym,blellom@nasa.gov,true,24,89531.03,226.3,Green +24,ohupkan,gquesnen@telegraph.co.uk,false,79,84280.86,162.3,Puce +25,tbroginio,tgreasero@opera.com,true,52,83599.32,220.3,Puce +26,mcometsonp,bguinnanep@networkadvertising.org,false,16,11008.19,35.7,Aquamarine +27,mcooksonq,abremenq@nytimes.com,false,46,66430.38,67.8,Mauv +28,gcarmenr,ebladonr@bizjournals.com,false,78,14283.62,67.9,Puce +29,malmonds,bsootss@archive.org,false,97,57417.63,12.0,Turquoise +30,cmacpharlaint,awelbandt@wix.com,true,68,78044.56,219.8,Green +31,bgagenu,jbrickneru@clickbank.net,true,87,75215.47,27.5,Orange +32,cgraverv,tvittetv@who.int,false,55,48576.54,154.3,Khaki +33,ckleinhandlerw,gboissierw@forbes.com,false,76,24205.9,80.2,Aquamarine +34,mnewtonx,balenichicovx@nyu.edu,false,56,71490.0,236.2,Green +35,opembley,sfennelly@is.gd,false,98,79604.64,72.6,Khaki +36,lstoodersz,emaudsleyz@joomla.org,true,31,28501.29,30.6,Pink 
+37,rpickavance10,aoregan10@microsoft.com,true,92,85816.31,111.8,Pink +38,bhapgood11,ctrebilcock11@prweb.com,false,36,3128.05,51.0,Yellow +39,cogden12,emoreman12@globo.com,false,77,43674.85,130.1,Blue +40,dkurt13,ccecere13@china.com.cn,false,79,92882.13,154.4,Fuscia +41,nvangeffen14,tservis14@blogtalkradio.com,true,36,44125.22,68.3,Turquoise +42,jmatzl15,amcalees15@prnewswire.com,false,14,64389.09,210.7,Teal +43,kcovolini16,eskuce16@geocities.jp,false,58,61266.66,53.6,Indigo +44,rdhennin17,zhartil17@biblegateway.com,false,62,20083.83,49.7,Yellow +45,lnetti18,sgergolet18@xing.com,false,90,95479.39,89.5,Violet +46,acamblin19,neverest19@google.co.uk,true,42,69372.65,114.4,Pink +47,bsanbroke1a,ehalbert1a@prlog.org,true,77,39295.87,169.1,Goldenrod +48,lcassius1b,lhuntly1b@ow.ly,true,37,57540.0,33.8,Fuscia +49,golivier1c,barger1c@typepad.com,true,85,36823.38,127.2,Teal +50,tdubock1d,amanske1d@fema.gov,true,60,97834.72,78.0,Violet +51,jhardes1e,egreason1e@storify.com,false,36,72901.15,49.5,Mauv +52,djorge1f,emetterick1f@storify.com,false,40,3074.71,60.9,Yellow +53,sprobin1g,rcolisbe1g@cdbaby.com,true,99,15836.96,213.6,Orange +54,breaper1h,msouthall1h@soup.io,true,79,-2334.0,235.5,Yellow +55,bmcmillam1i,mdeevey1i@wired.com,true,34,53132.39,76.8,Pink +56,bmar1j,dyitzhakof1j@odnoklassniki.ru,false,28,83065.6,229.3,Teal +57,mgirling1k,apoor1k@marketwatch.com,false,87,8214.74,56.5,Orange +58,kdoorbar1l,mcholwell1l@uiuc.edu,false,28,49805.07,125.8,Indigo +59,cgatesman1m,fmacneil1m@ow.ly,true,70,51264.57,233.0,Violet +60,mskerman1n,afrancescuzzi1n@odnoklassniki.ru,true,79,95239.67,104.1,Teal +61,vbinney1o,cshorey1o@amazon.de,false,54,16618.88,191.9,Aquamarine +62,hcholerton1p,gpaylor1p@utexas.edu,false,55,43522.18,154.9,Khaki +63,fwyon1q,lbuckby1q@smh.com.au,false,96,61202.83,36.0,Violet +64,sshillabeare1r,rrack1r@pinterest.com,false,38,19007.32,32.4,Goldenrod +65,vkaiser1s,dthorp1s@digg.com,false,30,7656.38,191.8,Yellow 
+66,mrosenhaupt1t,nburnage1t@marriott.com,true,84,8990.69,162.1,Fuscia +67,klebrun1u,kchaudron1u@mtv.com,false,91,53217.02,81.0,Turquoise +68,vmoncey1v,gdevereux1v@paginegialle.it,true,82,99251.37,24.3,Red +69,ateissier1w,dlanghor1w@redcross.org,true,65,10992.34,132.3,Blue +70,hiwanicki1x,sspinola1x@elpais.com,true,85,78351.87,114.2,Pink +71,mmeckiff1y,wmatteacci1y@wired.com,true,26,57171.24,188.7,Pink +72,cmcbrearty1z,hkingett1z@123-reg.co.uk,false,88,-3016.95,11.8,Pink +73,beynon20,jcran20@chronoengine.com,true,71,21921.98,196.7,Yellow +74,pstannas21,mcornels21@ed.gov,true,99,66552.01,12.6,Goldenrod +75,njosuweit22,btumbelty22@go.com,false,66,23107.05,189.4,Puce +76,kpagelsen23,mtolan23@de.vu,true,40,27701.03,61.6,Pink +77,spetrichat24,rdeldello24@networksolutions.com,false,48,8620.33,180.9,Maroon +78,akerswell25,lvalintine25@japanpost.jp,true,71,-108.19,192.5,Mauv +79,vbraga26,rdumingos26@salon.com,false,41,64557.11,203.6,Maroon +80,lduckit27,nitzhayek27@archive.org,false,76,51903.82,87.7,Green +81,gmidgely28,lkiln28@etsy.com,true,80,97275.62,104.5,Red +82,cgarratt29,rmockes29@google.ru,true,91,5067.63,210.2,Red +83,rglossop2a,rgrouvel2a@arstechnica.com,true,26,62.53,183.6,Purple +84,dluttger2b,eretchless2b@answers.com,true,19,32428.17,231.8,Orange +85,elangridge2c,wgrodden2c@mozilla.org,false,82,33110.19,25.2,Goldenrod +86,oheathcott2d,maxcel2d@cargocollective.com,true,64,83145.34,176.7,Goldenrod +87,lshrive2e,lbloxland2e@free.fr,true,54,36226.59,137.6,Khaki +88,carchley2f,csomerton2f@furl.net,false,61,55457.75,213.9,Red +89,tbrew2g,nvesque2g@bloglines.com,true,59,9234.99,61.3,Puce +90,zcruise2h,jhuxton2h@sohu.com,true,45,22015.28,84.7,Orange +91,ejailler2i,rgillitt2i@freewebs.com,false,71,24201.93,157.0,Blue +92,hsquier2j,rbegg2j@shutterfly.com,true,54,21285.15,55.6,Blue +93,kgandy2k,cmcclean2k@unblog.fr,true,87,47796.6,97.6,Mauv +94,cpadberry2l,bpaskerful2l@google.fr,true,25,12861.17,201.1,Khaki +95,mdjurdjevic2m,mtadlow2m@fc2.com,false,71,37159.23,111.3,Blue 
+96,npeddie2n,hkilfeather2n@disqus.com,true,36,-4415.24,202.0,Blue +97,tpraton2o,rsiburn2o@creativecommons.org,true,81,4969.55,25.1,Mauv +98,hmoulding2p,lsayers2p@4shared.com,true,43,89837.87,113.5,Aquamarine +99,heastmead2q,jdurrell2q@people.com.cn,true,64,36745.97,123.6,Crimson +100,hlambis2r,abastard2r@fda.gov,true,14,58177.16,192.5,Maroon +101,gwitterick2s,kmadders2s@wikia.com,true,73,89899.81,186.4,Indigo +102,cjedraszek2t,abenda2t@utexas.edu,true,37,65280.64,172.8,Purple +103,cwooton2u,kbeaze2u@craigslist.org,true,63,94274.92,193.2,Goldenrod +104,gunthank2v,sferre2v@whitehouse.gov,true,49,40119.54,110.3,Turquoise +105,bfilson2w,bhayles2w@hc360.com,true,14,44503.45,157.1,Indigo +106,ldebruyn2x,lfurneaux2x@desdev.cn,false,24,59364.66,207.5,Pink +107,lshegog2y,jkinnin2y@toplist.cz,true,29,44611.61,197.8,Pink +108,lswoffer2z,mlambert2z@ibm.com,false,17,75611.62,222.2,Mauv +109,tsmitheman30,ryanyushkin30@prnewswire.com,false,18,24549.77,99.8,Khaki +110,subsdale31,lparker31@shop-pro.jp,false,97,99572.38,168.4,Orange +111,lelfe32,fhorsefield32@arstechnica.com,true,58,39755.35,38.2,Violet +112,abulbeck33,tedeson33@huffingtonpost.com,true,40,23526.09,91.3,Goldenrod +113,omaffi34,ccrossley34@nps.gov,true,48,37339.46,35.9,Pink +114,kbecerro35,sfranca35@plala.or.jp,false,76,83744.37,96.0,Crimson +115,cdeelay36,nharring36@kickstarter.com,false,20,62170.7,139.4,Khaki +116,streherne37,rbishell37@indiatimes.com,true,62,54012.77,168.8,Mauv +117,tralling38,mraspison38@instagram.com,true,99,21780.62,230.9,Mauv +118,jpanswick39,aherche39@live.com,true,63,45818.69,17.1,Teal +119,aavramovich3a,mkinkead3a@artisteer.com,false,85,-3534.1,110.4,Red +120,bhiom3b,lcheckley3b@foxnews.com,false,89,65567.85,135.4,Turquoise +121,rbayldon3c,kbosworth3c@mashable.com,true,50,95253.56,23.6,Khaki +122,fbagnell3d,bjosephov3d@reference.com,false,57,68390.99,78.4,Teal +123,hshambrooke3e,edegregorio3e@umn.edu,false,52,15771.1,203.8,Crimson 
+124,ggribbon3f,rpagett3f@prweb.com,false,16,2917.55,56.2,Turquoise +125,edavana3g,cdedney3g@cmu.edu,false,31,55667.94,36.1,Yellow +126,hwaskett3h,wantyshev3h@angelfire.com,true,50,63517.05,60.0,Turquoise +127,tdurtnall3i,ftwittey3i@house.gov,false,49,83952.8,35.1,Green +128,omaddyson3j,rcordie3j@myspace.com,true,97,99567.41,168.1,Crimson +129,gmanntschke3k,mmuzzini3k@ifeng.com,true,91,54316.77,91.4,Violet +130,ndaniele3l,cclaughton3l@pen.io,false,69,65027.94,213.7,Blue +131,npeagram3m,cpottle3m@blogtalkradio.com,true,51,31631.27,136.5,Blue +132,mbore3n,npieterick3n@springer.com,false,48,58073.82,95.5,Maroon +133,skensington3o,mbury3o@pagesperso-orange.fr,false,61,13374.65,52.0,Aquamarine +134,bfarman3p,amclaverty3p@narod.ru,true,61,57229.14,124.1,Mauv +135,shutchins3q,sgrumble3q@accuweather.com,true,33,46414.27,208.5,Khaki +136,cgauthorpp3r,gdowsett3r@jigsy.com,true,30,43875.9,159.3,Indigo +137,gblaw3s,aaicken3s@dion.ne.jp,true,43,154.23,107.3,Orange +138,mcorkett3t,fodea3t@foxnews.com,false,47,94738.45,203.1,Indigo +139,mscogings3u,gdicky3u@imageshack.us,true,54,31648.6,4.9,Teal +140,atabram3v,ebeardow3v@twitter.com,false,34,92957.65,19.4,Puce +141,dlucken3w,awinter3w@sciencedirect.com,true,34,1043.49,20.9,Teal +142,bguiduzzi3x,zcatley3x@is.gd,true,56,49087.72,175.5,Crimson +143,adyos3y,aroney3y@craigslist.org,true,68,60016.89,203.2,Purple +144,jsichardt3z,bchadband3z@uol.com.br,true,70,33414.39,215.6,Violet +145,kballsdon40,learingey40@acquirethisname.com,true,34,61189.33,183.7,Turquoise +146,nrattrie41,zniesing41@mlb.com,false,20,3713.34,106.0,Violet +147,pstarmore42,moda42@jugem.jp,true,16,37980.99,173.5,Pink +148,lsalmon43,emichell43@businessweek.com,false,90,69007.42,21.3,Indigo +149,hmcglynn44,mdorward44@independent.co.uk,false,48,62759.77,112.1,Turquoise +150,mlindfors45,jluke45@php.net,false,75,10456.42,26.4,Aquamarine +151,rjeanin46,scolwell46@constantcontact.com,false,69,90824.92,80.8,Yellow 
+152,titchingham47,jhallt47@who.int,false,50,79100.32,115.9,Aquamarine +153,ppledger48,fcamfield48@apache.org,false,24,28749.64,9.1,Turquoise +154,mbeauchop49,hglasheen49@dailymail.co.uk,false,26,37414.35,45.5,Fuscia +155,rbertlin4a,ulikly4a@comcast.net,false,37,71606.35,225.8,Purple +156,gmillom4b,avenard4b@hud.gov,true,76,94234.72,36.3,Goldenrod +157,asherman4c,fbellenie4c@thetimes.co.uk,true,26,26478.22,198.4,Khaki +158,ubottjer4d,kmcmychem4d@epa.gov,false,81,73783.54,131.1,Purple +159,itimeby4e,aduesbury4e@va.gov,false,40,58556.11,213.9,Green +160,amatson4f,smccool4f@bbc.co.uk,true,49,61713.07,87.0,Orange +161,aaddinall4g,fspellacey4g@live.com,false,33,34527.69,72.2,Turquoise +162,csach4h,ewinning4h@vk.com,true,43,23477.9,229.8,Violet +163,tyeskov4i,fbloxsom4i@forbes.com,false,97,3535.98,113.1,Pink +164,cmcclory4j,mforce4j@technorati.com,false,96,22636.5,98.3,Khaki +165,hilyasov4k,educe4k@netlog.com,true,66,50561.37,66.0,Aquamarine +166,pkollatsch4l,aopdenorth4l@bloglovin.com,false,28,9517.82,27.8,Yellow +167,ndeminico4m,iodee4m@51.la,false,22,60746.86,85.3,Puce +168,tbenedyktowicz4n,jburgoyne4n@blog.com,true,53,32835.66,77.0,Pink +169,mpiner4o,fkepp4o@woothemes.com,true,42,1608.82,23.4,Puce +170,bcastelletto4p,cklugman4p@google.de,true,85,98194.54,94.9,Goldenrod +171,lregitz4q,bwillars4q@google.nl,false,21,83442.35,19.5,Purple +172,egallemore4r,cwatters4r@kickstarter.com,true,16,10599.53,39.3,Orange +173,pmoyne4s,mschulter4s@independent.co.uk,false,23,21132.22,1.3,Crimson +174,edymocke4t,lwindress4t@newsvine.com,false,48,79372.58,216.4,Goldenrod +175,dpietrusiak4u,rcamosso4u@geocities.jp,false,77,64270.28,211.4,Mauv +176,pdeeson4v,goflaherty4v@dion.ne.jp,false,20,3941.61,184.8,Orange +177,erabbe4w,bakett4w@bravesites.com,true,60,51406.26,141.8,Teal +178,mferron4x,mbrooke4x@youku.com,false,25,37622.42,210.8,Teal +179,atottle4y,bclinton4y@purevolume.com,true,37,43558.76,145.2,Red +180,bedwardson4z,dscotfurth4z@dion.ne.jp,true,66,27769.27,68.5,Yellow 
+181,acarlucci50,fcomiam50@economist.com,false,54,6018.01,208.2,Goldenrod +182,stwallin51,cwallice51@tripod.com,true,32,26271.66,228.3,Blue +183,lmarchiso52,cledstone52@networksolutions.com,true,45,65987.77,46.4,Yellow +184,cpelling53,nniccolls53@dyndns.org,true,30,17758.16,53.4,Goldenrod +185,zgricks54,eeacle54@nih.gov,true,93,65139.32,156.9,Fuscia +186,cclemenson55,diashvili55@angelfire.com,true,91,54823.75,77.4,Aquamarine +187,lbladder56,rdrust56@house.gov,false,23,16407.74,65.4,Orange +188,bwieprecht57,bbench57@naver.com,false,58,7563.83,47.6,Purple +189,hdarinton58,gbrader58@dyndns.org,true,58,10792.6,17.0,Mauv +190,dbowller59,mboughtflower59@boston.com,false,19,78897.16,28.2,Purple +191,cdivis5a,dblabber5a@accuweather.com,true,50,95641.31,97.7,Yellow +192,spenhalewick5b,ggott5b@cloudflare.com,true,89,74511.66,124.7,Indigo +193,mredwood5c,lmckeveney5c@facebook.com,true,95,67450.15,11.8,Yellow +194,amacmurray5d,jfibbitts5d@sitemeter.com,true,33,51196.3,70.0,Violet +195,mkimbling5e,lspeake5e@wired.com,true,22,98791.96,90.2,Crimson +196,ctrye5f,djameson5f@canalblog.com,false,70,91481.94,201.4,Orange +197,carkin5g,jorgel5g@marketwatch.com,true,53,42292.02,23.2,Aquamarine +198,jmedland5h,dbonnier5h@yellowpages.com,true,58,42698.85,84.1,Green +199,fmallall5i,lgimblet5i@whitehouse.gov,true,85,22927.08,152.8,Puce +200,bkupker5j,dgjerde5j@newsvine.com,true,89,93930.62,216.3,Green +201,agill5k,zcarlill5k@privacy.gov.au,false,58,41174.32,115.8,Crimson +202,agofford5l,ldilliston5l@washingtonpost.com,true,96,96695.38,14.1,Purple +203,npender5m,eruger5m@jimdo.com,false,36,75419.63,101.3,Crimson +204,awaterman5n,ghessing5n@businesswire.com,true,54,86699.71,59.7,Orange +205,oaudenis5o,faucoate5o@sun.com,true,19,46836.54,207.5,Aquamarine +206,mwindress5p,hchesman5p@loc.gov,false,97,53488.69,66.9,Indigo +207,hdeyes5q,zkobu5q@wordpress.com,false,72,77479.93,84.4,Violet +208,vbadder5r,pgomez5r@instagram.com,true,24,90940.2,114.6,Aquamarine 
+209,mmckeon5s,awalkley5s@posterous.com,false,79,78872.2,201.0,Puce +210,alukes5t,gway5t@ning.com,false,56,80344.01,80.7,Red +211,bstruan5u,abarneville5u@google.ca,false,74,38649.41,216.4,Purple +212,rcopner5v,disaak5v@addtoany.com,true,83,51194.82,59.8,Mauv +213,mcapinetti5w,dpoetz5w@independent.co.uk,true,80,51265.78,139.0,Puce +214,ccobain5x,ajervoise5x@state.tx.us,true,71,8492.27,123.8,Turquoise +215,dtinn5y,tfrape5y@answers.com,true,98,80338.69,213.3,Aquamarine +216,gtackell5z,cveracruysse5z@macromedia.com,true,56,28970.75,39.8,Indigo +217,hjoyce60,dgatley60@github.com,true,58,52875.49,131.9,Pink +218,cantonsson61,mmactimpany61@smugmug.com,false,91,90280.01,205.0,Teal +219,jgynne62,edobrowolski62@shareasale.com,false,31,54928.08,8.9,Turquoise +220,ebooty63,gwickey63@thetimes.co.uk,true,63,25248.03,78.3,Purple +221,jhaly64,egirardengo64@adobe.com,false,79,25530.67,193.5,Violet +222,ciskowicz65,mvorley65@narod.ru,false,27,10025.95,8.5,Khaki +223,oream66,mhaining66@nih.gov,false,55,160.47,193.3,Goldenrod +224,cottey67,vtocher67@psu.edu,true,29,-192.32,202.5,Fuscia +225,cmccurlye68,kbarck68@shop-pro.jp,true,53,87737.09,40.1,Green +226,bdavidowich69,tmallalieu69@ebay.co.uk,false,17,-2836.75,21.3,Blue +227,aneesam6a,bjills6a@yale.edu,false,39,86645.31,104.2,Turquoise +228,nivetts6b,graincin6b@webeden.co.uk,true,92,19425.8,10.9,Blue +229,rmagnus6c,hmenloe6c@mozilla.org,true,36,50849.97,194.1,Fuscia +230,mshovell6d,jmeasures6d@ibm.com,true,28,15777.1,193.4,Red +231,fcourtney6e,cqueen6e@omniture.com,false,23,99505.5,221.2,Maroon +232,tbabin6f,mkennally6f@liveinternet.ru,true,97,55648.38,166.8,Purple +233,dkimber6g,kmapholm6g@live.com,false,69,-4440.54,99.2,Green +234,vmerner6h,jfeehily6h@vkontakte.ru,false,99,69185.02,108.8,Khaki +235,bblouet6i,arollings6i@nih.gov,true,23,66210.49,118.3,Puce +236,chowson6j,rgaukroger6j@washingtonpost.com,false,33,80917.55,161.7,Fuscia +237,kburner6k,kpetcher6k@yolasite.com,false,96,65515.39,36.5,Maroon 
+238,cscandroot6l,lfranchyonok6l@mapquest.com,true,60,50872.94,148.0,Blue +239,ptottie6m,bwheatcroft6m@live.com,false,23,3631.02,18.8,Puce +240,fsheer6n,nbuncher6n@pcworld.com,true,29,44745.12,165.7,Red +241,nhavis6o,jpatrie6o@privacy.gov.au,true,90,89669.26,36.5,Purple +242,hmarcq6p,rvoas6p@example.com,false,55,64169.54,231.0,Fuscia +243,tmccoveney6q,aizhakov6q@theatlantic.com,true,33,31649.37,196.6,Aquamarine +244,htwine6r,egabbotts6r@odnoklassniki.ru,false,48,-1530.03,190.0,Teal +245,gwallsam6s,mabrahamsen6s@cam.ac.uk,false,68,5942.94,112.7,Mauv +246,hdrury6t,hblaisdell6t@hp.com,false,35,64476.46,43.7,Fuscia +247,aosbourn6u,bgaskarth6u@shareasale.com,false,53,77685.37,190.7,Pink +248,wstockell6v,bdrakard6v@uol.com.br,false,29,22633.82,105.6,Blue +249,dstuttman6w,mgathercoal6w@addthis.com,false,86,-4607.94,206.0,Puce +250,nbirkhead6x,isansbury6x@sciencedaily.com,false,76,89007.14,45.3,Maroon +251,jheminsley6y,iraleston6y@foxnews.com,true,72,68734.49,119.4,Maroon +252,mtesauro6z,tgrimme6z@xing.com,false,90,7259.04,218.6,Pink +253,hgrzegorzewicz70,bstotherfield70@cnet.com,true,14,8121.6,223.8,Violet +254,slinay71,cdudney71@uiuc.edu,true,52,74486.69,61.9,Pink +255,mscarre72,aminthorpe72@wikipedia.org,false,30,66347.44,54.2,Indigo +256,kpestricke73,ygilbertson73@bloglovin.com,false,51,66214.09,232.6,Puce +257,cstennes74,hburdas74@theatlantic.com,false,89,56913.91,21.8,Goldenrod +258,mhegarty75,atoquet75@paypal.com,true,29,19756.01,23.6,Purple +259,lvannoort76,daldwich76@tamu.edu,true,73,40920.67,196.4,Green +260,iwindows77,tfarrant77@youtu.be,true,89,19572.3,32.3,Goldenrod +261,ileaburn78,sallsebrook78@mapquest.com,false,37,-1352.52,71.8,Pink +262,hmicklewicz79,bwing79@list-manage.com,true,19,91391.64,203.3,Fuscia +263,rharpur7a,gtyre7a@list-manage.com,false,16,40233.5,176.4,Aquamarine +264,gaddyman7b,tsummersby7b@mlb.com,false,38,79050.35,195.7,Puce +265,cpigny7c,nsurcombe7c@simplemachines.org,true,39,89263.61,83.2,Pink 
+266,phudghton7d,cwippermann7d@sphinn.com,true,39,47281.64,146.2,Violet +267,jcoats7e,thuzzey7e@liveinternet.ru,true,36,75739.29,239.8,Fuscia +268,adaburn7f,mlugden7f@psu.edu,false,22,47520.54,198.3,Pink +269,bklimkovich7g,bleyfield7g@amazon.co.jp,true,47,22986.66,24.2,Mauv +270,rsilberschatz7h,soosthoutdevree7h@toplist.cz,false,72,35317.44,184.0,Maroon +271,mogborne7i,dpresshaugh7i@berkeley.edu,true,31,42618.97,175.9,Blue +272,mmordon7j,rscotchmoor7j@thetimes.co.uk,false,22,19997.18,161.1,Puce +273,nbattey7k,neuston7k@dagondesign.com,true,54,32305.92,176.1,Fuscia +274,eglayzer7l,gkellert7l@godaddy.com,false,16,92262.24,45.5,Turquoise +275,swhyley7m,nbeeching7m@domainmarket.com,true,82,66351.52,75.7,Khaki +276,ssidey7n,scauderlie7n@toplist.cz,true,23,70017.51,2.1,Pink +277,sbour7o,hleake7o@springer.com,false,50,43686.67,95.2,Fuscia +278,bcastillon7p,gkunkler7p@youku.com,true,43,16395.55,186.6,Blue +279,lkreutzer7q,abarrasse7q@xinhuanet.com,false,57,76744.28,125.9,Crimson +280,tfishley7r,hpantone7r@ca.gov,true,21,95976.15,148.6,Red +281,dhardwicke7s,kkytley7s@ocn.ne.jp,true,27,16.36,118.5,Red +282,garnely7t,tcolomb7t@marketwatch.com,false,85,59346.93,56.6,Puce +283,btosdevin7u,cealles7u@cbsnews.com,false,56,82188.87,63.2,Indigo +284,mtysall7v,dwoodstock7v@rambler.ru,false,43,61900.67,131.2,Yellow +285,bwalrond7w,lcartmel7w@twitpic.com,false,96,95177.28,151.1,Orange +286,dbeharrell7x,mlouis7x@adobe.com,true,82,30894.94,147.3,Teal +287,cdanick7y,equidenham7y@multiply.com,true,19,70158.64,237.1,Khaki +288,tbrent7z,ocantillon7z@zimbio.com,true,53,91377.16,6.1,Purple +289,clovel80,amudge80@sitemeter.com,true,15,34484.24,35.2,Khaki +290,hlewins81,schallin81@flickr.com,false,33,22534.97,226.4,Pink +291,ebaulcombe82,anicolls82@dot.gov,false,96,91459.42,4.0,Puce +292,apaxforde83,jboston83@house.gov,false,21,51945.52,135.1,Purple +293,esanger84,kdelle84@multiply.com,true,57,12465.82,149.7,Mauv +294,bmcelane85,gparzis85@comsenz.com,false,59,29506.16,54.6,Orange 
+295,bdamerell86,dblewitt86@npr.org,false,80,5435.29,191.4,Pink +296,mlincoln87,nkolinsky87@theglobeandmail.com,false,77,8355.01,232.1,Violet +297,mdeighan88,truddy88@instagram.com,true,90,44335.95,156.5,Purple +298,eaizikovitch89,ewetheril89@illinois.edu,false,50,11132.05,77.2,Maroon +299,gsamarth8a,tbutchard8a@aboutads.info,true,71,77145.4,224.6,Orange +300,apidgeley8b,pbangle8b@cdbaby.com,true,15,72959.09,49.2,Mauv +301,brobilart8c,rkenlin8c@guardian.co.uk,true,20,85888.93,145.9,Turquoise +302,jghest8d,ksharrocks8d@google.it,false,17,15868.69,70.5,Green +303,aalflatt8e,hlawtey8e@oaic.gov.au,true,39,40233.11,113.8,Puce +304,gharriday8f,rfoote8f@facebook.com,false,31,20352.91,179.8,Yellow +305,civushkin8g,cvanichkov8g@sfgate.com,true,94,48255.7,61.5,Pink +306,cfake8h,hfidelli8h@wunderground.com,true,50,82547.51,73.9,Teal +307,mgillian8i,gstudeart8i@weebly.com,false,59,11179.96,156.2,Khaki +308,fkilmartin8j,gcleeve8j@archive.org,true,68,95577.29,180.2,Khaki +309,tgrabert8k,pcrookston8k@toplist.cz,false,55,10720.78,140.0,Crimson +310,phosburn8l,chebson8l@blog.com,false,68,8318.83,46.6,Mauv +311,mbingle8m,vyitzhok8m@quantcast.com,false,65,37332.78,224.8,Maroon +312,aheardman8n,bluttger8n@goo.gl,false,45,2328.98,229.6,Green +313,tkeld8o,dwinstanley8o@umich.edu,false,22,44863.97,173.0,Aquamarine +314,ekynson8p,mmarney8p@sogou.com,true,61,54342.33,27.8,Maroon +315,cclearley8q,bbyatt8q@twitpic.com,true,82,12524.27,133.9,Puce +316,bspellworth8r,dredwood8r@miitbeian.gov.cn,true,71,92700.62,200.3,Turquoise +317,btremontana8s,dcator8s@ed.gov,false,74,12226.59,174.3,Aquamarine +318,hwindram8t,kmacgowan8t@stumbleupon.com,true,93,21708.68,44.2,Khaki +319,tacedo8u,ijull8u@artisteer.com,false,59,91644.72,100.9,Teal +320,lhews8v,sgrieves8v@exblog.jp,true,87,68881.02,85.4,Mauv +321,hcamillo8w,ssherrett8w@yahoo.co.jp,true,64,6920.87,66.5,Khaki +322,bbusek8x,pcarcass8x@nymag.com,false,60,26202.56,43.4,Goldenrod 
+323,hreddell8y,sivanchenkov8y@nydailynews.com,true,43,44546.17,5.9,Maroon +324,nloughlin8z,llynes8z@wikipedia.org,false,46,68732.94,79.5,Pink +325,ajakel90,adegliantoni90@google.de,false,95,84739.75,119.5,Puce +326,aharrinson91,bitzkowicz91@list-manage.com,true,51,88479.9,75.2,Crimson +327,lmackim92,epaumier92@domainmarket.com,false,50,95020.24,146.7,Mauv +328,hoag93,llegon93@arstechnica.com,false,52,23818.47,234.8,Pink +329,jlarenson94,bwarwicker94@virginia.edu,false,94,88994.02,92.8,Yellow +330,cmatchell95,sspawton95@aboutads.info,true,45,11952.11,198.2,Mauv +331,ncarryer96,kdibson96@jugem.jp,false,44,48955.43,109.3,Yellow +332,sjunkinson97,hricciardello97@hexun.com,false,18,72618.62,89.4,Mauv +333,cdysart98,sdell98@skyrock.com,true,55,41894.68,154.3,Puce +334,tvazquez99,askelly99@vk.com,false,88,-646.24,167.9,Pink +335,tewart9a,vviegas9a@fotki.com,true,37,57049.98,188.2,Maroon +336,ctames9b,bwillgress9b@chron.com,false,51,51317.06,47.3,Green +337,tkoch9c,dadshed9c@facebook.com,false,49,96924.02,115.3,Yellow +338,hbucklan9d,gsalzburger9d@merriam-webster.com,true,13,90897.24,95.4,Fuscia +339,vlaba9e,kmeach9e@geocities.jp,true,91,23248.53,203.0,Indigo +340,llowdeane9f,fscowcroft9f@delicious.com,true,27,20693.19,110.1,Red +341,phenke9g,jberntsson9g@odnoklassniki.ru,true,94,38336.26,177.6,Puce +342,grolston9h,freddlesden9h@sohu.com,false,61,64714.37,28.4,Purple +343,aunderdown9i,lcorben9i@discuz.net,false,36,71082.12,67.9,Fuscia +344,zpepye9j,adrover9j@cisco.com,false,49,10848.48,159.7,Pink +345,mpretley9k,bparr9k@hatena.ne.jp,false,67,57757.87,213.1,Maroon +346,foselton9l,jnewton9l@wikispaces.com,true,30,-975.71,155.0,Teal +347,hashpole9m,mhowship9m@craigslist.org,true,69,21864.13,181.0,Green +348,wwelburn9n,palder9n@japanpost.jp,false,96,45250.91,1.9,Fuscia +349,kharkins9o,gmcgonagle9o@noaa.gov,true,16,42088.83,188.1,Indigo +350,nbasden9p,rmetham9p@github.com,true,43,38385.3,73.0,Puce +351,bruzicka9q,akeyser9q@a8.net,false,38,41579.61,217.2,Indigo 
+352,sguntrip9r,rpineaux9r@ezinearticles.com,true,77,5117.34,199.4,Red +353,fkynnd9s,asoda9s@hp.com,true,54,26313.24,3.0,Red +354,bofogarty9t,emccracken9t@wsj.com,false,42,55115.44,66.0,Pink +355,kvinter9u,wlarkings9u@cbc.ca,false,81,83879.15,15.1,Turquoise +356,ssaphir9v,lwindrum9v@google.it,true,64,30807.45,58.6,Yellow +357,csigart9w,eotson9w@blinklist.com,true,38,64804.79,68.4,Khaki +358,ybusson9x,mwilletts9x@unesco.org,false,52,56660.74,212.4,Mauv +359,hgemmill9y,mvinas9y@telegraph.co.uk,true,19,28393.0,206.3,Khaki +360,rduckitt9z,avaggs9z@youku.com,false,35,66979.19,117.3,Green +361,sjaquesta0,gcrunkhorna0@a8.net,true,60,6175.05,156.3,Pink +362,scuniffea1,tavona1@jugem.jp,true,67,4997.31,224.4,Mauv +363,ekingscotea2,kmelbournea2@sakura.ne.jp,false,15,59573.94,56.8,Violet +364,amolanda3,rkrolika3@elpais.com,true,49,40952.75,203.3,Puce +365,hbeasanta4,kthrippa4@huffingtonpost.com,true,38,3459.39,222.8,Purple +366,jtheurera5,sfeviera5@naver.com,false,74,59931.1,9.6,Violet +367,trickera6,mfinniea6@elpais.com,false,38,87301.7,122.7,Turquoise +368,dleana7,loflahertya7@sakura.ne.jp,false,81,65030.51,30.6,Orange +369,bduraka8,aasletta8@artisteer.com,true,26,60213.51,232.0,Khaki +370,mblodga9,fescota9@geocities.jp,true,29,84100.4,197.9,Pink +371,rgadieaa,mperrataa@reddit.com,true,56,-2741.1,101.6,Indigo +372,hoakenfallab,divanchovab@wisc.edu,true,68,5326.02,50.9,Turquoise +373,vbuggeac,ehaggerwoodac@booking.com,true,44,86193.16,89.0,Maroon +374,rbrumhamad,bfrearsad@technorati.com,true,73,78288.5,186.9,Mauv +375,dsauntae,asiddonsae@wix.com,true,23,78866.35,160.6,Fuscia +376,msalkildaf,gerridgeaf@shareasale.com,false,47,67684.6,143.2,Turquoise +377,cdelaneyag,tmassonag@indiegogo.com,true,96,22564.78,66.7,Red +378,mondracekah,drubanenkoah@flickr.com,false,82,88382.87,5.7,Mauv +379,gmumai,tdobellai@usnews.com,true,13,22103.78,41.2,Purple +380,btondeuraj,rhollowaj@seesaa.net,false,57,47580.03,89.6,Orange +381,gdreinanak,bshevlaneak@oakley.com,true,29,49443.9,61.6,Yellow 
+382,npickardal,sloughlinal@imgur.com,false,14,65970.84,200.2,Violet +383,rjerkeam,hwithnallam@ed.gov,false,85,37582.52,38.3,Maroon +384,mhousdenan,ltunnan@princeton.edu,true,33,-1364.5,87.6,Maroon +385,afiggao,aelkinao@house.gov,true,13,79874.07,30.1,Orange +386,gjolliffap,flavenap@umich.edu,true,73,27510.64,87.9,Pink +387,kbroscombeaq,aslemmondsaq@google.cn,false,20,78309.9,12.1,Green +388,clinceyar,pfraginoar@gravatar.com,false,44,97077.5,60.7,Crimson +389,klorrieas,agrellieras@si.edu,true,74,66507.05,125.6,Violet +390,cblaslat,rmcilwrickat@umn.edu,true,60,62973.12,71.3,Violet +391,jjellemanau,kcadleau@yandex.ru,true,43,44572.72,190.2,Puce +392,tstartav,jwardlawav@jigsy.com,true,86,31839.35,193.1,Goldenrod +393,emcpakeaw,dobreenaw@businesswire.com,false,86,59105.01,96.5,Red +394,mgeogheganax,csurmeyersax@diigo.com,true,94,72436.34,16.3,Teal +395,fmitforday,creignarday@discuz.net,true,76,69619.25,210.9,Blue +396,tcrewsaz,afrowaz@privacy.gov.au,false,44,898.7,133.2,Goldenrod +397,rbicknellb0,sboothmanb0@japanpost.jp,true,78,64146.3,168.4,Goldenrod +398,cturesb1,bstrachanb1@google.pl,true,47,42103.48,19.6,Green +399,stynemouthb2,dmushettb2@princeton.edu,false,38,15531.69,170.7,Violet +400,kbrazerb3,rbethob3@marketwatch.com,false,84,80793.42,100.5,Yellow +401,nmouncherb4,kjuryb4@eepurl.com,true,67,97164.11,111.0,Turquoise +402,gcallanderb5,mmacdonoghb5@diigo.com,true,40,8068.21,37.5,Yellow +403,btorrecillab6,abrislanb6@freewebs.com,false,94,46709.31,18.1,Blue +404,mstancerb7,bgoodhayb7@ca.gov,true,45,24584.88,72.2,Blue +405,laytonb8,tsullerb8@taobao.com,true,22,84505.29,62.9,Violet +406,bmcasgillb9,aedgerb9@wired.com,false,68,90372.28,34.8,Pink +407,rvasicba,edanaherba@imdb.com,true,45,97016.41,115.2,Yellow +408,eaxtonnebb,agrigoriscubb@lycos.com,true,80,45154.03,9.3,Teal +409,mstonehambc,jstuttmanbc@clickbank.net,false,14,61716.73,52.0,Green +410,cginnalybd,bflewettbd@wp.com,false,13,44319.39,91.4,Violet 
+411,ncooleybe,lbisterfeldbe@sbwire.com,false,79,24488.12,18.2,Yellow +412,lganniclifftbf,isaysebf@bloomberg.com,true,69,50918.29,159.5,Crimson +413,nmorlandbg,lhirchebg@biblegateway.com,true,96,71208.1,11.2,Khaki +414,lwiggingtonbh,ksewartbh@domainmarket.com,false,72,9810.99,41.2,Fuscia +415,gcritchlowbi,aboldockbi@scribd.com,false,74,79339.5,49.7,Turquoise +416,ktwiggebj,hhaackbj@friendfeed.com,true,54,67528.23,105.8,Puce +417,bjolleybk,cmiltonbk@cbc.ca,false,14,84296.1,114.4,Yellow +418,pfaulkesbl,sbyassbl@deviantart.com,true,14,48540.93,15.5,Yellow +419,bthomingabm,rfrancillobm@diigo.com,true,31,69763.88,68.8,Crimson +420,sfidelebn,tgelsthorpebn@berkeley.edu,false,92,74216.48,99.8,Fuscia +421,ccopnarbo,jgregolibo@csmonitor.com,true,85,5272.03,195.7,Blue +422,zsimoensbp,smarchantbp@umn.edu,false,61,97579.11,56.9,Crimson +423,gbrackpoolbq,fdorrobq@desdev.cn,false,54,55674.94,226.2,Violet +424,abaishbr,amcquillenbr@foxnews.com,true,51,87600.11,11.6,Aquamarine +425,mrunhambs,eecclesharebs@sitemeter.com,false,75,14877.28,102.2,Crimson +426,gligginsbt,cdrysdallbt@archive.org,false,90,78363.69,33.0,Fuscia +427,cgoligherbu,sjedraszekbu@shutterfly.com,false,22,10308.25,40.0,Puce +428,rpincottbv,tpietzkebv@statcounter.com,false,41,51972.96,192.6,Blue +429,dbehnbw,krunciemanbw@mozilla.com,true,14,58830.53,156.3,Mauv +430,cschnitterbx,fbirchwoodbx@moonfruit.com,false,91,31120.89,188.0,Fuscia +431,arubertby,cbaglinby@unicef.org,true,96,82876.82,163.7,Pink +432,amaccomebz,wphilippardbz@etsy.com,false,46,50503.63,40.2,Orange +433,klowmassc0,rlauxmannc0@thetimes.co.uk,true,56,31769.36,33.6,Fuscia +434,evennersc1,fharnesc1@prnewswire.com,false,46,4024.85,28.4,Khaki +435,clammersc2,sdengelc2@google.com.br,false,67,70758.56,106.9,Goldenrod +436,fguytonc3,skleinschmidtc3@wiley.com,false,19,82524.39,139.6,Teal +437,clysaghtc4,nkestellc4@artisteer.com,true,63,48171.04,111.5,Crimson +438,gheretyc5,cpriskc5@rediff.com,true,77,41566.35,70.5,Crimson 
+439,rmacfaydenc6,jjanoschekc6@seattletimes.com,false,32,72329.47,0.8,Mauv +440,gcaselickc7,dantrimc7@usnews.com,true,36,55473.45,49.8,Khaki +441,lhalfacreec8,adownc8@biblegateway.com,false,55,91352.59,87.0,Purple +442,hcampionec9,cjanjusevicc9@i2i.jp,false,94,5622.52,194.4,Teal +443,acreadyca,prubkeca@biglobe.ne.jp,false,24,98545.91,237.6,Maroon +444,gbluescb,ahuntingdoncb@sciencedirect.com,true,43,48213.68,233.5,Indigo +445,eworleycc,fthainecc@unesco.org,true,27,91966.14,2.1,Yellow +446,mdellentycd,fgirodincd@zdnet.com,true,27,67506.28,119.6,Maroon +447,cmeadmorece,hcoleiroce@spiegel.de,true,64,62494.96,126.9,Blue +448,breincf,mpaulatcf@shop-pro.jp,true,13,3586.78,5.1,Maroon +449,smcgrillcg,mmackinnoncg@google.ru,false,27,84078.07,91.9,Aquamarine +450,etinghillch,vgablerch@opensource.org,true,90,66555.31,45.9,Teal +451,vsnarci,mvaseyci@xing.com,false,55,44496.92,12.3,Teal +452,alesekcj,nrosenthalcj@ox.ac.uk,true,14,84908.63,49.4,Puce +453,dvangiffenck,preekieck@tamu.edu,false,86,-4656.46,133.3,Aquamarine +454,dblowickcl,dhandkecl@cmu.edu,false,53,83013.4,161.3,Violet +455,kavocm,rplottcm@prnewswire.com,true,91,57038.88,233.5,Purple +456,xteaguecn,amcloughlincn@mit.edu,true,41,24449.76,102.1,Goldenrod +457,peldridgeco,jkermonco@examiner.com,false,83,24252.27,43.4,Blue +458,lreamancp,ogoakscp@indiegogo.com,false,85,13434.49,124.0,Mauv +459,lgasquoinecq,aleececq@skype.com,true,22,75570.45,30.3,Red +460,jpepperillcr,ddunhillcr@kickstarter.com,true,92,94767.55,211.6,Purple +461,pnabbscs,balkercs@cmu.edu,true,95,27097.71,60.5,Orange +462,ggraceyct,senglishct@newyorker.com,true,99,54343.78,12.1,Khaki +463,kdastcu,hfraynecu@free.fr,false,86,14267.99,56.9,Turquoise +464,kkrolmancv,kmarcamcv@drupal.org,false,30,20297.42,89.8,Aquamarine +465,tfustcw,sburchcw@umn.edu,false,59,84527.6,55.0,Pink +466,dmcclarycx,cjoblincx@liveinternet.ru,false,63,59220.3,185.1,Mauv +467,pgaincy,mdivisekcy@prweb.com,false,85,71012.99,35.4,Goldenrod 
+468,sdamiatacz,mcresercz@sina.com.cn,false,54,99816.64,15.3,Maroon +469,kbeininckd0,ipaolillod0@pcworld.com,false,45,-4173.66,191.1,Aquamarine +470,edavenalld1,kshoried1@newsvine.com,true,82,1277.98,157.5,Maroon +471,aduchenned2,mhillhoused2@163.com,true,36,26023.31,133.0,Indigo +472,jverillod3,dsawerd3@youtube.com,true,64,3535.4,191.8,Teal +473,hpolkinghorned4,btuffeyd4@elegantthemes.com,false,86,36677.58,32.0,Fuscia +474,bbarfieldd5,mbellissd5@mashable.com,false,15,58698.35,83.9,Turquoise +475,gstaveleyd6,nmcgrirld6@sourceforge.net,false,28,80702.44,179.9,Mauv +476,ccozbyd7,kmckeaneyd7@geocities.jp,true,39,82585.43,211.4,Maroon +477,fswalteridged8,wcoitd8@chron.com,false,99,7611.06,172.1,Yellow +478,ahigbind9,gyarrelld9@weebly.com,false,39,71771.24,90.7,Yellow +479,asesonda,kcarsbergda@meetup.com,true,63,26044.56,191.6,Green +480,rlavistedb,jpearceydb@ycombinator.com,true,13,50080.28,31.0,Puce +481,gpigottdc,ekirkwooddc@japanpost.jp,false,87,69816.31,88.4,Fuscia +482,cbacklerdd,ochilcottdd@bloglines.com,true,53,35472.31,6.8,Fuscia +483,kbalasde,featde@senate.gov,false,92,53611.51,103.3,Violet +484,ffelipdf,bkiesseldf@phoca.cz,false,93,7678.89,12.5,Indigo +485,qgimenezdg,hburghalldg@mashable.com,false,30,3030.1,39.9,Maroon +486,rjobeydh,dvanesdh@foxnews.com,true,82,29836.2,221.7,Crimson +487,khedlingdi,kparlattdi@state.tx.us,true,52,85312.44,163.2,Orange +488,vpietrowskidj,djozefdj@ning.com,true,26,25924.48,218.6,Pink +489,sdarkedk,mhebbesdk@friendfeed.com,false,16,81693.36,90.3,Teal +490,gbanthorpedl,hbuncedl@house.gov,true,61,15400.9,46.5,Crimson +491,rhairesnapedm,kjentondm@live.com,false,37,26461.52,147.4,Orange +492,hmccaguedn,ppelmandn@chicagotribune.com,false,86,67490.87,44.7,Goldenrod +493,csandalldo,jruddindo@sogou.com,true,58,91239.02,158.2,Blue +494,dberrdp,mtripetdp@bravesites.com,false,26,61265.42,203.8,Blue +495,jbrockmandq,mstledgerdq@mozilla.com,false,96,63215.72,45.4,Goldenrod 
+496,dsommerlanddr,dellicedr@theguardian.com,true,76,20800.83,122.6,Mauv +497,abusbyds,wwaterstoneds@dell.com,false,93,68359.11,70.5,Teal +498,lscothorndt,dcluleedt@yale.edu,false,19,79359.34,175.2,Red +499,mlewintondu,tnutleydu@photobucket.com,false,32,-2300.81,73.0,Green +500,lelhamdv,wsobtkadv@squidoo.com,false,89,37893.73,181.8,Crimson +501,kscimonidw,lkyndreddw@behance.net,true,67,85584.98,230.6,Green +502,tavrasindx,paylesburydx@nydailynews.com,false,98,41192.29,74.8,Indigo +503,smyhilldy,kbasondy@bbc.co.uk,true,45,38736.67,143.0,Green +504,dabrahmerdz,cdagnandz@de.vu,true,62,9378.66,20.0,Red +505,kkeemere0,gbeamisse0@nytimes.com,false,98,4162.71,18.7,Fuscia +506,riacovuccie1,calejandroe1@shareasale.com,false,91,91952.04,52.6,Pink +507,sruppertze2,icolericke2@mit.edu,true,86,34428.37,62.8,Purple +508,xbeaushawe3,ssirettee3@woothemes.com,true,66,11770.33,235.1,Green +509,tpicoppe4,pderycote4@gnu.org,false,23,2189.34,103.3,Pink +510,learingeye5,mmckillope5@ezinearticles.com,false,82,88726.34,143.4,Blue +511,tletsone6,jdottrelle6@icio.us,true,37,49164.09,146.9,Red +512,dcrannye7,xcarlane7@abc.net.au,false,71,93356.56,117.1,Aquamarine +513,lmaxwalee8,pwhitakere8@tripadvisor.com,false,76,1833.27,58.5,Orange +514,csalvadore9,rbalfe9@quantcast.com,false,42,52992.26,75.4,Orange +515,llethburyea,ysoreyea@utexas.edu,false,50,11336.56,172.7,Green +516,lcalverteb,ahawkeridgeeb@nymag.com,true,83,98451.86,91.3,Green +517,atimsonec,ktozerec@examiner.com,true,88,8666.01,0.7,Pink +518,tloffilled,gkenforded@unblog.fr,false,33,32990.38,52.0,Blue +519,smelchioree,atunnacliffeee@topsy.com,true,94,91006.84,181.3,Green +520,hbeaversef,swildsmithef@123-reg.co.uk,true,13,66133.92,26.0,Mauv +521,nmatteinieg,sbatecokeg@economist.com,false,98,77972.77,37.1,Turquoise +522,fwakeeh,dadanezeh@dot.gov,false,15,64248.62,9.4,Yellow +523,creggianiei,bstrugnellei@ocn.ne.jp,false,78,34758.36,126.8,Red +524,gkiggelej,nfarloweej@disqus.com,true,96,42426.05,132.9,Violet 
+525,jcasswellek,agerwoodek@diigo.com,false,40,19057.94,82.9,Turquoise +526,ahallingel,hamerighiel@wunderground.com,true,13,54084.8,22.7,Mauv +527,mlagaduem,amacknockiterem@elpais.com,false,68,863.69,179.8,Aquamarine +528,bboddingtonen,aizakofen@huffingtonpost.com,false,73,68249.86,64.3,Mauv +529,ceyameo,wtibbettseo@aol.com,false,88,13860.6,62.0,Crimson +530,bprosekep,hibbisonep@hao123.com,false,50,10094.89,132.8,Crimson +531,gmckeveneyeq,bbyreseq@cyberchimps.com,false,94,4845.16,237.6,Purple +532,ctwidaleer,dkurtener@so-net.ne.jp,true,57,93976.79,10.6,Red +533,dnotoes,blembrickes@huffingtonpost.com,false,79,14319.39,217.5,Aquamarine +534,jsoaperet,hredleyet@canalblog.com,false,24,19354.76,169.8,Khaki +535,kgleweu,bdenyukineu@51.la,false,22,35109.3,233.3,Mauv +536,kyareev,ldearanev@archive.org,false,76,43341.02,140.4,Pink +537,nmannersew,rlongworthyew@sourceforge.net,true,61,69056.45,150.3,Fuscia +538,bpendrichex,jdamarellex@godaddy.com,true,59,18328.73,77.8,Purple +539,llinfordey,jfenderey@slate.com,true,80,27543.27,173.0,Crimson +540,bcorderyez,wgrundez@hugedomains.com,true,89,28790.81,4.5,Crimson +541,bdabneyf0,adartnallf0@simplemachines.org,false,44,2838.02,188.7,Red +542,ahuguetf1,marstingallf1@spiegel.de,true,79,2149.74,37.0,Teal +543,reskrietf2,jmurleyf2@nyu.edu,false,61,17501.06,52.7,Mauv +544,hfaleyf3,btwinnf3@istockphoto.com,true,32,20482.91,77.1,Mauv +545,kconveryf4,btolmanf4@biglobe.ne.jp,true,22,36218.53,103.2,Turquoise +546,ggoffef5,jstpierref5@paginegialle.it,false,70,17740.9,171.1,Green +547,dmedwayf6,mpaddlef6@hubpages.com,true,42,69034.54,225.7,Orange +548,dstandingf7,wfarlowef7@ebay.com,false,74,98539.49,12.1,Turquoise +549,abagleyf8,scasseyf8@bravesites.com,true,90,68274.04,33.1,Mauv +550,bmenlowf9,cdelgardillof9@wikipedia.org,false,85,67469.33,128.1,Maroon +551,ssambrookfa,sgeckefa@phpbb.com,false,37,56617.84,132.8,Orange +552,hwalworthfb,kkleintfb@fastcompany.com,true,68,37200.79,2.9,Violet 
+553,ghamperfc,lmaccathayfc@shop-pro.jp,false,72,58659.9,67.7,Puce +554,pkoresfd,bwippermannfd@chronoengine.com,true,35,55984.59,216.8,Turquoise +555,ldyetfe,aclemofe@example.com,true,25,12718.57,227.6,Aquamarine +556,ebogueff,kbenfieldff@blogs.com,true,20,56766.15,235.9,Pink +557,cmaynefg,ssomertonfg@va.gov,false,27,3308.83,131.1,Fuscia +558,ftreagusfh,mgrishukovfh@furl.net,true,75,-4161.96,113.6,Violet +559,wlacostefi,svolantefi@sourceforge.net,true,70,33537.64,142.5,Red +560,jogeneayfj,jgoodafj@squidoo.com,false,85,33917.02,35.1,Fuscia +561,npashenfk,meveringhamfk@zimbio.com,true,65,69465.96,76.6,Indigo +562,fshawlfl,bgarveyfl@vinaora.com,false,67,95207.61,153.8,Crimson +563,rsultanfm,atulleyfm@github.io,true,57,58044.47,14.4,Yellow +564,slukeschfn,iscogganfn@who.int,false,22,73806.5,156.1,Red +565,cpotteridgefo,narentsfo@infoseek.co.jp,false,77,63771.41,236.2,Mauv +566,vjanotafp,runwinfp@ehow.com,true,81,61250.82,198.1,Orange +567,vbrodeaufq,acouroyfq@un.org,true,63,57957.38,186.3,Goldenrod +568,dmaciejafr,jbandierafr@printfriendly.com,false,57,30884.44,101.0,Pink +569,togrowganefs,wrallinshawfs@hubpages.com,true,19,49263.3,17.1,Yellow +570,acocklandft,mmakeyft@va.gov,false,94,9313.52,195.9,Turquoise +571,tgeraldfu,rcuttellfu@bigcartel.com,true,50,87898.58,158.2,Indigo +572,fjosupeitfv,jpelerinfv@tripod.com,false,41,27115.87,126.8,Green +573,bbuxceyfw,ailesfw@topsy.com,true,63,41515.85,117.1,Puce +574,msinkinsfx,ttrusslerfx@seattletimes.com,true,65,72000.4,191.5,Purple +575,adraudefy,ytusonfy@zdnet.com,true,60,88127.41,230.1,Blue +576,amannevillefz,nfeedhamfz@macromedia.com,false,50,65633.21,95.7,Pink +577,mvogelg0,bchalcroftg0@qq.com,false,34,50920.56,18.5,Yellow +578,lorring1,gfaccinig1@photobucket.com,true,21,64347.95,132.6,Pink +579,arudyardg2,akomorowskig2@netvibes.com,false,13,4256.31,170.7,Red +580,mbrundleg3,ebartolettig3@twitter.com,false,63,60552.96,131.6,Teal +581,fglasscoog4,ndaughtryg4@dagondesign.com,true,67,11550.21,81.9,Violet 
+582,gpampling5,mjenseng5@thetimes.co.uk,true,89,86453.75,149.0,Pink +583,sespinag6,abrundallg6@symantec.com,false,94,65972.88,120.0,Green +584,emckerrong7,smillwallg7@mit.edu,true,88,45074.19,124.3,Violet +585,hpetzg8,ebrendelg8@gmpg.org,false,37,92724.01,133.8,Maroon +586,lsiddaleyg9,caxupg9@cisco.com,false,51,67792.0,108.5,Mauv +587,fministerga,ccroosega@quantcast.com,true,84,65051.04,133.2,Teal +588,creekiegb,ccarpmilegb@blogtalkradio.com,true,51,31816.56,54.8,Indigo +589,rleahygc,ejosselgc@parallels.com,false,51,81587.34,66.5,Maroon +590,mantczakgd,drameletgd@patch.com,false,41,-3646.83,81.6,Purple +591,sdorricottge,ceveringhamge@yandex.ru,true,53,41317.45,143.2,Mauv +592,wvedeneevgf,cevendengf@engadget.com,false,49,49402.97,232.4,Khaki +593,fgorsegg,cplattgg@blogspot.com,true,35,47432.17,20.0,Orange +594,alapidusgh,awridegh@indiatimes.com,false,20,78788.0,179.6,Pink +595,tcosgrovegi,gnorthcotegi@cloudflare.com,false,31,28108.13,212.9,Red +596,gelacoategj,adebiasigj@washingtonpost.com,false,58,62853.69,66.1,Indigo +597,gmarplesgk,mwyldgk@yahoo.co.jp,true,54,49065.95,158.6,Puce +598,dbussellgl,imatysgl@google.de,true,90,7724.75,8.5,Mauv +599,dbuessengm,bfolkardgm@surveymonkey.com,false,49,48455.57,233.9,Puce +600,gchavegn,vmilesopgn@jimdo.com,true,99,13469.13,184.1,Yellow +601,jinglesfieldgo,dcolcombgo@linkedin.com,true,45,1004.92,6.2,Teal +602,mmumfordgp,kmountergp@archive.org,false,28,-4416.49,121.5,Indigo +603,bmoricangq,wmcgeochgq@wordpress.org,false,17,12077.01,14.4,Violet +604,bfinlaisongr,acliffordgr@unicef.org,false,89,89849.04,107.0,Maroon +605,mpettettgs,tvadergs@issuu.com,true,96,42486.98,148.5,Green +606,ldoggettgt,ceallisgt@illinois.edu,false,61,43463.64,99.6,Green +607,ilarradgu,bstaplesgu@unc.edu,true,60,49913.31,157.4,Teal +608,hlivesaygv,pdowleygv@studiopress.com,true,13,7664.88,93.2,Goldenrod +609,svoasgw,rpetraccigw@shareasale.com,false,29,40766.57,75.7,Mauv +610,ncouthgx,bglendinninggx@hexun.com,false,99,79519.59,21.9,Goldenrod 
+611,ogallandersgy,ldruettgy@chron.com,false,96,13354.06,131.9,Goldenrod +612,vescreetgz,slowdyanegz@yahoo.com,false,88,40116.08,239.8,Goldenrod +613,rforesighth0,jdanneh0@devhub.com,false,97,79150.18,78.3,Orange +614,gbradburneh1,wsebireh1@google.pl,false,66,37487.31,193.7,Aquamarine +615,tbessh2,jfoulkesh2@nasa.gov,true,68,65805.53,136.1,Aquamarine +616,hbraveryh3,btalmanh3@apple.com,true,94,48039.97,65.4,Crimson +617,bharomeh4,jbarchrameevh4@dedecms.com,true,67,5737.12,132.0,Crimson +618,rbarrassh5,aholyardh5@youtu.be,false,91,63675.2,87.3,Goldenrod +619,rpurkinsh6,acleevesh6@4shared.com,true,23,10570.63,30.3,Aquamarine +620,hshapirah7,mlengthornh7@bluehost.com,true,15,5091.39,126.0,Khaki +621,mgallemoreh8,mdaveridgeh8@bigcartel.com,false,51,39541.9,53.5,Red +622,jtremeerh9,rmoulsdaleh9@microsoft.com,false,41,3757.36,209.6,Goldenrod +623,mmccarlha,iwehnerha@posterous.com,false,49,90839.15,168.9,Crimson +624,sbarkshb,dduxbarryhb@nature.com,false,88,89436.71,47.7,Teal +625,scrusehc,ddykashc@wikimedia.org,false,94,67142.77,12.4,Khaki +626,apharrohd,scantihd@drupal.org,true,18,39180.88,108.4,Mauv +627,hdurwardhe,dhammerbergerhe@seattletimes.com,false,86,71736.66,126.7,Pink +628,erousthf,lelixhf@who.int,false,56,58459.94,34.9,Fuscia +629,mdavidofhg,rspellarhg@usgs.gov,false,38,61614.1,89.9,Turquoise +630,ttuxellhh,mspatarihh@feedburner.com,false,78,51309.61,131.5,Violet +631,dbalehi,rroomehi@cpanel.net,true,86,62390.84,199.1,Aquamarine +632,fbugdallhj,gmartyhj@about.me,false,33,2007.57,53.7,Yellow +633,enatthk,nstebbinshk@hibu.com,true,37,50801.2,60.0,Crimson +634,sdrinkallhl,cmosebyhl@youtube.com,false,74,51150.22,144.6,Maroon +635,dsmallshawhm,randrieuhm@gravatar.com,false,56,85903.73,196.3,Yellow +636,nwinnetthn,modwyerhn@exblog.jp,true,63,32291.14,122.5,Turquoise +637,shelstromho,cduckhamho@google.co.uk,false,33,97661.21,85.5,Blue +638,ckeelyhp,kpinnockhp@statcounter.com,false,81,15296.61,128.7,Teal 
+639,dgennerichhq,tpietrowiczhq@nasa.gov,true,85,77357.36,50.8,Orange +640,jsimionihr,rpetriehr@examiner.com,true,35,19017.44,128.5,Aquamarine +641,kespinahs,icosgriffhs@webnode.com,true,14,6159.76,207.7,Turquoise +642,pdurradht,agoodwellht@canalblog.com,false,62,59773.15,123.4,Indigo +643,sjapphu,hcrichhu@godaddy.com,false,86,11573.15,44.1,Indigo +644,narringtonhv,jcoslitthv@stanford.edu,false,37,30502.54,96.5,Purple +645,kpoolmanhw,igamilhw@ning.com,false,56,56780.97,162.7,Crimson +646,skobierieckihx,jbyhx@usatoday.com,false,85,41612.15,79.1,Pink +647,icouthhy,csarllhy@go.com,true,54,31181.74,89.7,Maroon +648,jjoselandhz,dboarderhz@house.gov,false,36,80954.36,86.9,Yellow +649,irichmondi0,fklimushevi0@msu.edu,false,42,37682.61,234.9,Maroon +650,ssheffieldi1,lschlagtmansi1@dyndns.org,true,52,-3665.29,166.6,Orange +651,fmulcahyi2,ehavardi2@ihg.com,true,49,41254.87,44.2,Crimson +652,djouannyi3,sbrommagei3@google.cn,false,60,71442.22,132.4,Khaki +653,sroughi4,dorrilli4@virginia.edu,true,55,97207.32,18.4,Fuscia +654,emalamorei5,hharnelli5@harvard.edu,false,19,1795.59,43.7,Turquoise +655,zbitteni6,gbeedomi6@flickr.com,false,59,-364.24,157.1,Orange +656,idittsoni7,gdebruini7@reuters.com,false,43,41980.16,179.8,Maroon +657,mshirili8,gdamanti8@hud.gov,false,35,84967.11,28.4,Aquamarine +658,jsawnwyi9,hskipperi9@columbia.edu,false,89,46970.78,18.4,Goldenrod +659,hsulteria,odunnania@privacy.gov.au,false,86,14882.28,73.6,Goldenrod +660,mfoatib,gclouterib@archive.org,false,45,582.32,171.5,Blue +661,pogleasaneic,vfavellic@blogtalkradio.com,true,86,39702.75,14.2,Blue +662,vjuraid,oauselloid@csmonitor.com,false,92,16612.18,106.2,Mauv +663,ntoweie,jchrstineie@tinypic.com,false,67,36379.08,68.2,Khaki +664,mgossingtonif,sbeiningif@last.fm,true,52,79030.26,14.3,Fuscia +665,cdhoogheig,aflattmanig@goo.ne.jp,true,58,77394.03,97.2,Fuscia +666,ccochraneih,cfeaveryearih@dell.com,true,83,70191.19,6.9,Violet +667,eabrahamowitczii,odigiacomettinoii@google.pl,false,33,4397.91,198.4,Orange 
+668,cmaginnij,waberkirderij@plala.or.jp,false,24,1057.09,88.1,Purple +669,jpetranik,cdawltreyik@census.gov,true,44,63915.45,8.6,Pink +670,eswadlingil,wwhytemanil@europa.eu,true,56,71911.47,190.2,Maroon +671,eshemeltim,kwanderschekim@networkadvertising.org,false,45,44066.59,212.0,Red +672,fmolandin,chassallin@dailymail.co.uk,true,22,14561.19,238.5,Blue +673,nwillicottio,despinolaio@edublogs.org,false,86,32655.61,50.6,Goldenrod +674,lcarlisleip,pyesichevip@wisc.edu,false,66,75660.77,30.3,Pink +675,lalessandriniiq,cransoniq@youtu.be,false,74,47901.36,39.4,Fuscia +676,rholtawayir,nnetleyir@howstuffworks.com,true,77,32315.92,144.8,Blue +677,aprandiis,esibbsonis@upenn.edu,false,87,99274.22,102.8,Puce +678,tleetit,ftawtonit@flavors.me,true,20,77658.35,185.3,Orange +679,nstollmeieriu,bdearaniu@yahoo.com,false,19,63735.29,3.8,Mauv +680,mmcfieiv,hdykaiv@cbc.ca,true,84,85061.62,173.9,Fuscia +681,corwiniw,fbristeriw@biglobe.ne.jp,false,45,58073.98,218.6,Blue +682,vaddionizioix,abaudouxix@wp.com,true,67,86234.95,207.4,Yellow +683,hurlichiy,jkenneaiy@angelfire.com,false,85,51750.87,136.6,Purple +684,kkidstoniz,rmanlowiz@sitemeter.com,false,19,9916.06,28.2,Yellow +685,bginityj0,rhillyattj0@reuters.com,true,15,41133.12,216.9,Pink +686,jlauxj1,bpohlsj1@about.me,false,48,70224.36,172.8,Mauv +687,ttillardj2,shandkej2@netscape.com,false,81,22207.06,127.5,Purple +688,dvogellerj3,ewohlerj3@latimes.com,true,42,2866.52,52.5,Yellow +689,cbullierj4,mbrafieldj4@123-reg.co.uk,true,53,97194.09,61.9,Fuscia +690,nmalafej5,mkristoffersenj5@booking.com,true,42,93893.88,14.1,Red +691,nmeritonj6,myuj6@geocities.com,false,20,73740.27,166.9,Yellow +692,cfilkinj7,jcaplanj7@slideshare.net,true,90,27306.55,181.3,Goldenrod +693,thullyj8,wdanilowiczj8@zdnet.com,true,83,60098.64,117.3,Puce +694,mharnettyj9,zgooddiej9@nasa.gov,true,22,61067.9,237.6,Aquamarine +695,csielyja,kezzleja@cnn.com,true,43,77517.54,89.9,Orange +696,lkellochjb,lpatshulljb@google.com.br,false,21,37309.43,49.7,Violet 
+697,crussonjc,jruddomjc@e-recht24.de,true,96,27214.97,95.7,Turquoise +698,lbernardeaujd,mbrestonjd@studiopress.com,true,62,62163.5,171.4,Violet +699,lhousecroftje,kkeepeje@vinaora.com,true,73,64899.23,100.1,Turquoise +700,mflemmingjf,jferaghjf@bluehost.com,true,92,86381.37,151.1,Crimson +701,cbeaufoyjg,ageldertjg@washington.edu,false,86,10538.88,69.8,Blue +702,bstuckjh,hridewoodjh@ed.gov,false,13,41191.55,212.3,Pink +703,jmcgoochji,ecrebbinji@ibm.com,true,47,78938.47,210.5,Teal +704,pkernleyjj,clysterjj@vistaprint.com,false,45,58732.87,140.8,Blue +705,jweedonjk,awalkerjk@foxnews.com,false,26,78719.54,148.6,Maroon +706,mfluinjl,lbrockingtonjl@sun.com,true,92,50186.14,116.6,Teal +707,atwelljm,hbroxisjm@barnesandnoble.com,false,86,41574.6,147.7,Goldenrod +708,lschenkeljn,chadawayjn@mediafire.com,true,82,16726.39,164.3,Puce +709,swarbysjo,fgratlandjo@msu.edu,true,54,88648.86,44.8,Crimson +710,lsnodinjp,gbengtssonjp@networksolutions.com,true,84,71325.02,66.1,Aquamarine +711,rbiddlestonejq,bdosdelljq@webmd.com,false,61,54819.81,124.6,Crimson +712,goconcannonjr,ceddyjr@sohu.com,false,79,32417.97,20.0,Mauv +713,kcorderojs,achingedehalsjs@odnoklassniki.ru,true,26,76691.29,138.6,Fuscia +714,kmarkwelljt,lcheesleyjt@nymag.com,false,99,21081.63,67.2,Teal +715,jrimingtonju,bswalteridgeju@spotify.com,true,23,63174.74,156.8,Red +716,kyeliashevjv,stalmadgejv@multiply.com,false,83,-3340.18,213.5,Indigo +717,rboughtonjw,dhousdenjw@dedecms.com,true,64,40979.68,104.8,Fuscia +718,wbegwelljx,uposthilljx@xing.com,true,86,89664.13,193.8,Teal +719,ahalltjy,earonowiczjy@ameblo.jp,false,44,31149.9,158.3,Indigo +720,cmullisjz,igasnelljz@bizjournals.com,true,42,33540.98,227.0,Fuscia +721,beastmank0,mtoothillk0@issuu.com,false,75,74087.41,78.9,Purple +722,srangeleyk1,bmatthewmank1@geocities.com,true,60,45820.23,74.8,Indigo +723,lowensk2,mdegnenk2@apache.org,true,54,98564.02,125.0,Khaki +724,gmockfordk3,ydaviotk3@domainmarket.com,false,55,26509.13,141.5,Aquamarine 
+725,hcatherinek4,vbickfordk4@artisteer.com,true,83,59087.25,2.1,Goldenrod +726,eparcallk5,gstoresk5@comcast.net,false,68,53765.82,117.4,Maroon +727,kthringk6,sbainbridgek6@adobe.com,true,93,8724.04,189.9,Teal +728,jmeggisonk7,mfebvrek7@webs.com,true,33,56904.29,201.7,Yellow +729,tdeeveyk8,fweirk8@merriam-webster.com,false,58,30724.23,44.8,Crimson +730,amorgenk9,imcallasterk9@umn.edu,false,58,65230.09,193.4,Blue +731,sbengtssonka,mkhristoforovka@reverbnation.com,true,17,36651.28,178.2,Green +732,glilleykb,cdifranceschikb@cloudflare.com,true,53,17126.99,170.8,Yellow +733,dwellbelovedkc,ahatchellkc@dailymotion.com,false,83,70630.56,71.0,Crimson +734,handraudkd,mstiantkd@mashable.com,true,67,5945.12,119.6,Red +735,gdouseke,grallingke@netscape.com,true,36,37991.89,162.3,Indigo +736,sgerreykf,gdriffekf@blinklist.com,true,19,73181.22,173.5,Indigo +737,zlitzmannkg,zatcherleykg@qq.com,true,54,4223.91,5.0,Violet +738,mferrarokh,bbennekh@earthlink.net,true,40,23135.73,207.3,Goldenrod +739,abarclayki,kglantzki@house.gov,true,67,99852.81,59.3,Mauv +740,dtwinningkj,nfreiburgerkj@irs.gov,true,72,92527.26,226.3,Mauv +741,svanderkruijskk,djoskovitchkk@fema.gov,true,63,65954.96,34.4,Crimson +742,sseinekl,blissandrinikl@blinklist.com,true,24,15488.9,48.7,Puce +743,mroddamkm,nwaddellkm@yale.edu,false,70,87047.79,65.2,Khaki +744,rboorkn,zveschambeskn@sogou.com,false,61,82878.07,75.2,Orange +745,kruddomko,sbandeyko@ow.ly,true,48,50560.12,165.6,Red +746,cgiberdkp,ksarsonkp@icio.us,false,22,67979.25,76.7,Blue +747,swhitehallkq,ojustunkq@prweb.com,true,94,16531.31,138.0,Goldenrod +748,ylaylandkr,cannakinkr@google.cn,false,52,4113.22,224.0,Green +749,rpietaschks,bmccuffieks@msn.com,false,26,91627.92,109.0,Turquoise +750,mmiddlemasskt,jscadingkt@yale.edu,true,43,50070.06,16.8,Mauv +751,ghuggardku,rosgarbyku@free.fr,false,97,84868.77,136.5,Green +752,fsealeafkv,egutridgekv@google.ca,false,78,80357.62,29.8,Teal +753,mstandidgekw,tclowtonkw@craigslist.org,true,60,52510.57,79.6,Khaki 
+754,fsavillekx,cskeatskx@cafepress.com,false,98,54177.86,55.4,Purple +755,fdreakinky,ycarloneky@jalbum.net,true,37,54960.14,7.0,Mauv +756,fcatterickkz,gkwietakkz@simplemachines.org,false,21,5613.0,66.4,Green +757,dvanhalenl0,ohayerl0@fotki.com,false,22,58864.42,13.5,Turquoise +758,bhubbuckel1,sdavidofskil1@archive.org,false,22,37263.07,216.5,Pink +759,jroparsl2,crodliffl2@npr.org,true,51,53206.93,212.7,Green +760,mfennellyl3,ebogacel3@i2i.jp,false,78,4464.91,5.7,Maroon +761,mglantonl4,ejestl4@reverbnation.com,true,26,1767.14,206.0,Indigo +762,qmontfortl5,aalbonel5@4shared.com,false,42,59088.77,140.3,Violet +763,lgyerl6,jmaccurleyl6@eventbrite.com,false,22,19183.33,125.9,Indigo +764,mhadgkissl7,lcansdalel7@wisc.edu,false,88,97933.34,171.6,Turquoise +765,cdodsl8,sdydel8@globo.com,true,48,8972.11,78.2,Orange +766,khadcroftl9,lfilchaginl9@tinypic.com,false,39,87113.59,122.8,Indigo +767,agiblinla,cjudgkinsla@dell.com,true,77,94061.18,141.5,Crimson +768,dhurllb,damylb@epa.gov,false,43,4687.76,38.7,Mauv +769,shanscombelc,cpidgeleylc@cmu.edu,false,66,-4321.45,194.5,Turquoise +770,aashdownld,mhuntingld@jugem.jp,true,23,61671.86,225.4,Aquamarine +771,rportinarile,mlerwillle@studiopress.com,true,63,63822.19,147.6,Turquoise +772,msaylorlf,lcurnocklf@who.int,true,86,60550.42,73.2,Green +773,jsparwaylg,kjacqueminotlg@canalblog.com,true,41,50514.33,90.0,Yellow +774,mlegerwoodlh,rporsonlh@toplist.cz,true,33,97239.82,159.6,Teal +775,mmecchili,nparrishli@amazon.de,false,68,91486.93,79.0,Mauv +776,dkilleenlj,thamblinglj@washington.edu,true,64,69715.69,22.5,Violet +777,omosconelk,jgreensideslk@constantcontact.com,true,44,5241.46,110.0,Teal +778,tbratcherll,tandrivelll@simplemachines.org,true,98,20618.77,142.9,Teal +779,twanelm,hhentzelm@ovh.net,false,27,22452.95,21.6,Aquamarine +780,ngegayln,abassomln@vimeo.com,false,63,70329.93,201.4,Khaki +781,sottosenlo,aerleylo@techcrunch.com,false,40,20516.77,162.3,Violet +782,kmeadleylp,sgiottoilp@i2i.jp,true,69,15457.58,83.8,Purple 
+783,jgatfieldlq,emohanlq@tinypic.com,false,24,69864.91,174.8,Turquoise +784,bgabbotlr,adenhamlr@whitehouse.gov,true,87,97852.75,108.5,Fuscia +785,chileyls,idurantels@kickstarter.com,false,63,26897.56,153.0,Violet +786,bfawkeslt,hlonghirstlt@nyu.edu,true,83,42854.45,7.2,Puce +787,abygrovelu,jlainlu@goo.ne.jp,false,93,1715.17,222.8,Yellow +788,cscranneylv,mswaytonlv@imageshack.us,false,37,39586.8,173.6,Goldenrod +789,bswetmanlw,mlittledykelw@shareasale.com,true,13,93620.79,25.6,Pink +790,rcastellx,mjenynslx@cbsnews.com,false,64,97502.03,151.7,Indigo +791,awhildely,zfidgely@si.edu,true,86,10900.19,96.0,Purple +792,pspinagelz,isommertonlz@umn.edu,false,89,34495.95,167.2,Aquamarine +793,daylmorem0,rasburym0@cmu.edu,true,85,51662.45,119.8,Puce +794,rrabsonm1,nvandenveldenm1@ucsd.edu,true,49,99877.75,231.4,Mauv +795,jcorneliusm2,louldcottm2@latimes.com,true,18,34783.14,72.5,Indigo +796,lmcsporonm3,ajentetm3@instagram.com,false,45,10515.56,142.7,Green +797,hchappelm4,jwinsparem4@reference.com,false,63,79383.73,41.9,Khaki +798,mroanm5,hlafayem5@illinois.edu,true,16,85865.88,185.5,Aquamarine +799,cscollardm6,etejerom6@msu.edu,true,77,75096.41,233.8,Green +800,vbelonem7,cmenurem7@nature.com,false,93,86735.4,57.5,Violet +801,rgiacobbom8,gsorsbym8@nifty.com,true,42,21567.63,118.9,Indigo +802,hcuffm9,mgrafhomm9@clickbank.net,true,59,47064.47,194.6,Red +803,sdeverilma,asibsonma@freewebs.com,true,99,70659.88,59.4,Mauv +804,nrowberrymb,cmercermb@yellowpages.com,true,20,40618.69,110.6,Indigo +805,jshavelmc,tcowinmc@wikispaces.com,true,91,53367.65,11.5,Maroon +806,bswalemd,tobraymd@foxnews.com,false,88,76575.62,23.5,Teal +807,tpolendineme,rrumgayme@bravesites.com,false,77,80167.6,222.7,Khaki +808,tredfernmf,jtrustriemf@phoca.cz,false,29,94929.88,158.4,Turquoise +809,oegintonmg,abolinomg@admin.ch,false,95,38729.62,210.9,Green +810,mcondliffemh,celsburymh@github.io,true,27,12947.55,90.5,Orange +811,emenarymi,rcurnokkmi@nps.gov,true,99,43420.32,191.0,Maroon 
+812,cbamfieldmj,ccrawforthmj@exblog.jp,false,64,-3111.18,199.8,Green +813,ndudlestonmk,creveleymk@ehow.com,true,88,91535.3,69.3,Red +814,nmcenteeml,amitkcovml@dmoz.org,false,58,22965.14,120.4,Maroon +815,ggarrymm,wvanyukovmm@tmall.com,true,25,20543.95,131.9,Puce +816,chandasidemn,amickleburghmn@amazon.co.uk,true,58,96068.68,193.5,Green +817,tfenbymo,cbowerbankmo@state.gov,true,57,86420.45,70.3,Crimson +818,hfeeneymp,mkilkennymp@reuters.com,true,26,13832.73,220.7,Yellow +819,cpeddelmq,wwedgemq@webeden.co.uk,false,15,-1753.01,141.1,Green +820,tadamecmr,tcosgreavemr@dmoz.org,true,62,54733.3,46.8,Purple +821,fbegginims,cpedicanms@dell.com,false,92,26331.35,152.3,Yellow +822,bzannotellimt,drillattmt@exblog.jp,true,98,53088.31,91.4,Mauv +823,bwildmanmu,sinsealmu@shareasale.com,true,27,33382.88,16.4,Yellow +824,tdunsmoremv,ztrayesmv@posterous.com,true,29,15765.88,70.0,Puce +825,cmoxonmw,hbenionmw@yellowbook.com,false,26,74238.43,92.0,Aquamarine +826,tkoppeckmx,ebemlottmx@noaa.gov,false,84,29802.32,91.6,Mauv +827,achampneysmy,kmullendermy@acquirethisname.com,true,82,68007.28,151.7,Blue +828,msainsburymz,auzzellmz@spiegel.de,true,13,44445.82,234.1,Yellow +829,tpiwelln0,yburchalln0@edublogs.org,true,87,72267.84,110.2,Khaki +830,kbainesn1,bchessumn1@paypal.com,false,55,66022.98,74.8,Orange +831,avallendern2,adesaveryn2@hubpages.com,true,33,-202.52,51.3,Goldenrod +832,zhanneyn3,dbougourdn3@posterous.com,false,90,94985.52,77.9,Red +833,mgahann4,lsilversmidn4@trellian.com,false,95,38053.25,239.0,Puce +834,gleverettn5,acharlickn5@about.me,true,23,68019.86,91.9,Fuscia +835,sburberyen6,kmickann6@google.it,false,43,23025.1,137.7,Fuscia +836,dlindupn7,jbullersn7@uol.com.br,false,79,13973.81,217.5,Violet +837,aklousnern8,furreyn8@infoseek.co.jp,true,88,92713.68,39.6,Red +838,htothn9,edrablen9@cbsnews.com,true,50,57212.07,148.3,Turquoise +839,bblaymiresna,reberzna@theatlantic.com,true,62,72365.05,184.9,Puce +840,dmcelweenb,wnelesnb@hexun.com,false,16,5898.99,48.6,Crimson 
+841,tbatissenc,hkowalskinc@timesonline.co.uk,true,25,59091.38,183.7,Teal +842,mgoviernd,rbaxendalend@timesonline.co.uk,false,45,25233.05,215.1,Yellow +843,ctarpeyne,lmcrobbne@dot.gov,false,96,69785.72,49.6,Yellow +844,ksquibbsnf,tkluliceknf@smugmug.com,true,92,17162.29,50.9,Red +845,fmaynellng,hhabertng@walmart.com,false,79,30896.94,0.8,Teal +846,cdunabienh,jbraznh@harvard.edu,false,98,25174.93,16.8,Pink +847,jvanhalenni,dnoddleni@google.pl,true,54,36548.43,161.4,Goldenrod +848,ohingenj,caldricknj@google.com.hk,true,89,17907.01,76.4,Crimson +849,jdruittnk,tkyngdonnk@elpais.com,false,70,10303.06,43.9,Goldenrod +850,agauntleynl,fchaikovskinl@amazon.com,true,30,36041.95,65.4,Pink +851,teatesnm,mdowyernm@indiegogo.com,false,66,77516.5,152.3,Turquoise +852,lowernn,evizornn@goo.ne.jp,false,13,80379.36,229.8,Crimson +853,schristallno,lcowderoyno@imageshack.us,true,90,-3916.31,220.0,Mauv +854,khofernp,hmerielnp@unesco.org,false,82,72738.09,116.2,Green +855,ibrolannq,rnewnq@mashable.com,true,99,64580.15,8.6,Maroon +856,rfilernr,ccliburnnr@bloglovin.com,true,97,75886.58,127.3,Violet +857,pocahsedyns,cgullyns@google.nl,false,47,66025.54,48.9,Teal +858,kbahlnt,ldwelleynt@census.gov,false,30,17006.19,59.6,Purple +859,tecknu,jrichtnu@trellian.com,false,65,84133.56,141.0,Yellow +860,akellehernv,ffeldmusnv@skyrock.com,true,68,89448.65,0.2,Indigo +861,kkaradzasnw,bdonnellannw@ucla.edu,false,64,19725.5,88.4,Fuscia +862,pjordannx,esoggeenx@harvard.edu,true,61,10697.78,9.2,Green +863,dthorringtonny,astallanny@hao123.com,false,87,92520.21,131.7,Pink +864,ceilertnz,hstockeynz@yellowbook.com,true,24,-4166.65,102.9,Pink +865,gkippaxo0,opatemano0@godaddy.com,true,58,92532.69,223.6,Crimson +866,bhuckabeo1,wgladtbacho1@fema.gov,false,48,15862.24,170.7,Violet +867,twillmetto2,welgaro2@flickr.com,false,77,54246.67,215.2,Blue +868,aaspino3,dedmetto3@examiner.com,false,52,75131.93,229.1,Yellow +869,jblissetto4,pharcombeo4@wisc.edu,false,40,77283.98,77.5,Blue 
+870,cskeeleso5,edelhayo5@php.net,true,31,31368.98,10.6,Aquamarine +871,dlillimano6,nkerwicko6@over-blog.com,false,59,-4976.95,146.5,Violet +872,oboutflouro7,aarthargo7@deliciousdays.com,false,72,86576.81,38.6,Turquoise +873,agrinleyo8,bstuchburieo8@topsy.com,true,70,41443.1,40.0,Maroon +874,mbrehaulto9,ofritzo9@elpais.com,false,96,96134.88,108.1,Orange +875,lderhamoa,salabastaroa@intel.com,true,24,65219.84,94.5,Violet +876,olibbeob,dbeaufoyob@auda.org.au,true,97,64431.0,118.4,Pink +877,dvereganoc,dadcocksoc@disqus.com,false,16,3380.11,13.5,Green +878,erylettod,hnesbyod@purevolume.com,true,77,93942.71,235.2,Violet +879,mbiddissoe,bprysoe@twitter.com,false,44,42211.05,68.3,Crimson +880,dclaypoleof,phowlesof@ed.gov,true,14,75530.96,123.1,Orange +881,dfreemantleog,amanisog@twitter.com,false,93,17909.85,85.1,Violet +882,botuohyoh,sgouldstrawoh@google.de,true,29,64901.12,196.9,Green +883,akimberlyoi,rmaclachlanoi@moonfruit.com,false,45,24230.08,25.5,Khaki +884,kpemberoj,rclaasenoj@senate.gov,true,53,3772.61,126.7,Purple +885,kblackbroughok,fgurnerok@japanpost.jp,false,31,66665.42,107.6,Aquamarine +886,ericciol,oyakebovitchol@mashable.com,false,91,12.72,213.1,Khaki +887,acannavanom,chalegarthom@quantcast.com,false,80,74701.54,36.5,Indigo +888,mjanssenson,thaggerstoneon@ask.com,true,89,60108.71,85.3,Red +889,etuttiettoo,bfeeheryoo@toplist.cz,true,16,-4603.13,218.9,Puce +890,mkitchenmanop,rcoyettop@sbwire.com,false,95,15250.3,151.4,Pink +891,vcarnelloq,rwallegeoq@earthlink.net,false,43,54363.86,80.5,Maroon +892,csummerscalesor,jsantorior@weibo.com,true,53,10008.12,125.8,Blue +893,esirmonos,tsurmonos@npr.org,true,67,11020.49,92.3,Teal +894,vdunfordot,jmernerot@slashdot.org,true,77,29219.85,105.4,Green +895,nbolusou,fscrannageou@samsung.com,false,63,93759.14,105.8,Blue +896,amoylerov,bharnettov@phoca.cz,true,46,6921.41,53.3,Orange +897,gferrelliow,rgallifordow@businessinsider.com,true,22,54580.64,129.2,Puce +898,lwinkleox,mwisdenox@google.nl,true,28,68138.0,81.5,Turquoise 
+899,tgrisardoy,kgilbeartoy@51.la,false,95,7846.12,145.1,Purple +900,kjayeoz,skyneoz@guardian.co.uk,true,78,70129.38,0.2,Goldenrod +901,smecop0,fgoodrickep0@t-online.de,false,29,-796.8,137.1,Khaki +902,ggoutp1,gdownesp1@mlb.com,true,62,92345.61,116.0,Pink +903,mwalthop2,croizp2@bbb.org,false,79,47447.26,224.8,Aquamarine +904,smerrganp3,byarmouthp3@google.it,true,77,37050.71,190.6,Aquamarine +905,rgrindlep4,llowfillp4@yandex.ru,true,66,99440.67,137.2,Pink +906,adabornp5,nconnerryp5@dot.gov,false,92,70552.57,37.6,Green +907,ggrimsdellp6,kbehrendp6@jiathis.com,true,81,89295.97,170.6,Aquamarine +908,osmoutenp7,ialdwichp7@nih.gov,false,85,94090.23,151.9,Green +909,mocannanp8,eleathwoodp8@comcast.net,true,24,65800.55,172.4,Maroon +910,dpourveerp9,mmathevetp9@dropbox.com,true,54,35738.96,30.7,Orange +911,jragglespa,hmargrempa@wunderground.com,true,58,15246.16,235.8,Maroon +912,gmaestropb,ghackingepb@google.cn,false,46,13202.97,48.9,Green +913,pnattepc,mcritchardpc@dell.com,false,56,77065.89,7.3,Aquamarine +914,gbattiepd,ehenighanpd@usda.gov,false,33,96375.25,185.9,Teal +915,chowickpe,atraharpe@artisteer.com,false,90,73316.39,215.2,Orange +916,cduckinfieldpf,kferrarellopf@globo.com,false,42,19893.76,40.1,Indigo +917,lbradanepg,dgapperpg@sciencedaily.com,true,82,84588.63,2.4,Mauv +918,eweondph,rbaterph@freewebs.com,false,86,32398.49,234.4,Teal +919,dpavlikpi,mbrissetpi@nba.com,false,69,37981.61,66.0,Yellow +920,difflandpj,sblabeypj@pbs.org,false,40,205.48,99.3,Blue +921,oocarrollpk,hrookespk@t.co,false,39,8027.29,179.1,Green +922,mbailpl,hdorceypl@berkeley.edu,true,90,31989.58,137.0,Green +923,lhissettpm,tloudianpm@craigslist.org,true,99,76601.73,136.3,Violet +924,csindenpn,leubankpn@canalblog.com,false,83,18256.89,66.4,Orange +925,wrowlinpo,ldoolerpo@blogger.com,false,82,28114.27,161.7,Goldenrod +926,lcathiepp,hpetrollipp@issuu.com,true,82,73603.2,223.7,Turquoise +927,cchippindalepq,gleestutpq@google.com.au,false,97,71971.21,166.9,Khaki 
+928,cmargrettpr,hpopescupr@slate.com,true,82,35026.36,36.1,Pink +929,mhalshawps,agilhoolps@webeden.co.uk,false,80,25313.15,107.7,Blue +930,ddavydkovpt,kstearndalept@exblog.jp,true,64,39551.81,41.7,Blue +931,kgyfordpu,cheathcottpu@dot.gov,false,48,50205.55,34.7,Goldenrod +932,ssempillpv,rdigginspv@elegantthemes.com,true,36,86762.77,161.8,Goldenrod +933,nbutcherspw,gdarlassonpw@biglobe.ne.jp,false,55,42640.28,129.8,Turquoise +934,pyarntonpx,galanbrookepx@cnbc.com,true,32,28483.99,43.3,Goldenrod +935,godbypy,lruckledgepy@hubpages.com,true,36,370.68,31.2,Violet +936,fbriggspz,olinehampz@infoseek.co.jp,true,28,28368.04,131.7,Fuscia +937,cidaleq0,rasserq0@people.com.cn,false,32,59676.47,46.1,Khaki +938,fmcclintonq1,mgethinsq1@last.fm,true,20,21805.28,188.6,Puce +939,blarwoodq2,lphilippsonq2@youtu.be,true,42,87558.38,51.7,Pink +940,omattysq3,zlapennaq3@reuters.com,true,79,75167.27,39.8,Green +941,lissacofq4,mpenkethq4@a8.net,false,13,43719.34,115.9,Purple +942,rrollingq5,dohanessianq5@soundcloud.com,false,26,75355.53,26.2,Pink +943,jsennq6,bworsnipq6@eepurl.com,false,35,50325.24,185.5,Khaki +944,mduerq7,edivisq7@bizjournals.com,false,58,78500.9,152.1,Green +945,aeckq8,ctitmusq8@usa.gov,false,84,893.61,153.7,Blue +946,ofulleq9,apietrzakq9@edublogs.org,true,53,37805.24,208.3,Fuscia +947,pruminqa,dlansdaleqa@theguardian.com,false,86,30109.71,216.0,Goldenrod +948,hmacmurrayqb,scominoliqb@bluehost.com,false,74,72500.94,71.6,Orange +949,kvermanqc,sribeiroqc@mayoclinic.com,true,69,26416.2,141.1,Red +950,wmichalqd,tcorballisqd@topsy.com,true,24,61432.79,38.5,Purple +951,kklampkqe,amacvanamyqe@uol.com.br,false,39,72249.74,26.7,Khaki +952,bhusbandsqf,ejardeinqf@google.it,false,52,23211.25,52.8,Fuscia +953,dlongcasterqg,dbugbeeqg@homestead.com,false,74,16366.46,29.2,Khaki +954,rchesneyqh,moverstoneqh@newsvine.com,false,99,-2006.09,120.9,Crimson +955,iheadqi,bpoolqi@yellowpages.com,false,33,-4317.33,14.9,Goldenrod 
+956,cflatmanqj,eturfordqj@kickstarter.com,false,38,40558.1,71.3,Goldenrod +957,lcolefordqk,raubryqk@hugedomains.com,false,26,78048.02,234.3,Turquoise +958,aschwerinql,hholburyql@people.com.cn,false,88,79154.48,167.6,Green +959,fheffernonqm,kzamboniniqm@ameblo.jp,false,16,83008.0,76.1,Teal +960,amartiniqn,smoxsteadqn@sciencedaily.com,false,35,48144.86,130.9,Orange +961,ojaramqo,tlorrowayqo@europa.eu,false,81,10215.38,177.4,Indigo +962,ddonanqp,amceloryqp@reuters.com,true,43,12796.7,188.6,Green +963,pburghqq,dhasteqq@sciencedaily.com,true,34,80065.42,59.1,Orange +964,hforceqr,cjedrzaszkiewiczqr@time.com,true,47,87532.27,221.7,Violet +965,gculliganqs,wpiddickqs@engadget.com,true,73,39438.9,17.3,Pink +966,eredmireqt,icoxenqt@imdb.com,false,71,98327.86,57.8,Green +967,sdoersqu,zenosqu@dmoz.org,false,17,7913.05,143.9,Red +968,maynoldqv,gverlingqv@umich.edu,false,24,87024.88,180.6,Turquoise +969,kpygottqw,gchaudretqw@stanford.edu,false,56,94903.86,174.3,Goldenrod +970,ssaunperqx,bpryceqx@plala.or.jp,true,87,37998.4,117.2,Indigo +971,aayceqy,hratieqy@usa.gov,false,65,29630.67,173.2,Turquoise +972,jbakhrushinqz,oneashamqz@weibo.com,false,64,69392.77,182.5,Blue +973,mfrancesr0,mblairr0@free.fr,true,15,39618.91,140.3,Orange +974,jkaygillr1,cbellier1@microsoft.com,false,96,95563.93,81.0,Orange +975,bparamorer2,gtrengrover2@addthis.com,true,55,56345.69,154.0,Khaki +976,bpodmorer3,akhilkovr3@apple.com,false,28,14922.0,165.1,Blue +977,emackinr4,lvasilchenkor4@deviantart.com,false,29,49697.14,66.5,Indigo +978,snielsonr5,tologanr5@webnode.com,false,24,30891.03,71.5,Orange +979,ckryskar6,bstannersr6@hc360.com,true,43,86494.57,131.5,Puce +980,rbestwallr7,hlindgrenr7@gnu.org,false,60,53289.56,207.7,Khaki +981,kporter8,mhumbertr8@sphinn.com,true,72,40844.95,120.1,Pink +982,syeskovr9,amcvanamyr9@newyorker.com,false,70,50277.84,183.1,Indigo +983,atilzeyra,bcaselickra@oracle.com,true,90,67053.3,8.3,Green +984,nstredwickrb,pgiacobiliorb@biglobe.ne.jp,false,72,12131.57,234.1,Teal 
+985,npursegloverc,efouldesrc@wp.com,true,57,4805.7,81.3,Mauv +986,lsowersbyrd,opotticaryrd@sphinn.com,true,57,60797.72,151.9,Turquoise +987,bderbyre,hvanleeuwenre@oakley.com,false,78,46044.34,229.7,Crimson +988,rogusrf,gdunkleerf@reference.com,true,95,23477.77,132.0,Orange +989,bdomicorg,aeagleshamrg@prweb.com,true,70,81559.91,117.3,Turquoise +990,cspittalrh,lruddlesdenrh@tiny.cc,true,66,78185.35,199.1,Khaki +991,tlamasri,tsachnori@ucoz.ru,false,99,14428.88,229.0,Goldenrod +992,cfranzewitchrj,lblackdenrj@admin.ch,false,49,45862.1,145.6,Orange +993,jraitrk,rdigginsonrk@freewebs.com,false,94,95204.78,64.4,Mauv +994,jpouldenrl,ldowzellrl@ox.ac.uk,true,79,73495.97,155.7,Orange +995,dmaffeorm,lhimsworthrm@ucla.edu,true,98,99913.35,141.9,Khaki +996,sbroadburyrn,jbrenardrn@reddit.com,true,25,50938.22,218.9,Violet +997,dbirkinshawro,lferraoro@wufoo.com,false,80,87717.29,230.0,Indigo +998,sallmondrp,bdurrantrp@miitbeian.gov.cn,true,32,2571.84,113.7,Maroon +999,gbrollyrq,lpoulneyrq@wikipedia.org,true,58,5790.69,226.9,Orange +1000,ljevonsrr,jjoinsonrr@hatena.ne.jp,true,19,7591.89,217.6,Puce diff --git a/src/Backend/test_data/csv/intergration_test_data_2.csv b/src/Backend/test_data/csv/intergration_test_data_2.csv new file mode 100644 index 0000000..c332c72 --- /dev/null +++ b/src/Backend/test_data/csv/intergration_test_data_2.csv @@ -0,0 +1,1001 @@ +id,department_name,manager_name,manager_email +1,Marketing,Gardy Venton,gventon0@biglobe.ne.jp +2,Product Management,Gypsy Robb,grobb1@google.pl +3,Product Management,Hastings Girardot,hgirardot2@zdnet.com +4,Legal,Dyna Rallin,drallin3@hubpages.com +5,Training,Karolina Hayhurst,khayhurst4@360.cn +6,Engineering,Anatol Goutcher,agoutcher5@nsw.gov.au +7,Support,Ashleigh Callam,acallam6@imdb.com +8,Accounting,Yorker Dowle,ydowle7@elpais.com +9,Accounting,Vale Lilbourne,vlilbourne8@jalbum.net +10,Product Management,Reeba Haggerstone,rhaggerstone9@tripod.com +11,Training,Cari Lyle,clylea@comcast.net +12,Services,Alina 
Soro,asorob@census.gov +13,Accounting,Henrik Count,hcountc@tumblr.com +14,Business Development,Ralph Labbez,rlabbezd@unblog.fr +15,Sales,Pincas Newvill,pnewville@infoseek.co.jp +16,Support,Axel McFater,amcfaterf@seattletimes.com +17,Support,Lorita Wylder,lwylderg@ibm.com +18,Support,Celine Conelly,cconellyh@google.co.uk +19,Training,Bertine Leatherborrow,bleatherborrowi@printfriendly.com +20,Sales,Billie Stalman,bstalmanj@google.es +21,Marketing,Darcee Orwin,dorwink@yahoo.com +22,Support,Normie Adie,nadiel@tripod.com +23,Research and Development,Antonietta Steinhammer,asteinhammerm@foxnews.com +24,Research and Development,Delainey Kennsley,dkennsleyn@ehow.com +25,Accounting,Kingsly Civitillo,kcivitilloo@tuttocitta.it +26,Legal,Denise Blucher,dblucherp@cam.ac.uk +27,Human Resources,Darrell Andrieu,dandrieuq@jalbum.net +28,Engineering,Thurstan Bendle,tbendler@bluehost.com +29,Legal,Eleni Collcott,ecollcotts@parallels.com +30,Services,Rhona Ousby,rousbyt@dmoz.org +31,Engineering,Raphaela Peirpoint,rpeirpointu@jalbum.net +32,Business Development,Jillayne Tunnock,jtunnockv@symantec.com +33,Marketing,Remington Oldaker,roldakerw@wisc.edu +34,Support,Sherm Knapp,sknappx@forbes.com +35,Business Development,Roman Nares,rnaresy@amazon.com +36,Human Resources,Merlina Medendorp,mmedendorpz@shop-pro.jp +37,Sales,Hermione Jopke,hjopke10@ucsd.edu +38,Human Resources,Agna Scroggie,ascroggie11@hostgator.com +39,Legal,Rosamund Follett,rfollett12@guardian.co.uk +40,Human Resources,Jamie Beteriss,jbeteriss13@bigcartel.com +41,Training,Bernardine Beardshall,bbeardshall14@discovery.com +42,Marketing,Avrom Toone,atoone15@accuweather.com +43,Human Resources,Lucias O'Hagan,lohagan16@princeton.edu +44,Research and Development,Everett Shambrook,eshambrook17@usatoday.com +45,Research and Development,Randolf Arpur,rarpur18@cdc.gov +46,Services,Ferguson Gorrick,fgorrick19@sphinn.com +47,Services,Natalina Forber,nforber1a@google.com +48,Human Resources,Emmalynn Bim,ebim1b@chicagotribune.com 
+49,Accounting,Minor Spellacey,mspellacey1c@youtube.com +50,Research and Development,Tony Gilbane,tgilbane1d@youtube.com +51,Marketing,Dorotea Bryning,dbryning1e@booking.com +52,Support,Sansone D'Elias,sdelias1f@indiegogo.com +53,Marketing,Terrence Cromblehome,tcromblehome1g@bizjournals.com +54,Training,Lemmy Budgen,lbudgen1h@miibeian.gov.cn +55,Product Management,Sidney Vanichkin,svanichkin1i@gnu.org +56,Services,Bette Antunes,bantunes1j@reuters.com +57,Legal,Benjy Leafe,bleafe1k@ucsd.edu +58,Engineering,Ainslie Drewry,adrewry1l@163.com +59,Marketing,Earl Kaesmakers,ekaesmakers1m@oakley.com +60,Research and Development,Sela Dearden,sdearden1n@delicious.com +61,Support,Kati Macknish,kmacknish1o@discovery.com +62,Marketing,Edan Jockle,ejockle1p@yale.edu +63,Business Development,Xaviera Strettle,xstrettle1q@independent.co.uk +64,Services,Maxy Hoofe,mhoofe1r@wikimedia.org +65,Training,Stephine Fransemai,sfransemai1s@slate.com +66,Legal,Tessy Rowell,trowell1t@reddit.com +67,Legal,Rik Kilbourne,rkilbourne1u@altervista.org +68,Sales,Ariana Eich,aeich1v@mozilla.org +69,Legal,Melvyn Semper,msemper1w@quantcast.com +70,Services,Wallis Caldwell,wcaldwell1x@senate.gov +71,Legal,Alejandrina Siaskowski,asiaskowski1y@studiopress.com +72,Research and Development,Caryl Balffye,cbalffye1z@ft.com +73,Research and Development,Trudie Bamlet,tbamlet20@netlog.com +74,Support,Alard Dykes,adykes21@businessweek.com +75,Support,Perkin Mackieson,pmackieson22@abc.net.au +76,Sales,Mikaela Padillo,mpadillo23@jimdo.com +77,Legal,Irma Maylour,imaylour24@ucoz.com +78,Product Management,Arnoldo Cockburn,acockburn25@imageshack.us +79,Research and Development,Arv Tanfield,atanfield26@slate.com +80,Human Resources,Leanor McLevie,lmclevie27@edublogs.org +81,Services,Imojean Silbermann,isilbermann28@parallels.com +82,Accounting,Mavis Wiskar,mwiskar29@acquirethisname.com +83,Business Development,Pearline Sillett,psillett2a@friendfeed.com +84,Services,Sophronia Shawcroft,sshawcroft2b@adobe.com 
+85,Support,Zacharias Divver,zdivver2c@upenn.edu +86,Services,Hazel Casterton,hcasterton2d@simplemachines.org +87,Sales,Romain Wells,rwells2e@gravatar.com +88,Support,Emmalynne Tapscott,etapscott2f@weather.com +89,Product Management,Kyle Gwynn,kgwynn2g@ox.ac.uk +90,Engineering,Spence Isherwood,sisherwood2h@dedecms.com +91,Support,Jessie Beecraft,jbeecraft2i@vkontakte.ru +92,Business Development,Gallagher Perkis,gperkis2j@merriam-webster.com +93,Research and Development,Ferguson Farquarson,ffarquarson2k@clickbank.net +94,Training,Tallie Nares,tnares2l@ezinearticles.com +95,Accounting,Kent Gaskill,kgaskill2m@nbcnews.com +96,Human Resources,Nickola Plomer,nplomer2n@wikispaces.com +97,Legal,Keefer Brownfield,kbrownfield2o@nytimes.com +98,Legal,Genna Cardenas,gcardenas2p@indiatimes.com +99,Marketing,Dori Hagwood,dhagwood2q@google.fr +100,Legal,Bertie Wilber,bwilber2r@disqus.com +101,Services,Brandon MacGillespie,bmacgillespie2s@blogspot.com +102,Sales,Yvette Hankins,yhankins2t@themeforest.net +103,Legal,Flossie Yukhnevich,fyukhnevich2u@reference.com +104,Services,Kendall Gocke,kgocke2v@psu.edu +105,Research and Development,Amos Davydzenko,adavydzenko2w@upenn.edu +106,Engineering,Shelden Frampton,sframpton2x@irs.gov +107,Business Development,Griswold Rain,grain2y@wunderground.com +108,Sales,Dorian Rogeon,drogeon2z@about.me +109,Support,Vilhelmina Whitlaw,vwhitlaw30@answers.com +110,Accounting,Phillie Janusz,pjanusz31@istockphoto.com +111,Training,Sharon Horbart,shorbart32@archive.org +112,Support,Abran Eidler,aeidler33@irs.gov +113,Marketing,Zedekiah Densham,zdensham34@nps.gov +114,Research and Development,Matelda Buzek,mbuzek35@etsy.com +115,Marketing,Carita Wyllcock,cwyllcock36@gmpg.org +116,Marketing,Phaedra Yellowlees,pyellowlees37@cnet.com +117,Training,Rik Sapena,rsapena38@opensource.org +118,Human Resources,Duke Ranscomb,dranscomb39@github.com +119,Support,Ambrosio Hailston,ahailston3a@aboutads.info +120,Support,Kania Pumfrey,kpumfrey3b@nasa.gov +121,Human 
Resources,Godfrey Clavey,gclavey3c@wisc.edu +122,Human Resources,Leila Barkhouse,lbarkhouse3d@jimdo.com +123,Training,Camel Usborn,cusborn3e@foxnews.com +124,Training,Marvin Everett,meverett3f@networksolutions.com +125,Product Management,Nappie Quainton,nquainton3g@networksolutions.com +126,Legal,Berne Cleminshaw,bcleminshaw3h@google.com.au +127,Accounting,Fionnula Hoodless,fhoodless3i@umich.edu +128,Training,Amabelle Ede,aede3j@wikipedia.org +129,Accounting,Araldo MacKomb,amackomb3k@squarespace.com +130,Accounting,Jaime Rosenqvist,jrosenqvist3l@moonfruit.com +131,Accounting,Drusie Phillp,dphillp3m@google.pl +132,Sales,Worden Branscomb,wbranscomb3n@meetup.com +133,Training,Marshal Keward,mkeward3o@spiegel.de +134,Accounting,Jammie Palluschek,jpalluschek3p@chronoengine.com +135,Engineering,Homere Plumbe,hplumbe3q@msu.edu +136,Research and Development,Gearard Linstead,glinstead3r@dagondesign.com +137,Research and Development,Sybilla Schrader,sschrader3s@seesaa.net +138,Human Resources,Car Sandiford,csandiford3t@dot.gov +139,Accounting,Marget Portwaine,mportwaine3u@ucoz.com +140,Research and Development,Gardie Bonefant,gbonefant3v@accuweather.com +141,Legal,Pooh Gilburt,pgilburt3w@google.es +142,Sales,Free Lohering,flohering3x@slate.com +143,Product Management,Cherin Demare,cdemare3y@webmd.com +144,Engineering,Donnamarie Ivanchenkov,divanchenkov3z@nationalgeographic.com +145,Support,Wyatt Terzi,wterzi40@toplist.cz +146,Human Resources,Flemming Mallord,fmallord41@de.vu +147,Research and Development,Janette Pendle,jpendle42@yellowbook.com +148,Services,Kacey Robuchon,krobuchon43@addthis.com +149,Business Development,Lorri Michallat,lmichallat44@npr.org +150,Human Resources,Grete Leggat,gleggat45@cdc.gov +151,Training,Iolanthe Francisco,ifrancisco46@ovh.net +152,Marketing,Charmion Takkos,ctakkos47@infoseek.co.jp +153,Human Resources,Marybelle Plampin,mplampin48@princeton.edu +154,Services,Gwenora Cawthra,gcawthra49@apache.org +155,Support,Barny 
Woodlands,bwoodlands4a@freewebs.com +156,Accounting,Britt Brayshaw,bbrayshaw4b@cornell.edu +157,Marketing,Alexander Maymand,amaymand4c@xrea.com +158,Engineering,Jenn Stirley,jstirley4d@blogs.com +159,Research and Development,Elysia Halso,ehalso4e@umich.edu +160,Accounting,Felicdad Shyres,fshyres4f@bloomberg.com +161,Services,Freda Shetliff,fshetliff4g@prlog.org +162,Human Resources,Rafi Ewings,rewings4h@example.com +163,Engineering,Martin Valenti,mvalenti4i@stumbleupon.com +164,Marketing,Valera Capel,vcapel4j@ocn.ne.jp +165,Research and Development,Padraic Morson,pmorson4k@disqus.com +166,Business Development,Tito O' Mahony,to4l@statcounter.com +167,Sales,Yancy Baudinot,ybaudinot4m@harvard.edu +168,Marketing,Ethel MacCaull,emaccaull4n@list-manage.com +169,Sales,Garrot Hulance,ghulance4o@sun.com +170,Services,Wilona Huddlestone,whuddlestone4p@google.nl +171,Services,Daryl Hubbins,dhubbins4q@arizona.edu +172,Business Development,Audi Wyles,awyles4r@unblog.fr +173,Accounting,Genevieve Gauvin,ggauvin4s@usgs.gov +174,Support,Jocelyne Tremblet,jtremblet4t@wp.com +175,Research and Development,Clarinda Orgee,corgee4u@sakura.ne.jp +176,Human Resources,Kip Paulon,kpaulon4v@hud.gov +177,Legal,Shaw Langeren,slangeren4w@java.com +178,Services,Rolando Grimley,rgrimley4x@vkontakte.ru +179,Sales,Adora Crossfield,acrossfield4y@google.es +180,Training,Tadeo Tierney,ttierney4z@bizjournals.com +181,Training,Sydney Huskisson,shuskisson50@oracle.com +182,Sales,Inge Fellenor,ifellenor51@craigslist.org +183,Human Resources,Vergil Aspinwall,vaspinwall52@economist.com +184,Services,Jamison MacSweeney,jmacsweeney53@chron.com +185,Engineering,Bessie Othen,bothen54@illinois.edu +186,Research and Development,Beau Furzer,bfurzer55@smugmug.com +187,Product Management,Chip Brake,cbrake56@xrea.com +188,Support,Heinrik Van Castele,hvan57@mit.edu +189,Business Development,Tess Vogelein,tvogelein58@eepurl.com +190,Research and Development,Jillayne Anscott,janscott59@phoca.cz +191,Research and 
Development,Karlen Ruggiero,kruggiero5a@surveymonkey.com +192,Legal,Patrice Readett,preadett5b@youtu.be +193,Legal,Arthur Rue,arue5c@furl.net +194,Training,Bram Cotelard,bcotelard5d@sogou.com +195,Sales,Lonna Steinhammer,lsteinhammer5e@bigcartel.com +196,Human Resources,Noella Caret,ncaret5f@mtv.com +197,Research and Development,Trenton Fearnside,tfearnside5g@reverbnation.com +198,Human Resources,Liuka Lawless,llawless5h@seesaa.net +199,Marketing,Isac Goodred,igoodred5i@columbia.edu +200,Sales,Eileen Minister,eminister5j@archive.org +201,Accounting,Paige Malimoe,pmalimoe5k@army.mil +202,Business Development,Suzann Packman,spackman5l@tinyurl.com +203,Support,Miof mela Burbudge,mmela5m@ed.gov +204,Legal,Marina Incogna,mincogna5n@illinois.edu +205,Human Resources,Silvana Gwyllt,sgwyllt5o@163.com +206,Accounting,Ara Kennelly,akennelly5p@xinhuanet.com +207,Human Resources,Lyn Nolda,lnolda5q@prweb.com +208,Training,Davon Larroway,dlarroway5r@constantcontact.com +209,Accounting,Yasmeen Fabri,yfabri5s@shutterfly.com +210,Marketing,Elfrida Romushkin,eromushkin5t@canalblog.com +211,Research and Development,Boycey Oattes,boattes5u@nasa.gov +212,Accounting,Loise O'Mannion,lomannion5v@businessweek.com +213,Business Development,Angel Drewitt,adrewitt5w@cdbaby.com +214,Support,Daphene Redan,dredan5x@51.la +215,Engineering,Janice Lartice,jlartice5y@cbc.ca +216,Legal,Nicola Gwillym,ngwillym5z@biglobe.ne.jp +217,Support,Darb Mioni,dmioni60@cbslocal.com +218,Engineering,Pacorro Kenninghan,pkenninghan61@miitbeian.gov.cn +219,Accounting,Vasili Verrico,vverrico62@chronoengine.com +220,Sales,Coralie Gaiger,cgaiger63@behance.net +221,Human Resources,Cecilio Crooks,ccrooks64@csmonitor.com +222,Human Resources,Elenore Dudny,edudny65@blog.com +223,Business Development,Meggy Finley,mfinley66@craigslist.org +224,Accounting,Dasie Benedetti,dbenedetti67@dion.ne.jp +225,Support,Livvyy Rudolph,lrudolph68@smh.com.au +226,Human Resources,Maighdiln Etheridge,metheridge69@latimes.com +227,Sales,Deny 
Matieu,dmatieu6a@upenn.edu +228,Product Management,Cathy Philbin,cphilbin6b@networksolutions.com +229,Engineering,Grethel Sends,gsends6c@adobe.com +230,Product Management,Ashli Koenraad,akoenraad6d@nps.gov +231,Business Development,Leona Ind,lind6e@reverbnation.com +232,Engineering,Hadley Gullam,hgullam6f@guardian.co.uk +233,Sales,Kata Broadey,kbroadey6g@histats.com +234,Sales,Christoffer Wethers,cwethers6h@bloglines.com +235,Human Resources,Lucilia Whacket,lwhacket6i@weebly.com +236,Research and Development,Pamelina Hassekl,phassekl6j@plala.or.jp +237,Services,Jake O'Dunniom,jodunniom6k@cisco.com +238,Accounting,Madonna Klimowicz,mklimowicz6l@jigsy.com +239,Sales,Valera Cuesta,vcuesta6m@51.la +240,Business Development,Mord Charteris,mcharteris6n@smugmug.com +241,Training,Babara Marsy,bmarsy6o@ezinearticles.com +242,Training,Jacobo Gherardesci,jgherardesci6p@so-net.ne.jp +243,Human Resources,Sanford Rate,srate6q@newsvine.com +244,Legal,Kristofer Tedman,ktedman6r@hatena.ne.jp +245,Legal,Alli Gerrett,agerrett6s@behance.net +246,Human Resources,Bern Falshaw,bfalshaw6t@dropbox.com +247,Human Resources,Loise Backwell,lbackwell6u@fotki.com +248,Accounting,Lib Snowling,lsnowling6v@com.com +249,Engineering,Cherilynn Martynka,cmartynka6w@skyrock.com +250,Sales,Reyna Buckeridge,rbuckeridge6x@prweb.com +251,Business Development,Nonnah Duham,nduham6y@themeforest.net +252,Accounting,Leola Neller,lneller6z@nymag.com +253,Services,Cyrille Freeman,cfreeman70@opensource.org +254,Research and Development,Eberto Folliott,efolliott71@baidu.com +255,Accounting,Henriette Dmitriev,hdmitriev72@harvard.edu +256,Training,Lane Cardew,lcardew73@msu.edu +257,Engineering,Morganica Dixon,mdixon74@statcounter.com +258,Legal,Raffarty Kerne,rkerne75@google.com.hk +259,Marketing,Brear Larderot,blarderot76@blinklist.com +260,Training,Sophia Cristofor,scristofor77@1und1.de +261,Sales,Eustace Pollins,epollins78@java.com +262,Product Management,Ethelin Trowsdale,etrowsdale79@newsvine.com 
+263,Training,Hermann Solloway,hsolloway7a@phoca.cz +264,Support,Robby Lygoe,rlygoe7b@blogger.com +265,Marketing,Megen Cathersides,mcathersides7c@edublogs.org +266,Sales,Ceil Caudelier,ccaudelier7d@bloglines.com +267,Product Management,Bone Orrobin,borrobin7e@cbc.ca +268,Marketing,Travus Nottingham,tnottingham7f@bluehost.com +269,Training,Helge Titterell,htitterell7g@businessweek.com +270,Accounting,Ann-marie Surcombe,asurcombe7h@wiley.com +271,Business Development,Jerald Pestor,jpestor7i@cdc.gov +272,Human Resources,Blisse Briskey,bbriskey7j@t-online.de +273,Research and Development,Blondell Garaghan,bgaraghan7k@zimbio.com +274,Training,Jacquie Escalera,jescalera7l@people.com.cn +275,Support,Darcy Dumbrall,ddumbrall7m@cmu.edu +276,Business Development,Emmit Easun,eeasun7n@ihg.com +277,Engineering,Tedd Petticrew,tpetticrew7o@privacy.gov.au +278,Support,Dede Conradsen,dconradsen7p@wufoo.com +279,Engineering,Mace Clows,mclows7q@infoseek.co.jp +280,Product Management,Constantia McElroy,cmcelroy7r@360.cn +281,Accounting,Sansone Gudahy,sgudahy7s@boston.com +282,Services,Tersina Stolli,tstolli7t@pen.io +283,Legal,Emylee Sheara,esheara7u@hubpages.com +284,Sales,Perri Kaesmans,pkaesmans7v@unesco.org +285,Engineering,Ignazio Griggs,igriggs7w@addthis.com +286,Accounting,Maddie Oliphand,moliphand7x@blogs.com +287,Research and Development,Elsbeth Vann,evann7y@who.int +288,Legal,Latashia Walewski,lwalewski7z@ebay.com +289,Training,Wilhelmine Matyushenko,wmatyushenko80@independent.co.uk +290,Research and Development,Vinni Alves,valves81@php.net +291,Human Resources,Dom Subhan,dsubhan82@cnn.com +292,Training,Norby Epton,nepton83@salon.com +293,Business Development,Allan Oguz,aoguz84@yale.edu +294,Accounting,Marjorie Pedri,mpedri85@sakura.ne.jp +295,Training,Miran Lieb,mlieb86@yahoo.co.jp +296,Business Development,Whit Maffulli,wmaffulli87@mail.ru +297,Engineering,Barbi Simmill,bsimmill88@spotify.com +298,Services,Gothart Gason,ggason89@yellowbook.com +299,Human Resources,Anny 
McGuffog,amcguffog8a@berkeley.edu +300,Human Resources,Charmain St Leger,cst8b@hp.com +301,Business Development,Julienne Biddle,jbiddle8c@usa.gov +302,Accounting,Son Stoggles,sstoggles8d@skype.com +303,Product Management,Valerye Ardron,vardron8e@addthis.com +304,Marketing,Mariam Pearman,mpearman8f@hubpages.com +305,Engineering,Sanders Thaxter,sthaxter8g@over-blog.com +306,Business Development,Harriet Willingham,hwillingham8h@illinois.edu +307,Marketing,Sauveur Govett,sgovett8i@soundcloud.com +308,Marketing,Berna Veness,bveness8j@miibeian.gov.cn +309,Business Development,Rory Atkins,ratkins8k@bandcamp.com +310,Human Resources,Luelle Crompton,lcrompton8l@house.gov +311,Marketing,Jamey Topham,jtopham8m@biblegateway.com +312,Training,Roselle Orum,rorum8n@eepurl.com +313,Accounting,Pancho Tumini,ptumini8o@oakley.com +314,Product Management,Halimeda Hurl,hhurl8p@google.nl +315,Product Management,Vinnie Zannotelli,vzannotelli8q@instagram.com +316,Legal,Junie Rutigliano,jrutigliano8r@furl.net +317,Human Resources,Randall Newitt,rnewitt8s@weather.com +318,Product Management,Borg Fearne,bfearne8t@jiathis.com +319,Research and Development,Brier A'Barrow,babarrow8u@weibo.com +320,Legal,Van Goter,vgoter8v@illinois.edu +321,Research and Development,Ruy Allred,rallred8w@oakley.com +322,Sales,Selinda Maber,smaber8x@va.gov +323,Sales,Lacey Carter,lcarter8y@ameblo.jp +324,Research and Development,Albertine Potteridge,apotteridge8z@ameblo.jp +325,Marketing,Bary Privett,bprivett90@flavors.me +326,Services,Abbey De Gregoli,ade91@ow.ly +327,Product Management,Sonni Ottewell,sottewell92@cyberchimps.com +328,Legal,Jessee Cazereau,jcazereau93@woothemes.com +329,Training,Betteanne Border,bborder94@barnesandnoble.com +330,Training,Mickie Cockson,mcockson95@columbia.edu +331,Engineering,Julian Sedgwick,jsedgwick96@yelp.com +332,Human Resources,Tye Palatini,tpalatini97@rambler.ru +333,Support,Alysia Fishpoole,afishpoole98@weather.com +334,Marketing,Reese 
Antonomoli,rantonomoli99@pagesperso-orange.fr +335,Legal,Hynda Pfaffe,hpfaffe9a@cloudflare.com +336,Services,Claiborne Klesse,cklesse9b@craigslist.org +337,Sales,Lisetta Livzey,llivzey9c@istockphoto.com +338,Support,Karalynn Cumbridge,kcumbridge9d@mediafire.com +339,Services,Carmita Hoofe,choofe9e@whitehouse.gov +340,Accounting,Carmen Peggrem,cpeggrem9f@bloomberg.com +341,Legal,Roselle Livingston,rlivingston9g@shinystat.com +342,Support,Bevin Stampfer,bstampfer9h@bigcartel.com +343,Human Resources,Basilio Sline,bsline9i@symantec.com +344,Engineering,Arlana Sabatier,asabatier9j@xinhuanet.com +345,Accounting,Letisha Blanchet,lblanchet9k@engadget.com +346,Research and Development,Lyon Schumacher,lschumacher9l@dailymotion.com +347,Human Resources,Henrieta O'Brogane,hobrogane9m@techcrunch.com +348,Marketing,Garreth Breslin,gbreslin9n@webs.com +349,Research and Development,Rem Chiverstone,rchiverstone9o@upenn.edu +350,Marketing,Chadwick Melly,cmelly9p@discuz.net +351,Sales,Salvidor Middis,smiddis9q@cam.ac.uk +352,Business Development,Morie Churchley,mchurchley9r@pinterest.com +353,Product Management,Oberon Scantlebury,oscantlebury9s@youtu.be +354,Product Management,Joela Turfrey,jturfrey9t@businesswire.com +355,Marketing,Claudia Ganning,cganning9u@lycos.com +356,Product Management,Franchot Breston,fbreston9v@opera.com +357,Training,Glad Ort,gort9w@buzzfeed.com +358,Accounting,Monah Petto,mpetto9x@vkontakte.ru +359,Engineering,Iormina Dyble,idyble9y@pen.io +360,Business Development,Darnall Scryne,dscryne9z@senate.gov +361,Engineering,Delila Gosney,dgosneya0@fotki.com +362,Accounting,Clair Batchelar,cbatchelara1@google.pl +363,Support,Thain Caffrey,tcaffreya2@usda.gov +364,Services,Ali Gulvin,agulvina3@miibeian.gov.cn +365,Marketing,Xylina Loyd,xloyda4@infoseek.co.jp +366,Human Resources,Lucinda Duetsche,lduetschea5@time.com +367,Training,Pietrek Keeler,pkeelera6@ycombinator.com +368,Engineering,Natasha Vassay,nvassaya7@foxnews.com +369,Legal,Daren 
Lanfear,dlanfeara8@weebly.com +370,Services,Blanca Abate,babatea9@g.co +371,Training,Jacklin Cahill,jcahillaa@amazon.co.uk +372,Business Development,Noelle McFayden,nmcfaydenab@indiatimes.com +373,Engineering,Brnaba Hryncewicz,bhryncewiczac@huffingtonpost.com +374,Services,Juliette Clawson,jclawsonad@ted.com +375,Business Development,Bil Kineton,bkinetonae@e-recht24.de +376,Research and Development,Natty Clear,nclearaf@cpanel.net +377,Marketing,Karola Sandercock,ksandercockag@spotify.com +378,Sales,Ashley Manie,amanieah@apache.org +379,Engineering,Ayn McCowen,amccowenai@scribd.com +380,Human Resources,Calvin Muggeridge,cmuggeridgeaj@webmd.com +381,Support,Ruthie Exton,rextonak@nymag.com +382,Marketing,Even Christofor,echristoforal@eventbrite.com +383,Training,Nelle Jull,njullam@washington.edu +384,Marketing,Inessa Viggars,iviggarsan@ucla.edu +385,Legal,Jehanna Whale,jwhaleao@xing.com +386,Services,Annaliese Dye,adyeap@ucsd.edu +387,Business Development,Regen Ussher,russheraq@huffingtonpost.com +388,Accounting,Kattie Yanin,kyaninar@shinystat.com +389,Product Management,Mead Bagger,mbaggeras@mysql.com +390,Marketing,Pinchas MacAlpyne,pmacalpyneat@wordpress.org +391,Business Development,Aila Glavis,aglavisau@storify.com +392,Support,Hadley Pinchin,hpinchinav@arstechnica.com +393,Accounting,Zenia Fligg,zfliggaw@netlog.com +394,Research and Development,Lin Gilardoni,lgilardoniax@ft.com +395,Accounting,Klarika McIlenna,kmcilennaay@usatoday.com +396,Training,Hakim Kimmel,hkimmelaz@nba.com +397,Sales,Ellswerth Tant,etantb0@smugmug.com +398,Engineering,Joy Bugg,jbuggb1@opensource.org +399,Training,Shepperd Boarder,sboarderb2@quantcast.com +400,Research and Development,Aloisia Demcak,ademcakb3@cbc.ca +401,Services,Skye Waldrum,swaldrumb4@slideshare.net +402,Business Development,Antony Blenkhorn,ablenkhornb5@smugmug.com +403,Marketing,Muhammad Di Nisco,mdib6@smugmug.com +404,Support,Binni Forsdike,bforsdikeb7@home.pl +405,Business Development,Torey 
Cattanach,tcattanachb8@yahoo.co.jp +406,Human Resources,Jessy Sallis,jsallisb9@nba.com +407,Business Development,Rosabella Desvignes,rdesvignesba@rediff.com +408,Research and Development,Sharleen Wadham,swadhambb@skype.com +409,Support,Amie Paudin,apaudinbc@examiner.com +410,Services,Trenna Clout,tcloutbd@tiny.cc +411,Accounting,Bernita Cumbridge,bcumbridgebe@desdev.cn +412,Accounting,Tripp Cruikshanks,tcruikshanksbf@thetimes.co.uk +413,Training,Jackie Eneas,jeneasbg@studiopress.com +414,Legal,Cece Selwyn,cselwynbh@microsoft.com +415,Training,Ernaline Castell,ecastellbi@hhs.gov +416,Services,Edward De Atta,edebj@google.com.hk +417,Sales,Lenette Akett,lakettbk@usgs.gov +418,Research and Development,Margarethe Behneke,mbehnekebl@furl.net +419,Accounting,Leland Bineham,lbinehambm@biblegateway.com +420,Human Resources,Mehetabel Lutsch,mlutschbn@answers.com +421,Accounting,Candra Bendley,cbendleybo@simplemachines.org +422,Human Resources,Shoshana Herrero,sherrerobp@amazon.co.uk +423,Accounting,Theodore Renoden,trenodenbq@oakley.com +424,Accounting,Chandal Antonetti,cantonettibr@kickstarter.com +425,Services,Elbertina Autie,eautiebs@joomla.org +426,Training,Rakel Bedome,rbedomebt@wikipedia.org +427,Product Management,Leanna Dymick,ldymickbu@creativecommons.org +428,Business Development,Ernst Clyne,eclynebv@ustream.tv +429,Sales,Chryste Colls,ccollsbw@themeforest.net +430,Support,Kippie Alessandretti,kalessandrettibx@123-reg.co.uk +431,Product Management,Waldemar McKearnen,wmckearnenby@berkeley.edu +432,Product Management,Barnie Balshaw,bbalshawbz@angelfire.com +433,Sales,Tiphanie Broinlich,tbroinlichc0@state.tx.us +434,Accounting,Alie Coggins,acogginsc1@wsj.com +435,Accounting,Beatrix McCool,bmccoolc2@webeden.co.uk +436,Engineering,Janela Martinello,jmartinelloc3@wikimedia.org +437,Business Development,Jessamine Josuweit,jjosuweitc4@yandex.ru +438,Human Resources,Renato Scotchmoor,rscotchmoorc5@ucsd.edu +439,Legal,Eilis Wartonby,ewartonbyc6@tmall.com 
+440,Services,Eldridge Deschlein,edeschleinc7@accuweather.com +441,Engineering,Flory Haugeh,fhaugehc8@storify.com +442,Marketing,Sindee Sebley,ssebleyc9@ucla.edu +443,Marketing,Torin Le Fevre,tleca@nbcnews.com +444,Product Management,Rea McAlinion,rmcalinioncb@reference.com +445,Human Resources,Fina Going,fgoingcc@so-net.ne.jp +446,Research and Development,Marilyn MacRonald,mmacronaldcd@hhs.gov +447,Support,Ly Cyson,lcysonce@dion.ne.jp +448,Product Management,Eloise Durie,eduriecf@nba.com +449,Engineering,Stillman Edmons,sedmonscg@sitemeter.com +450,Training,Tuck Clayden,tclaydench@census.gov +451,Support,Ernesta Hasley,ehasleyci@ebay.com +452,Services,Melina Limrick,mlimrickcj@ask.com +453,Accounting,Marsha Bridewell,mbridewellck@simplemachines.org +454,Business Development,Cahra Megarry,cmegarrycl@about.com +455,Support,Norbert Stobbs,nstobbscm@imageshack.us +456,Marketing,Mira Sorrell,msorrellcn@qq.com +457,Services,Danila Congram,dcongramco@harvard.edu +458,Sales,Katleen Muffin,kmuffincp@rambler.ru +459,Marketing,Prentice Iddiens,piddienscq@google.nl +460,Sales,Brita Talboy,btalboycr@free.fr +461,Business Development,Lynne Eldershaw,leldershawcs@bbc.co.uk +462,Accounting,Madonna Mettricke,mmettrickect@canalblog.com +463,Research and Development,Paulette Boutellier,pboutelliercu@shutterfly.com +464,Training,Rosco Murrells,rmurrellscv@redcross.org +465,Product Management,Antonius Kubicki,akubickicw@opensource.org +466,Marketing,Ody Ansill,oansillcx@google.com.hk +467,Human Resources,Moselle Outibridge,moutibridgecy@unesco.org +468,Business Development,Oralie Hicks,ohickscz@seattletimes.com +469,Research and Development,Kathryn McCreadie,kmccreadied0@pcworld.com +470,Support,Elayne Dearlove,edearloved1@wordpress.com +471,Business Development,Ianthe Alfuso,ialfusod2@si.edu +472,Training,Raynor Chavey,rchaveyd3@networksolutions.com +473,Research and Development,Michel Linturn,mlinturnd4@paypal.com +474,Support,Prent Skirling,pskirlingd5@nationalgeographic.com 
+475,Support,Jeffrey Brecknock,jbrecknockd6@ihg.com +476,Legal,Ruy Beharrell,rbeharrelld7@nifty.com +477,Services,Pail Gainor,pgainord8@deviantart.com +478,Business Development,Clerc Drife,cdrifed9@fc2.com +479,Product Management,Granger Sollas,gsollasda@telegraph.co.uk +480,Training,Karita Wrixon,kwrixondb@paypal.com +481,Services,Arlene Freeburn,afreeburndc@ameblo.jp +482,Product Management,Lorettalorna Feldbau,lfeldbaudd@1und1.de +483,Human Resources,Michaeline Tipler,mtiplerde@marriott.com +484,Accounting,Ferrell Edscer,fedscerdf@netlog.com +485,Product Management,Lek Rocks,lrocksdg@flickr.com +486,Human Resources,Clemmy Hartzog,chartzogdh@elpais.com +487,Legal,Derk Lehrer,dlehrerdi@hugedomains.com +488,Product Management,Orsa Polo,opolodj@tinypic.com +489,Human Resources,Alessandra Joutapaitis,ajoutapaitisdk@wufoo.com +490,Engineering,Aubry Frere,afreredl@wikipedia.org +491,Marketing,Keith Gethings,kgethingsdm@webnode.com +492,Business Development,Amory Cawdron,acawdrondn@bluehost.com +493,Human Resources,Adlai Gigg,agiggdo@google.com +494,Business Development,Cindee Caddick,ccaddickdp@cmu.edu +495,Marketing,Emile Hanby,ehanbydq@multiply.com +496,Human Resources,Darla Farrant,dfarrantdr@ustream.tv +497,Engineering,Russ Pestricke,rpestrickeds@nasa.gov +498,Marketing,Jaime Freegard,jfreegarddt@aboutads.info +499,Human Resources,Barrie Sargeant,bsargeantdu@slideshare.net +500,Research and Development,Margarete Bowller,mbowllerdv@aol.com +501,Legal,Lynea Farnaby,lfarnabydw@intel.com +502,Training,Gal Simonite,gsimonitedx@so-net.ne.jp +503,Marketing,Scarface Einchcombe,seinchcombedy@chicagotribune.com +504,Marketing,Reidar Rugiero,rrugierodz@xinhuanet.com +505,Training,Willetta Sowersby,wsowersbye0@cam.ac.uk +506,Training,Margery Antrobus,mantrobuse1@foxnews.com +507,Marketing,Mirella Carpenter,mcarpentere2@pinterest.com +508,Support,Domingo Pidwell,dpidwelle3@quantcast.com +509,Engineering,Ferdie McIlhagga,fmcilhaggae4@nbcnews.com +510,Sales,Ruddy 
Haps,rhapse5@e-recht24.de +511,Product Management,Duky Lardner,dlardnere6@mac.com +512,Product Management,Danny Gutch,dgutche7@state.tx.us +513,Research and Development,Ibbie Oneill,ioneille8@storify.com +514,Research and Development,Uta Cookes,ucookese9@live.com +515,Business Development,Felicdad Borkett,fborkettea@163.com +516,Training,Terry de Quincey,tdeeb@scribd.com +517,Services,Jacynth McCloughen,jmccloughenec@wiley.com +518,Human Resources,Alick Mizzen,amizzened@time.com +519,Research and Development,Thorny Astbery,tastberyee@alexa.com +520,Marketing,Edmon Vowden,evowdenef@ebay.com +521,Product Management,Hubey Callaway,hcallawayeg@mayoclinic.com +522,Research and Development,Regan Brideoke,rbrideokeeh@usnews.com +523,Engineering,Bernelle Llewellen,bllewellenei@cbc.ca +524,Sales,Darell Spencock,dspencockej@tinyurl.com +525,Legal,Tristan Trowel,ttrowelek@goodreads.com +526,Research and Development,Randi Sapshed,rsapshedel@lulu.com +527,Accounting,Lowrance Drayn,ldraynem@nationalgeographic.com +528,Sales,Delmar Slides,dslidesen@cargocollective.com +529,Training,Ward Reijmers,wreijmerseo@blog.com +530,Research and Development,Perla Dashper,pdashperep@msu.edu +531,Marketing,Carr Naughton,cnaughtoneq@nbcnews.com +532,Accounting,Karissa Southway,ksouthwayer@themeforest.net +533,Legal,Blanca Rraundl,brraundles@wordpress.org +534,Training,Anastasia MacAlister,amacalisteret@digg.com +535,Marketing,Milena MacFarland,mmacfarlandeu@pen.io +536,Human Resources,Gwenora Fuente,gfuenteev@who.int +537,Business Development,Christina Smitham,csmithamew@engadget.com +538,Support,Abelard Dowe,adoweex@taobao.com +539,Services,Kerk Dewdney,kdewdneyey@mysql.com +540,Research and Development,Noah Riding,nridingez@mayoclinic.com +541,Human Resources,Sandro Barenski,sbarenskif0@gov.uk +542,Research and Development,Edee Gazzard,egazzardf1@jalbum.net +543,Human Resources,Verge Measen,vmeasenf2@myspace.com +544,Support,Whitman Snibson,wsnibsonf3@wisc.edu +545,Training,Carina 
Moodie,cmoodief4@umn.edu +546,Engineering,Anetta Maymand,amaymandf5@is.gd +547,Engineering,Amalee Geal,agealf6@archive.org +548,Human Resources,Coralyn Haysham,chayshamf7@livejournal.com +549,Human Resources,Sigfried Codlin,scodlinf8@plala.or.jp +550,Support,Raynell Livezey,rlivezeyf9@google.it +551,Accounting,Ethelin Lappine,elappinefa@bandcamp.com +552,Sales,Theobald Oxx,toxxfb@netvibes.com +553,Support,Granthem Leverette,gleverettefc@symantec.com +554,Support,Ariel Hartus,ahartusfd@scribd.com +555,Training,Olvan Schankelborg,oschankelborgfe@so-net.ne.jp +556,Research and Development,Anallese Blumer,ablumerff@chron.com +557,Services,Lynne Frenchum,lfrenchumfg@wunderground.com +558,Training,Ursulina Serginson,userginsonfh@macromedia.com +559,Sales,Marketa Hargess,mhargessfi@usda.gov +560,Training,Fitzgerald Bramble,fbramblefj@hubpages.com +561,Research and Development,Ema Diable,ediablefk@cdc.gov +562,Legal,Tyson Quincey,tquinceyfl@domainmarket.com +563,Marketing,Brear Sharple,bsharplefm@elegantthemes.com +564,Training,Jordana Renzini,jrenzinifn@oaic.gov.au +565,Legal,Vivi Wetwood,vwetwoodfo@geocities.com +566,Training,Reece Westney,rwestneyfp@unc.edu +567,Product Management,Kial Gent,kgentfq@independent.co.uk +568,Accounting,Babette Chrichton,bchrichtonfr@ibm.com +569,Business Development,Aurelie Gush,agushfs@reference.com +570,Marketing,Cori Lammie,clammieft@theguardian.com +571,Marketing,Birk Grimditch,bgrimditchfu@spotify.com +572,Engineering,Rossie Casbon,rcasbonfv@psu.edu +573,Services,Bertine Iggulden,bigguldenfw@amazon.co.jp +574,Research and Development,Egbert Gudgen,egudgenfx@cdbaby.com +575,Human Resources,Perice Hefford,pheffordfy@sogou.com +576,Business Development,Eben Wormell,ewormellfz@indiatimes.com +577,Business Development,Vallie Riccio,vricciog0@livejournal.com +578,Business Development,Drake Gabbotts,dgabbottsg1@altervista.org +579,Accounting,Stu Ubank,subankg2@arizona.edu +580,Business Development,Althea Kinney,akinneyg3@indiegogo.com 
+581,Sales,Marlene Brambell,mbrambellg4@com.com +582,Human Resources,Melitta Sandwich,msandwichg5@networksolutions.com +583,Human Resources,Kelsey Bucktrout,kbucktroutg6@mac.com +584,Sales,Sinclair Skentelbery,sskentelberyg7@about.com +585,Business Development,Archie Knappitt,aknappittg8@discuz.net +586,Sales,Harrison Royal,hroyalg9@sun.com +587,Human Resources,Clementine Grishanov,cgrishanovga@marketwatch.com +588,Sales,Winni Jacox,wjacoxgb@xrea.com +589,Sales,Candra Sturzaker,csturzakergc@cyberchimps.com +590,Human Resources,Howey Featenby,hfeatenbygd@bbb.org +591,Research and Development,Tamara Dewen,tdewenge@businessweek.com +592,Engineering,Giustino Cabell,gcabellgf@hubpages.com +593,Accounting,Dynah Smorthwaite,dsmorthwaitegg@purevolume.com +594,Support,Giavani Udy,gudygh@google.ca +595,Engineering,Mickie Hegg,mhegggi@shinystat.com +596,Business Development,Vivian Gatrill,vgatrillgj@instagram.com +597,Services,Evaleen Harniman,eharnimangk@ted.com +598,Product Management,Wendi Craighall,wcraighallgl@nyu.edu +599,Business Development,Agneta Hargate,ahargategm@stanford.edu +600,Product Management,Sarette Lind,slindgn@lycos.com +601,Legal,Bonny Jeandot,bjeandotgo@berkeley.edu +602,Product Management,Ella Seebert,eseebertgp@tripod.com +603,Business Development,Judd Lumley,jlumleygq@furl.net +604,Services,Jens Rouby,jroubygr@wired.com +605,Services,Hailey Dorin,hdorings@smh.com.au +606,Accounting,Tildie Greguoli,tgreguoligt@printfriendly.com +607,Sales,Eryn Picardo,epicardogu@tripadvisor.com +608,Sales,Sophie Belk,sbelkgv@wp.com +609,Training,Harmon Ratnage,hratnagegw@yellowbook.com +610,Product Management,Germayne Baudts,gbaudtsgx@over-blog.com +611,Support,Christel Chaimson,cchaimsongy@vimeo.com +612,Product Management,Nisse Matusson,nmatussongz@blog.com +613,Services,Selia Iori,siorih0@va.gov +614,Sales,Lauralee Scarce,lscarceh1@ebay.co.uk +615,Business Development,Helaina Walpole,hwalpoleh2@blogger.com +616,Business Development,Randee 
Hymers,rhymersh3@bloomberg.com +617,Support,Vonnie Barfoot,vbarfooth4@ameblo.jp +618,Legal,Klemens Fyfe,kfyfeh5@ocn.ne.jp +619,Services,Worthy Greste,wgresteh6@nymag.com +620,Services,Patric Richley,prichleyh7@sciencedaily.com +621,Sales,Helga Albers,halbersh8@usda.gov +622,Business Development,Muffin Cocks,mcocksh9@apple.com +623,Legal,Henrietta Bentje,hbentjeha@spotify.com +624,Services,Meade Burtonwood,mburtonwoodhb@facebook.com +625,Business Development,Ralina Hargess,rhargesshc@flavors.me +626,Business Development,Luci Yuranovev,lyuranovevhd@oaic.gov.au +627,Support,Alvinia Leftwich,aleftwichhe@bbc.co.uk +628,Research and Development,Davine Matushevich,dmatushevichhf@newsvine.com +629,Sales,Wilmar Regitz,wregitzhg@e-recht24.de +630,Sales,Marian Fancourt,mfancourthh@sun.com +631,Services,Carmelle Owttrim,cowttrimhi@salon.com +632,Business Development,Lindsay Creenan,lcreenanhj@phoca.cz +633,Support,Brendin Yakebovich,byakebovichhk@chron.com +634,Engineering,Brennen Galsworthy,bgalsworthyhl@foxnews.com +635,Legal,Gardy Stampe,gstampehm@shop-pro.jp +636,Services,Sonnnie Firbank,sfirbankhn@samsung.com +637,Business Development,Lawry Cutchie,lcutchieho@geocities.com +638,Sales,Binny Berge,bbergehp@weather.com +639,Services,Bay Alp,balphq@utexas.edu +640,Business Development,Betsey Trahar,btraharhr@arizona.edu +641,Legal,Monique Antunez,mantunezhs@skype.com +642,Support,Phillipe Cockson,pcocksonht@geocities.jp +643,Engineering,Fraser Tatters,ftattershu@cafepress.com +644,Business Development,Lazaro Epton,leptonhv@icq.com +645,Engineering,Elva Camerello,ecamerellohw@sciencedirect.com +646,Human Resources,Winifield Loynes,wloyneshx@stumbleupon.com +647,Product Management,Rosalinde Passingham,rpassinghamhy@webmd.com +648,Business Development,Berti Munson,bmunsonhz@is.gd +649,Business Development,Kaylyn Hallstone,khallstonei0@craigslist.org +650,Services,Hali Winkett,hwinketti1@wired.com +651,Research and Development,Maison Hemeret,mhemereti2@g.co +652,Product 
Management,Feliza Garnul,fgarnuli3@pinterest.com +653,Legal,Sam Burrill,sburrilli4@pcworld.com +654,Human Resources,Wolfy Andrzejak,wandrzejaki5@nytimes.com +655,Accounting,Bent Goeff,bgoeffi6@bloomberg.com +656,Product Management,Zebulen Emson,zemsoni7@phoca.cz +657,Training,Matias Tace,mtacei8@unesco.org +658,Services,Erin Balsillie,ebalsilliei9@google.cn +659,Training,Nels McKellen,nmckellenia@fema.gov +660,Legal,Lynnelle Capewell,lcapewellib@ca.gov +661,Human Resources,Cal Lishman,clishmanic@taobao.com +662,Accounting,Ulla Allberry,uallberryid@clickbank.net +663,Sales,Falito Pavlata,fpavlataie@cyberchimps.com +664,Support,Maude Ness,mnessif@pinterest.com +665,Product Management,Rahal Ashford,rashfordig@hhs.gov +666,Engineering,Karole Janaud,kjanaudih@rediff.com +667,Training,Cecilia Doyland,cdoylandii@mail.ru +668,Business Development,Avril Boken,abokenij@51.la +669,Accounting,Flinn Ansell,fansellik@ftc.gov +670,Accounting,Bryanty Suff,bsuffil@dyndns.org +671,Marketing,Claretta Cappleman,ccapplemanim@google.com.au +672,Legal,Trenna Sabatier,tsabatierin@intel.com +673,Human Resources,Robinet Kybert,rkybertio@bing.com +674,Accounting,Darice Bulfield,dbulfieldip@mac.com +675,Engineering,Devland Maier,dmaieriq@sfgate.com +676,Product Management,Gearalt Dimont,gdimontir@noaa.gov +677,Training,Cy Franzonello,cfranzonellois@eventbrite.com +678,Accounting,Sharyl Leete,sleeteit@wikispaces.com +679,Accounting,Stesha Scotchmur,sscotchmuriu@rambler.ru +680,Accounting,Chance Wadesworth,cwadesworthiv@sohu.com +681,Sales,Haze Staining,hstainingiw@cocolog-nifty.com +682,Engineering,Jacenta Stobbs,jstobbsix@bandcamp.com +683,Business Development,Rudolf Bathowe,rbathoweiy@usnews.com +684,Sales,Babette Rhead,brheadiz@fotki.com +685,Engineering,Lonni Peasnone,lpeasnonej0@yahoo.co.jp +686,Accounting,Abigale Antal,aantalj1@goo.gl +687,Accounting,Shannen Phelit,sphelitj2@dropbox.com +688,Training,Masha Bateup,mbateupj3@psu.edu +689,Services,Annette Aldam,aaldamj4@gravatar.com 
+690,Business Development,Rosa Elnough,relnoughj5@cbsnews.com +691,Business Development,Claresta Nolot,cnolotj6@wordpress.org +692,Accounting,Channa Surmeir,csurmeirj7@so-net.ne.jp +693,Research and Development,Hagan Brundell,hbrundellj8@mapy.cz +694,Engineering,Judie Kestell,jkestellj9@twitpic.com +695,Human Resources,Lucius Mattek,lmattekja@wordpress.org +696,Human Resources,Demeter Stormouth,dstormouthjb@prweb.com +697,Human Resources,Allard Friman,afrimanjc@whitehouse.gov +698,Human Resources,Annissa Audley,aaudleyjd@drupal.org +699,Business Development,Waylen Annand,wannandje@slideshare.net +700,Accounting,Krissie Spacie,kspaciejf@craigslist.org +701,Support,Edyth Phonix,ephonixjg@virginia.edu +702,Support,Dora Nehl,dnehljh@amazonaws.com +703,Support,Geralda Moar,gmoarji@about.com +704,Human Resources,Anabella Gaitung,agaitungjj@simplemachines.org +705,Engineering,Andre Luckwell,aluckwelljk@samsung.com +706,Sales,Murdoch Jerdein,mjerdeinjl@google.co.uk +707,Research and Development,Cariotta Harbottle,charbottlejm@1und1.de +708,Sales,Bat Headington,bheadingtonjn@columbia.edu +709,Sales,Koral Regler,kreglerjo@e-recht24.de +710,Sales,Laetitia Kees,lkeesjp@yahoo.co.jp +711,Research and Development,Modestia Demko,mdemkojq@icq.com +712,Accounting,Lissie Kensitt,lkensittjr@deviantart.com +713,Sales,Athene Huglin,ahuglinjs@free.fr +714,Research and Development,Hastings Asp,haspjt@goo.ne.jp +715,Human Resources,Silvanus Debow,sdebowju@telegraph.co.uk +716,Accounting,Birgitta D'Andrea,bdandreajv@yahoo.co.jp +717,Services,Muhammad Swindon,mswindonjw@ucoz.ru +718,Training,Malina Barnish,mbarnishjx@google.cn +719,Product Management,Tedda Arnaudi,tarnaudijy@paginegialle.it +720,Legal,Debbie Romero,dromerojz@shinystat.com +721,Product Management,Bella Nehl,bnehlk0@hugedomains.com +722,Business Development,Leighton Fearnall,lfearnallk1@elpais.com +723,Services,Harriet Fairbanks,hfairbanksk2@ucla.edu +724,Training,Jaquelyn Browne,jbrownek3@newsvine.com +725,Sales,Brittani Le 
Page,blek4@about.me +726,Research and Development,Arden Clayal,aclayalk5@arizona.edu +727,Engineering,Miltie Lafford,mlaffordk6@storify.com +728,Engineering,Ezra Stichall,estichallk7@ox.ac.uk +729,Product Management,Clarance Seamans,cseamansk8@latimes.com +730,Human Resources,Allissa Bools,aboolsk9@nydailynews.com +731,Support,Constanta Alexsandrov,calexsandrovka@reuters.com +732,Accounting,King Cushion,kcushionkb@dailymotion.com +733,Accounting,Panchito Halsall,phalsallkc@bandcamp.com +734,Support,Jayme Yeend,jyeendkd@addthis.com +735,Legal,Jeffy Pargeter,jpargeterke@meetup.com +736,Marketing,Jo ann Hiskey,jannkf@mapquest.com +737,Business Development,Fidelio Kubicka,fkubickakg@tiny.cc +738,Business Development,Glynn Blevin,gblevinkh@instagram.com +739,Marketing,Denys Luckes,dluckeski@umich.edu +740,Product Management,Deeanne Langland,dlanglandkj@macromedia.com +741,Services,Ringo Cauldwell,rcauldwellkk@exblog.jp +742,Marketing,Constantino Harring,charringkl@twitter.com +743,Marketing,Myrilla Buckel,mbuckelkm@soup.io +744,Sales,Nikola Rainbow,nrainbowkn@ning.com +745,Engineering,Claudine Sharer,csharerko@studiopress.com +746,Business Development,Joete Schettini,jschettinikp@un.org +747,Product Management,Illa Butter,ibutterkq@mozilla.org +748,Product Management,Pinchas Stearns,pstearnskr@phoca.cz +749,Sales,Gearard Nockells,gnockellsks@unc.edu +750,Human Resources,Vivyanne Screas,vscreaskt@stanford.edu +751,Support,Ray Dagger,rdaggerku@networkadvertising.org +752,Accounting,Malvina Spini,mspinikv@google.pl +753,Marketing,Sophia O'Kielt,sokieltkw@addtoany.com +754,Sales,Kristoforo Guillard,kguillardkx@facebook.com +755,Product Management,Vladimir Frangione,vfrangioneky@drupal.org +756,Support,Rosita Hurd,rhurdkz@smugmug.com +757,Business Development,Geneva Frammingham,gframminghaml0@bing.com +758,Engineering,Tadeo Melmar,tmelmarl1@opera.com +759,Accounting,Melina Quartly,mquartlyl2@cocolog-nifty.com +760,Training,Eldin Grundey,egrundeyl3@drupal.org 
+761,Engineering,Daphene Brodeau,dbrodeaul4@cocolog-nifty.com +762,Research and Development,Dannie Normadell,dnormadelll5@sphinn.com +763,Product Management,Boonie Evelyn,bevelynl6@sohu.com +764,Human Resources,Damaris Kobes,dkobesl7@arizona.edu +765,Business Development,Davy Lawrenson,dlawrensonl8@bbb.org +766,Legal,Licha Galiford,lgalifordl9@cbc.ca +767,Product Management,Pen Treat,ptreatla@mysql.com +768,Research and Development,Gill Allward,gallwardlb@vinaora.com +769,Business Development,Sterne Baxstar,sbaxstarlc@kickstarter.com +770,Engineering,Ulick Grishanin,ugrishaninld@nsw.gov.au +771,Support,Bria Lawden,blawdenle@hubpages.com +772,Product Management,Orella Covil,ocovillf@digg.com +773,Accounting,Ari Haburne,ahaburnelg@wisc.edu +774,Accounting,Sonny Dimmne,sdimmnelh@techcrunch.com +775,Training,Tamar Nansom,tnansomli@geocities.jp +776,Product Management,Bartolemo Joanaud,bjoanaudlj@weebly.com +777,Research and Development,Denise Fever,dfeverlk@oakley.com +778,Support,Charmain Heads,cheadsll@weather.com +779,Support,Arabele Jenne,ajennelm@shareasale.com +780,Support,Friedrick Battrick,fbattrickln@histats.com +781,Engineering,Berkley Fadian,bfadianlo@yahoo.com +782,Engineering,Gabriel Bernadzki,gbernadzkilp@google.de +783,Business Development,Kasey Stinchcombe,kstinchcombelq@nps.gov +784,Product Management,Dareen Gelly,dgellylr@irs.gov +785,Support,Zilvia Zealey,zzealeyls@cdc.gov +786,Research and Development,Gerhardt Housen,ghousenlt@baidu.com +787,Legal,Tadio Zaniolo,tzaniololu@nifty.com +788,Product Management,Pascal Dron,pdronlv@hao123.com +789,Sales,Aurie Murrum,amurrumlw@wunderground.com +790,Engineering,Farr Truss,ftrusslx@tinypic.com +791,Services,Bryna Patise,bpatisely@tmall.com +792,Support,Kirstyn Scurrey,kscurreylz@reddit.com +793,Support,Adrienne Kearford,akearfordm0@instagram.com +794,Engineering,Arther Doerling,adoerlingm1@cbc.ca +795,Sales,Jillian Salaman,jsalamanm2@unblog.fr +796,Services,Hillier Basnett,hbasnettm3@github.io 
+797,Training,Renell Krier,rkrierm4@si.edu +798,Research and Development,Demott Szymon,dszymonm5@ox.ac.uk +799,Human Resources,Padraig Amberger,pambergerm6@chronoengine.com +800,Legal,Malia Firman,mfirmanm7@chronoengine.com +801,Research and Development,Sergent Sabey,ssabeym8@dailymotion.com +802,Product Management,Zahara Tribbeck,ztribbeckm9@elpais.com +803,Human Resources,Sonnie Aymes,saymesma@nba.com +804,Sales,Geoffry Jellis,gjellismb@zimbio.com +805,Marketing,Tabbie Witter,twittermc@blogtalkradio.com +806,Support,Benji Switland,bswitlandmd@howstuffworks.com +807,Accounting,Eleanore Schaumann,eschaumannme@usnews.com +808,Accounting,Sanderson Copcott,scopcottmf@yahoo.com +809,Marketing,Stacy Crosby,scrosbymg@slashdot.org +810,Sales,Clarance Scotford,cscotfordmh@google.it +811,Business Development,Ameline Marzelle,amarzellemi@dyndns.org +812,Engineering,Barbette Knowlton,bknowltonmj@ameblo.jp +813,Accounting,Marla Barbey,mbarbeymk@smh.com.au +814,Accounting,Johann Reijmers,jreijmersml@fema.gov +815,Sales,Jsandye Pilbury,jpilburymm@dmoz.org +816,Services,Conan Pollitt,cpollittmn@umich.edu +817,Human Resources,Petra Moncaster,pmoncastermo@ox.ac.uk +818,Sales,Nolana Tweed,ntweedmp@photobucket.com +819,Product Management,Bree Harrell,bharrellmq@icio.us +820,Training,Kelcie Jalland,kjallandmr@patch.com +821,Legal,Constantin Gibbings,cgibbingsms@deliciousdays.com +822,Human Resources,Hardy Butterley,hbutterleymt@etsy.com +823,Legal,Eli Ratcliffe,eratcliffemu@hostgator.com +824,Business Development,Coralyn Belone,cbelonemv@cbsnews.com +825,Accounting,Vanya Grzelewski,vgrzelewskimw@zimbio.com +826,Engineering,Gregorio Dixcey,gdixceymx@mashable.com +827,Product Management,Jackie Line,jlinemy@si.edu +828,Services,Lek Ahmad,lahmadmz@un.org +829,Sales,Frayda Blabber,fblabbern0@vistaprint.com +830,Marketing,Hammad Marrow,hmarrown1@reuters.com +831,Support,Zuzana Duffer,zduffern2@naver.com +832,Services,Coretta Duley,cduleyn3@thetimes.co.uk +833,Human Resources,Adina 
Buckthorp,abuckthorpn4@cornell.edu +834,Sales,Elwyn Poole,epoolen5@slashdot.org +835,Product Management,Whitman Greenard,wgreenardn6@deliciousdays.com +836,Engineering,Gerri Batterham,gbatterhamn7@deliciousdays.com +837,Marketing,Ivy Cracker,icrackern8@eventbrite.com +838,Product Management,Cyrille Winchurst,cwinchurstn9@tinyurl.com +839,Legal,Delmor Jansema,djansemana@delicious.com +840,Research and Development,Samantha Searl,ssearlnb@pcworld.com +841,Sales,Raddy Abrahamsson,rabrahamssonnc@sun.com +842,Services,Fitzgerald Wolver,fwolvernd@github.io +843,Support,Melvin Haselwood,mhaselwoodne@bandcamp.com +844,Business Development,Hussein Lynds,hlyndsnf@360.cn +845,Business Development,Pryce Voce,pvoceng@scientificamerican.com +846,Business Development,Beilul Thirtle,bthirtlenh@google.nl +847,Legal,Bev Chappel,bchappelni@chronoengine.com +848,Marketing,Britni Gallifont,bgallifontnj@ihg.com +849,Support,Rudolfo Itzhaiek,ritzhaieknk@dell.com +850,Business Development,Dunc Clausewitz,dclausewitznl@psu.edu +851,Human Resources,Allan Thal,athalnm@upenn.edu +852,Marketing,Glennie Lines,glinesnn@free.fr +853,Legal,Wilhelmina Shivlin,wshivlinno@oracle.com +854,Training,Loralyn Tulley,ltulleynp@newyorker.com +855,Research and Development,Geoff Peppin,gpeppinnq@mit.edu +856,Marketing,Abe Physick,aphysicknr@go.com +857,Legal,Deva Tetley,dtetleyns@auda.org.au +858,Services,Gregory Cutridge,gcutridgent@nbcnews.com +859,Accounting,Thaxter Coldman,tcoldmannu@angelfire.com +860,Services,Cathrine Janney,cjanneynv@miitbeian.gov.cn +861,Product Management,Stace Grunwald,sgrunwaldnw@si.edu +862,Human Resources,Fayth Mountfort,fmountfortnx@fastcompany.com +863,Sales,Starla Prettyjohn,sprettyjohnny@surveymonkey.com +864,Human Resources,Donavon Brasted,dbrastednz@dropbox.com +865,Product Management,Deeanne Flacknoe,dflacknoeo0@sohu.com +866,Marketing,Ewen Gian,egiano1@51.la +867,Support,Rodie Sander,rsandero2@slideshare.net +868,Engineering,Darb Lapidus,dlapiduso3@whitehouse.gov 
+869,Services,Morgan Jiggins,mjigginso4@usgs.gov +870,Business Development,Paloma Quadri,pquadrio5@ask.com +871,Services,Deedee Heliet,dhelieto6@goodreads.com +872,Training,Glennis Jerke,gjerkeo7@ezinearticles.com +873,Sales,Dasi Stormonth,dstormontho8@pinterest.com +874,Business Development,Tristan McMeeking,tmcmeekingo9@forbes.com +875,Sales,Sadye Airth,sairthoa@addtoany.com +876,Human Resources,Normand Lilliman,nlillimanob@nature.com +877,Sales,Hansiain Gyenes,hgyenesoc@networkadvertising.org +878,Support,Renado Waterland,rwaterlandod@reverbnation.com +879,Legal,Howie Poon,hpoonoe@samsung.com +880,Training,Derrick Bambrugh,dbambrughof@free.fr +881,Training,Manolo Knapton,mknaptonog@fema.gov +882,Services,Alika Illes,aillesoh@blogger.com +883,Human Resources,Haily Gostage,hgostageoi@digg.com +884,Legal,Aggie Baythrop,abaythropoj@google.pl +885,Accounting,Georg Brimley,gbrimleyok@theatlantic.com +886,Accounting,Cody Matejovsky,cmatejovskyol@google.pl +887,Engineering,Milo Jarvie,mjarvieom@mayoclinic.com +888,Human Resources,Chaddy Tow,ctowon@reference.com +889,Human Resources,Beryl Grafham,bgrafhamoo@networkadvertising.org +890,Training,Trudi Olesen,tolesenop@shareasale.com +891,Marketing,Gardener MacNockater,gmacnockateroq@theguardian.com +892,Training,Hortensia Machen,hmachenor@paypal.com +893,Training,Vlad Rosier,vrosieros@dot.gov +894,Services,Elli Shieldon,eshieldonot@baidu.com +895,Support,Shannon Capron,scapronou@netlog.com +896,Business Development,Maggie Rugiero,mrugieroov@parallels.com +897,Support,Gustaf Sokell,gsokellow@discovery.com +898,Business Development,Nestor Caesmans,ncaesmansox@earthlink.net +899,Product Management,Ring Showl,rshowloy@dagondesign.com +900,Product Management,Curt Tall,ctalloz@mapquest.com +901,Support,Nollie Derrett,nderrettp0@comcast.net +902,Engineering,Urson Wither,uwitherp1@lulu.com +903,Support,Imogene Yandell,iyandellp2@yellowbook.com +904,Support,Kent Meier,kmeierp3@ebay.co.uk +905,Sales,Ashil 
Birtles,abirtlesp4@artisteer.com +906,Sales,Rowney Twyford,rtwyfordp5@dagondesign.com +907,Support,Mathew Emlen,memlenp6@nymag.com +908,Sales,Corny O'Boyle,coboylep7@deliciousdays.com +909,Product Management,Chelsy MacLeese,cmacleesep8@sciencedirect.com +910,Business Development,Nahum Hanigan,nhaniganp9@techcrunch.com +911,Training,Florrie Brundle,fbrundlepa@jugem.jp +912,Marketing,Antonie MacConnell,amacconnellpb@thetimes.co.uk +913,Engineering,Cati Fumagallo,cfumagallopc@flickr.com +914,Engineering,Russell De Caville,rdepd@china.com.cn +915,Sales,Orelee Waymont,owaymontpe@fc2.com +916,Accounting,Terri Simcock,tsimcockpf@usda.gov +917,Engineering,Olenka Leah,oleahpg@ucoz.com +918,Research and Development,Calvin Petrushkevich,cpetrushkevichph@elpais.com +919,Human Resources,Rana Cottage,rcottagepi@merriam-webster.com +920,Accounting,Damiano Beeby,dbeebypj@independent.co.uk +921,Sales,Reinaldo Castletine,rcastletinepk@twitpic.com +922,Marketing,Wernher Watford,wwatfordpl@ucsd.edu +923,Accounting,Noreen Dudding,nduddingpm@fda.gov +924,Marketing,Dane Crevy,dcrevypn@pcworld.com +925,Services,Nydia Dressell,ndressellpo@hc360.com +926,Legal,Petronella Durman,pdurmanpp@forbes.com +927,Human Resources,Regen Finlaison,rfinlaisonpq@sina.com.cn +928,Training,Carlos Scopyn,cscopynpr@qq.com +929,Research and Development,Natka Egre,negreps@bluehost.com +930,Business Development,Noak Strutton,nstruttonpt@mac.com +931,Sales,Astrid Glass,aglasspu@miibeian.gov.cn +932,Training,Darby Pickover,dpickoverpv@google.com +933,Services,Lyn Dunsmore,ldunsmorepw@kickstarter.com +934,Marketing,Nalani Torre,ntorrepx@zimbio.com +935,Legal,Laurette Noli,lnolipy@cyberchimps.com +936,Support,Elena Noteyoung,enoteyoungpz@devhub.com +937,Product Management,Tobias Duddin,tduddinq0@craigslist.org +938,Services,Ketty Ridolfi,kridolfiq1@wikipedia.org +939,Product Management,Dacie Voisey,dvoiseyq2@addtoany.com +940,Human Resources,Hillie Wort,hwortq3@ask.com +941,Accounting,Verla 
Kettlestringes,vkettlestringesq4@github.com +942,Engineering,Jamil Doohey,jdooheyq5@usatoday.com +943,Services,Vale Grollmann,vgrollmannq6@dagondesign.com +944,Support,Morten Laurentin,mlaurentinq7@answers.com +945,Services,Delcina Folley,dfolleyq8@msu.edu +946,Accounting,Lina Treby,ltrebyq9@weibo.com +947,Legal,Philip Wooldridge,pwooldridgeqa@scientificamerican.com +948,Sales,Aurelea Sharples,asharplesqb@360.cn +949,Support,Jerrold Burmaster,jburmasterqc@google.cn +950,Services,Stevie Lorkins,slorkinsqd@accuweather.com +951,Training,Alys Browne,abrowneqe@geocities.com +952,Training,Addi Older,aolderqf@vistaprint.com +953,Human Resources,Joline Sopp,jsoppqg@linkedin.com +954,Services,Roddie McCane,rmccaneqh@abc.net.au +955,Product Management,Ricky Gallemore,rgallemoreqi@usda.gov +956,Human Resources,Gregoire Soanes,gsoanesqj@rakuten.co.jp +957,Services,Herman Casburn,hcasburnqk@java.com +958,Human Resources,Koo MacIlhargy,kmacilhargyql@gizmodo.com +959,Engineering,Brittni Masser,bmasserqm@pbs.org +960,Services,Lenka Asel,laselqn@people.com.cn +961,Business Development,Byrom Blacksland,bblackslandqo@epa.gov +962,Human Resources,Andrew Howsin,ahowsinqp@deliciousdays.com +963,Support,Richard Spykings,rspykingsqq@wikimedia.org +964,Legal,Jeramie Fawcett,jfawcettqr@soup.io +965,Business Development,Cece Barnes,cbarnesqs@answers.com +966,Human Resources,Chickie Gerant,cgerantqt@wunderground.com +967,Support,Killie Guillou,kguillouqu@histats.com +968,Marketing,Pyotr Braferton,pbrafertonqv@yahoo.co.jp +969,Human Resources,Vaughn Lansberry,vlansberryqw@privacy.gov.au +970,Services,Janice Bettis,jbettisqx@slideshare.net +971,Research and Development,Mortimer Heading,mheadingqy@blinklist.com +972,Training,Quentin Trusty,qtrustyqz@dell.com +973,Human Resources,Katinka Shanklin,kshanklinr0@free.fr +974,Business Development,Selena Bustard,sbustardr1@google.de +975,Accounting,Tedda Benez,tbenezr2@is.gd +976,Research and Development,Hakim Lugsdin,hlugsdinr3@webnode.com 
+977,Engineering,Dorie Skeete,dskeeter4@homestead.com +978,Research and Development,Chere Kobieriecki,ckobierieckir5@hc360.com +979,Product Management,Salim Moulsdall,smoulsdallr6@cloudflare.com +980,Human Resources,Oralla Gerin,ogerinr7@constantcontact.com +981,Marketing,Emmerich Gelling,egellingr8@psu.edu +982,Training,Hermione Anstie,hanstier9@hexun.com +983,Research and Development,Ilka Gavigan,igaviganra@stumbleupon.com +984,Accounting,Ginevra Scholar,gscholarrb@bing.com +985,Support,Candra Husk,chuskrc@umn.edu +986,Services,Emilee Peterffy,epeterffyrd@pen.io +987,Services,Robinett Eblein,rebleinre@mediafire.com +988,Training,Trever Beyer,tbeyerrf@cnet.com +989,Legal,Toiboid Crosser,tcrosserrg@digg.com +990,Legal,Maryjane Vreede,mvreederh@state.tx.us +991,Human Resources,Wernher Ecles,weclesri@dropbox.com +992,Sales,Melisandra Vane,mvanerj@sciencedirect.com +993,Human Resources,Normy Fenelow,nfenelowrk@addthis.com +994,Human Resources,Elie Simms,esimmsrl@sfgate.com +995,Training,Kaycee Millom,kmillomrm@networksolutions.com +996,Engineering,Zulema Eldershaw,zeldershawrn@blog.com +997,Research and Development,Rhiamon Geldard,rgeldardro@ted.com +998,Marketing,Rab Sergant,rsergantrp@artisteer.com +999,Research and Development,Dorelle MacVagh,dmacvaghrq@bravesites.com +1000,Support,Elvira Bucham,ebuchamrr@jigsy.com From f623bc51c4df54c503afb14d6094652b140c8670 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Fri, 5 Dec 2025 02:03:43 -0500 Subject: [PATCH 20/21] Documentation:Implement all intergration test --- src/Backend/opti-sql-go/Expr/expr.go | 1 - src/Backend/opti-sql-go/Expr/expr_test.go | 8 +- .../opti-sql-go/operators/aggr/groupBy.go | 17 + .../opti-sql-go/operators/filter/filter.go | 3 + .../operators/test/intergration_test.go | 738 +++++++++++++++--- 5 files changed, 656 insertions(+), 111 deletions(-) diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index 665990d..cd19dab 100644 --- 
a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -263,7 +263,6 @@ func NewLiteralResolve(Type arrow.DataType, Value any) *LiteralResolve { fmt.Printf("%v did not match any case, of type %T\n", v, v) castVal = Value } - fmt.Printf("sotred as -> %v\t%v\n", Type, castVal) return &LiteralResolve{Type: Type, Value: castVal} } func EvalLiteral(l *LiteralResolve, batch *operators.RecordBatch) (arrow.Array, error) { diff --git a/src/Backend/opti-sql-go/Expr/expr_test.go b/src/Backend/opti-sql-go/Expr/expr_test.go index 7f839bc..5941807 100644 --- a/src/Backend/opti-sql-go/Expr/expr_test.go +++ b/src/Backend/opti-sql-go/Expr/expr_test.go @@ -1550,7 +1550,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name starts with a", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "A%" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { t.Fatalf("unexpected error from EvalExpression") @@ -1572,7 +1572,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name contains li", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "%li%" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { @@ -1624,7 +1624,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name is exactly 5 letters", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "_____" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + 
whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { @@ -1650,7 +1650,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name starts with Ch", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "Ch%" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index 962a450..07ba08f 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -430,6 +430,17 @@ func buildDynamicArray(mem memory.Allocator, dt arrow.DataType, values []any) ar // =========================== // UNSUPPORTED TYPE // =========================== + case arrow.BOOL: + b := array.NewBooleanBuilder(mem) + for _, v := range values { + fmt.Printf("values:%v type:%T\n", v, v) + if v == nil { + b.AppendNull() + } else { + b.Append(castToBool(v)) + } + } + return b.NewArray() default: panic(fmt.Sprintf("unsupported dynamic array type: %v", dt)) } @@ -440,3 +451,9 @@ func buildFloatArray(mem memory.Allocator, values []float64) arrow.Array { b.AppendValues(values, nil) return b.NewArray() } +func castToBool(v any) bool { + if v == "true" || v == true { + return true + } + return false +} diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index a476ac8..5493712 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -168,6 +168,7 @@ func validPredicates(pred Expr.Expression, schema *arrow.Schema) 
bool { if !arrow.TypeEqual(dt1, dt2) { return false } + fmt.Printf("left:\t%v\nright:\t%v\n", p.Left, p.Right) // recursively validate children return validPredicates(p.Left, schema) && validPredicates(p.Right, schema) @@ -177,6 +178,8 @@ func validPredicates(pred Expr.Expression, schema *arrow.Schema) bool { case *Expr.NullCheckExpr: return validPredicates(p.Expr, schema) + case *Expr.ScalarFunction: + return true default: return false } diff --git a/src/Backend/opti-sql-go/operators/test/intergration_test.go b/src/Backend/opti-sql-go/operators/test/intergration_test.go index af51020..20b0a01 100644 --- a/src/Backend/opti-sql-go/operators/test/intergration_test.go +++ b/src/Backend/opti-sql-go/operators/test/intergration_test.go @@ -6,6 +6,8 @@ import ( "io" "opti-sql-go/Expr" "opti-sql-go/operators" + join "opti-sql-go/operators/Join" + aggr "opti-sql-go/operators/aggr" "opti-sql-go/operators/filter" "opti-sql-go/operators/project" "os" @@ -114,11 +116,11 @@ func TestSelectFilterLimit(t *testing.T) { } if batch == nil { - t.Logf("(1.A) got nil batch (possibly EOF)") + t.Logf("(1A) got nil batch (possibly EOF)") return } - t.Logf("(1.A) batch:\n%v\n", batch.PrettyPrint()) + t.Logf("(1A) batch:\n%v\n", batch.PrettyPrint()) }) // (1.B) SELECT username, age_years FROM source1 WHERE is_active = true AND age_years < 25 LIMIT 3; @@ -162,14 +164,14 @@ func TestSelectFilterLimit(t *testing.T) { } if batch == nil { - t.Logf("(1.B) got nil batch (possibly EOF)") + t.Logf("(1B) got nil batch (possibly EOF)") return } - t.Logf("(1.B) batch:\n%v\n", batch.PrettyPrint()) + t.Logf("(1B) batch:\n%v\n", batch.PrettyPrint()) }) // (1.C) SELECT id, favorite_color FROM source1 WHERE favorite_color = 'Red' LIMIT 7; - t.Run("(1.C)", func(t *testing.T) { + t.Run("1C", func(t *testing.T) { src := source1Project() pred := Expr.NewBinaryExpr( @@ -203,101 +205,505 @@ func TestSelectFilterLimit(t *testing.T) { } if batch == nil { - t.Logf("(1.C) got nil batch (possibly EOF)") + t.Logf("(1C) 
got nil batch (possibly EOF)") return } - t.Logf("(1.C) batch:\n%v\n", batch.PrettyPrint()) + t.Logf("(1C) batch:\n%v\n", batch.PrettyPrint()) }) } -/* -(2) -Operators: Filter, Scalar functions -sql query: -(2.A)SELECT id, username, LOWER(favorite_color) as fav_color_lower FROM source1 WHERE UPPER(favorite_color) = 'BLUE'; -(2.B)SELECT username, LOWER(email_address) AS email_lower -FROM source1 -WHERE UPPER(username) = 'ALICE'; -*/ +// ------------------------------------------------------------------------- +// (2) Operators: Filter, Scalar functions +// (2.A) SELECT id, username, LOWER(favorite_color) as fav_color_lower FROM source1 WHERE UPPER(favorite_color) = 'BLUE'; +// (2.B) SELECT username, LOWER(email_address) AS email_lower FROM source1 WHERE UPPER(username) = 'ALICE'; +func TestFilterScalarFunctions(t *testing.T) { + // (2.A) SELECT id, username, LOWER(favorite_color) as fav_color_lower FROM source1 WHERE UPPER(favorite_color) = 'BLUE'; + t.Run("2A", func(t *testing.T) { + src := source1Project() -/* -(3) -Operators: select, Sort -sql query: -(3.A)SELECT id, account_balance_usd, username -FROM source1 -ORDER BY account_balance_usd ASC -(3.B)SELECT id, favorite_color -FROM source1 -ORDER BY favorite_color ASC; -*/ + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Upper, Expr.NewColumnResolve("favorite_color")), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "BLUE"), + ) -/* -(4) -Operators: Join(INNER), Select -SQL: -(4.A)SELECT s1.id, s1.username, s2.department_name -FROM source1 AS s1 -INNER JOIN source2 AS s2 -ON s1.favorite_color = s2.manager_name; -(4.B)SELECT s1.id, s1.email_address, s2.department_name -FROM source1 AS s1 -INNER JOIN source2 AS s2 -ON s1.favorite_color = s2.manager_name; -*/ + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } -/* -(5) -Operators: GroupBy, Aggregation(SUM, AVG), Select -SQL: -(5.A)SELECT favorite_color, AVG(age_years) AS avg_age, 
SUM(account_balance_usd) AS total_balance -FROM source1 -GROUP BY favorite_color; -(5.B)SELECT is_active, COUNT(*) AS active_count, AVG(age_years) AS avg_age -FROM source1 -GROUP BY is_active; + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("username"), + Expr.NewAlias(Expr.NewScalarFunction(Expr.Lower, Expr.NewColumnResolve("favorite_color")), "fav_color_lower"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } -*/ + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(2A) got nil batch (possibly EOF)") + return + } + t.Logf("(2A) batch:\n%v\n", batch.PrettyPrint()) + }) -/* -(6) -Operators: Distinct, Sort(DESC) -SQL: -(6.A)SELECT DISTINCT favorite_color -FROM source1 -ORDER BY favorite_color DESC; -(6.B)SELECT DISTINCT is_active -FROM source1 -ORDER BY is_active DESC; + // (2.B) SELECT username, LOWER(email_address) AS email_lower FROM source1 WHERE UPPER(username) = 'ALICE'; + t.Run("2B", func(t *testing.T) { + src := source1Project() -*/ + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Upper, Expr.NewColumnResolve("username")), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "ALICE"), + ) -/* -(7) -Operators: Join(INNER), Filter, Projection, Limit + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } -SQL: -(7.A)SELECT s1.id, s1.username, s2.department_name -FROM source1 AS s1 -INNER JOIN source2 AS s2 -ON s1.favorite_color = s2.manager_name -WHERE s1.age_years > 30 -LIMIT 5; -(7.B)SELECT s1.username, s2.manager_email -FROM source1 AS s1 -JOIN source2 AS s2 -ON s1.favorite_color = s2.manager_name -WHERE s2.department_name = 'Engineering' -LIMIT 3; -(7.C)SELECT s1.id, s2.manager_name -FROM source1 s1 -JOIN source2 s2 -ON s1.favorite_color = s2.manager_name -WHERE 
s1.account_balance_usd > 10000 -LIMIT 2; -*/ + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewAlias(Expr.NewScalarFunction(Expr.Lower, Expr.NewColumnResolve("email_address")), "email_lower"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch != nil { + t.Fatalf("was expecting an empty batch but recieved %s\n", batch.PrettyPrint()) + return + } + }) +} + +// ------------------------------------------------------------------------- +// (3) Operators: select, Sort +// (3.A) SELECT id, account_balance_usd, username FROM source1 ORDER BY account_balance_usd ASC +// (3.B) SELECT id, favorite_color FROM source1 ORDER BY favorite_color ASC; +func TestSelectSort(t *testing.T) { + // (3.A) SELECT id, account_balance_usd, username FROM source1 ORDER BY account_balance_usd ASC + t.Run("3A", func(t *testing.T) { + src := source1Project() + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("account_balance_usd"), + Expr.NewColumnResolve("username"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + sk := aggr.NewSortKey(Expr.NewColumnResolve("account_balance_usd"), true) + sortExec, err := aggr.NewSortExec(proj, aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(3A) got nil batch (possibly EOF)") + return + } + t.Logf("(3A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (3.B) SELECT id, favorite_color FROM source1 ORDER BY favorite_color ASC; + t.Run("3B", func(t *testing.T) { + src := source1Project() + exprs := Expr.NewExpressions( + 
Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("favorite_color"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + sk := aggr.NewSortKey(Expr.NewColumnResolve("favorite_color"), true) + sortExec, err := aggr.NewSortExec(proj, aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(3B) got nil batch (possibly EOF)") + return + } + t.Logf("(3B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// ------------------------------------------------------------------------- +// (4) Operators: Join(INNER), Select +// (4.A) SELECT s1.id, s1.username, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id; +// (4.B) SELECT s1.id, s1.email_address, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id; +func TestJoinSelect(t *testing.T) { + // (4.A) SELECT s1.id, s1.username, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.favorite_color = s2.manager_name; + t.Run("4A", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + exprs := Expr.NewExpressions( + Expr.NewAlias(Expr.NewColumnResolve("left_id"), "id"), + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("department_name"), + ) + t.Logf("\t%v\n", j.Schema()) + proj, err := project.NewProjectExec(j, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if 
batch == nil { + t.Logf("(4A) got nil batch (possibly EOF)") + return + } + t.Logf("(4A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (4.B) SELECT s1.id, s1.email_address, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id; + t.Run("4B", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + exprs := Expr.NewExpressions( + Expr.NewAlias(Expr.NewColumnResolve("left_id"), "cool_guy_id"), + Expr.NewColumnResolve("email_address"), + Expr.NewColumnResolve("department_name"), + ) + proj, err := project.NewProjectExec(j, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(4B) got nil batch (possibly EOF)") + return + } + t.Logf("(4B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +func TestGroupByAggregation(t *testing.T) { + // (5.A) SELECT favorite_color, AVG(age_years) AS avg_age, SUM(account_balance_usd) AS total_balance FROM source1 GROUP BY favorite_color order by avg_age; + t.Run("5A", func(t *testing.T) { + src := source1Project() + + groupBy := []Expr.Expression{Expr.NewColumnResolve("favorite_color")} + aggs := []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions(aggr.Avg, Expr.NewColumnResolve("age_years")), + aggr.NewAggregateFunctions(aggr.Sum, Expr.NewColumnResolve("account_balance_usd")), + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupBy) + if err != nil { + t.Fatalf("groupby init failed: %v", err) + } + sortExec, err := aggr.NewSortExec(gb, aggr.CombineSortKeys(aggr.NewSortKey(Expr.NewColumnResolve("avg_Column(age_years)"), true))) + if err != nil { + 
t.Fatalf("sort init failed: %v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(5A) got nil batch (possibly EOF)") + return + } + t.Logf("(5A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (5.B) SELECT is_active, COUNT(*) AS active_count, AVG(age_years) AS avg_age FROM source1 GROUP BY is_active; + t.Run("5B", func(t *testing.T) { + src := source1Project() + fmt.Printf("\t%v\n", src.Schema()) + groupBy := []Expr.Expression{Expr.NewColumnResolve("is_active")} + aggs := []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions(aggr.Count, Expr.NewColumnResolve("id")), + aggr.NewAggregateFunctions(aggr.Avg, Expr.NewColumnResolve("age_years")), + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupBy) + if err != nil { + t.Fatalf("groupby init failed: %v", err) + } + + batch, err := gb.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(5B) got nil batch (possibly EOF)") + return + } + t.Logf("(5B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// TestDistinctSort runs DISTINCT + Sort pipelines for source1 +// (6.A)SELECT DISTINCT favorite_color +// FROM source1 +// ORDER BY favorite_color DESC; +// (6.B)SELECT DISTINCT is_active +// FROM source1 +// ORDER BY is_active DESC; +func TestDistinctSort(t *testing.T) { + // (6.A) SELECT DISTINCT favorite_color FROM source1 ORDER BY favorite_color DESC; + t.Run("6A", func(t *testing.T) { + src := source1Project() + + cols := []Expr.Expression{Expr.NewColumnResolve("favorite_color")} + distinct, err := filter.NewDistinctExec(src, cols) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + sk := aggr.NewSortKey(Expr.NewColumnResolve("favorite_color"), false) // DESC + sortExec, err := aggr.NewSortExec(distinct, aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + proj, err := 
project.NewProjectExec(sortExec, Expr.NewExpressions(Expr.NewColumnResolve("favorite_color"))) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(6A) got nil batch (possibly EOF)") + return + } + t.Logf("(6A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (6.B) SELECT DISTINCT is_active FROM source1 ORDER BY is_active DESC; + t.Run("6B", func(t *testing.T) { + src := source1Project() + + cols := []Expr.Expression{Expr.NewColumnResolve("is_active")} + distinct, err := filter.NewDistinctExec(src, cols) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + sk := aggr.NewSortKey(Expr.NewColumnResolve("is_active"), false) // DESC + sortExec, err := aggr.NewSortExec(distinct, aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + proj, err := project.NewProjectExec(sortExec, Expr.NewExpressions(Expr.NewColumnResolve("is_active"))) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(6B) got nil batch (possibly EOF)") + return + } + t.Logf("(6B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// TestJoinFilterProjLimit runs join + filter + project + limit pipelines +// (7.A)SELECT s1.id, s1.username, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id WHERE s1.age_years > 30 LIMIT 5; +// (7.B)SELECT s1.username, s2.manager_email FROM source1 AS s1 JOIN source2 AS s2 ON s1.id = s2.id WHERE s2.department_name = 'Engineering' LIMIT 3; +// (7.C)SELECT s1.id, s2.manager_name FROM source1 s1 JOIN source2 s2 ON s1.id = s2.id WHERE s1.account_balance_usd > 10000 LIMIT 2; +func TestJoinFilterProjLimit(t *testing.T) { + // (7.A)SELECT s1.id, s1.username, s2.department_name FROM 
source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id WHERE s1.age_years > 30 LIMIT 5; + t.Run("7A", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + fmt.Printf("schema:%v\n", j.Schema()) + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age_years"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 30), + ) + + filt, err := filter.NewFilterExec(j, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewAlias(Expr.NewColumnResolve("left_id"), "id"), + Expr.NewColumnResolve("username"), + Expr.NewAlias(Expr.NewColumnResolve("department_name"), "deptartment"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 5) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(7A) got nil batch (possibly EOF)") + return + } + t.Logf("(7A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (7.B)SELECT s1.username, s2.manager_email FROM source1 AS s1 JOIN source2 AS s2 ON s1.id = s2.id WHERE s2.department_name = 'Engineering' LIMIT 3; + t.Run("7B", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + + pred := Expr.NewBinaryExpr( + 
Expr.NewColumnResolve("department_name"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "Engineering"), + ) + + filt, err := filter.NewFilterExec(j, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("manager_email"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 3) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(7B) got nil batch (possibly EOF)") + return + } + t.Logf("(7B) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (7.C)SELECT s1.id, s2.manager_name FROM source1 s1 JOIN source2 s2 ON s1.id = s2.id WHERE s1.account_balance_usd > 10000 LIMIT 2; + t.Run("7C", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("account_balance_usd"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 10000.0), + ) + + filt, err := filter.NewFilterExec(j, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("left_id"), + Expr.NewColumnResolve("manager_name"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 2) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil 
&& !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(7C) got nil batch (possibly EOF)") + return + } + t.Logf("(7C) batch:\n%v\n", batch.PrettyPrint()) + }) +} /* (8) @@ -312,29 +718,149 @@ FROM source1 WHERE ABS(account_balance_usd) > 5000; */ -/* -(9) -Operators: Sort (multiple columns), Select +// TestScalarAbsRound runs scalar ABS/ROUND with Filter + Projection +// (8.A)SELECT id, ROUND(ABS(average_session_minutes)) AS rounded_session FROM source1 WHERE ABS(average_session_minutes) > 5; +// (8.B)SELECT username, ROUND(account_balance_usd) AS rounded_balance FROM source1 WHERE ABS(account_balance_usd) > 5000; +func TestScalarAbsRound(t *testing.T) { + // (8.A)SELECT id, ROUND(ABS(average_session_minutes)) AS rounded_session FROM source1 WHERE ABS(average_session_minutes) > 5; + t.Run("8A", func(t *testing.T) { + src := source1Project() -SQL: -(9.A)SELECT id, username, age_years -FROM source1 -ORDER BY age_years DESC, username ASC; -(9.B)SELECT id, email_address, age_years -FROM source1 -ORDER BY age_years ASC, email_address DESC; + // predicate: ABS(average_session_minutes) > 5 + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("average_session_minutes")), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 5.0), + ) -*/ + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } -/* -(10) -Operators: Join (INNER, multiple conditions), Select, Sort (multiple columns) + // projection: id, ROUND(ABS(average_session_minutes)) as rounded_session + roundExpr := Expr.NewScalarFunction(Expr.Round, Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("average_session_minutes"))) + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias(roundExpr, "rounded_session"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } 
-(10.A)SELECT s1.id, s1.username, s2.manager_name, s2.budget -FROM source1 AS s1 -INNER JOIN source2 AS s2 - ON s1.favorite_color = s2.manager_name - AND s1.region = s2.region -ORDER BY s2.budget DESC, s1.username ASC; + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(8A) got nil batch (possibly EOF)") + return + } + t.Logf("(8A) batch:\n%v\n", batch.PrettyPrint()) + }) -*/ + // (8.B)SELECT username, ROUND(account_balance_usd) AS rounded_balance FROM source1 WHERE ABS(account_balance_usd) > 5000; + t.Run("8B", func(t *testing.T) { + src := source1Project() + + // predicate: ABS(account_balance_usd) > 5000 + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("account_balance_usd")), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 5000.0), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + roundExpr := Expr.NewScalarFunction(Expr.Round, Expr.NewColumnResolve("account_balance_usd")) + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewAlias(roundExpr, "rounded_balance"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(8B) got nil batch (possibly EOF)") + return + } + t.Logf("(8B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// TestSelectMultiSort runs multi-column ORDER BY tests +// (9.A)SELECT id, username, age_years FROM source1 ORDER BY age_years DESC, username ASC; +// (9.B)SELECT id, email_address, age_years FROM source1 ORDER BY age_years ASC, email_address DESC; +func TestSelectMultiSort(t *testing.T) { + // (9.A) + t.Run("9A", func(t *testing.T) { + src := source1Project() + exprs := 
Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + sk1 := aggr.NewSortKey(Expr.NewColumnResolve("age_years"), false) // DESC + sk2 := aggr.NewSortKey(Expr.NewColumnResolve("username"), true) // ASC + sortExec, err := aggr.NewSortExec(proj, aggr.CombineSortKeys(sk1, sk2)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(9A) got nil batch (possibly EOF)") + return + } + t.Logf("(9A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (9.B) + t.Run("9B", func(t *testing.T) { + src := source1Project() + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("email_address"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + sk1 := aggr.NewSortKey(Expr.NewColumnResolve("age_years"), true) // ASC + sk2 := aggr.NewSortKey(Expr.NewColumnResolve("email_address"), false) // DESC + sortExec, err := aggr.NewSortExec(proj, aggr.CombineSortKeys(sk1, sk2)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(9B) got nil batch (possibly EOF)") + return + } + t.Logf("(9B) batch:\n%v\n", batch.PrettyPrint()) + }) +} From e12e1d768ac04ea5a6cf1d5c65b1f91c5a7e4439 Mon Sep 17 00:00:00 2001 From: Richard Baah Date: Fri, 5 Dec 2025 02:37:41 -0500 Subject: [PATCH 21/21] Documentation: Update read me, remove print statments, work on TODO statments --- README.md | 1 + src/Backend/opti-sql-go/Expr/expr.go | 19 +-- 
src/Backend/opti-sql-go/Expr/expr_test.go | 2 - src/Backend/opti-sql-go/config/config.go | 1 - src/Backend/opti-sql-go/main.go | 2 - .../opti-sql-go/operators/Join/hashJoin.go | 5 +- .../opti-sql-go/operators/OPERATORS.md | 158 ++++++++++++++++++ .../opti-sql-go/operators/aggr/groupBy.go | 1 - .../operators/aggr/groupBy_test.go | 2 - .../opti-sql-go/operators/aggr/singleAggr.go | 2 +- .../opti-sql-go/operators/aggr/sort.go | 6 +- .../opti-sql-go/operators/filter/filter.go | 9 +- .../operators/filter/filter_test.go | 7 +- .../opti-sql-go/operators/filter/limit.go | 5 +- .../opti-sql-go/operators/project/parquet.go | 4 +- .../operators/project/parquet_test.go | 1 - .../operators/project/projectExecExpr_test.go | 7 - .../operators/test/intergration_test.go | 2 - .../opti-sql-go/operators/test/t1_test.go | 2 - .../opti-sql-go/substrait/substrait_test.go | 4 +- 20 files changed, 186 insertions(+), 54 deletions(-) create mode 100644 src/Backend/opti-sql-go/operators/OPERATORS.md diff --git a/README.md b/README.md index 52dedf9..51f4799 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ Initial development is done in **Go** (`opti-sql-go`), which serves as the prima - `/operators` - SQL operator implementations (filter, join, aggregation, project) - `/physical-optimizer` - Query plan parsing and optimization - `/substrait` - Substrait plan integration +- `/operators/OPERATORS.md` - concise reference for operator constructors, behavior and examples ## Branching Model diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index cd19dab..4899a15 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -260,7 +260,6 @@ func NewLiteralResolve(Type arrow.DataType, Value any) *LiteralResolve { castVal = float64(v) } default: - fmt.Printf("%v did not match any case, of type %T\n", v, v) castVal = Value } return &LiteralResolve{Type: Type, Value: castVal} @@ -447,37 +446,36 @@ func EvalBinary(b 
*BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error if err != nil { return nil, err } + ctx := context.Background() opt := compute.ArithmeticOptions{} switch b.Op { // arithmetic case Addition: - datum, err := compute.Add(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Add(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) case Subtraction: - datum, err := compute.Subtract(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Subtract(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) case Multiplication: - datum, err := compute.Multiply(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Multiply(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) case Division: - datum, err := compute.Divide(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Divide(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) - // comparisions TODO: - // These return a boolean array case Equal: if leftArr.DataType() != rightArr.DataType() { return nil, ErrCantCompareDifferentTypes(leftArr.DataType(), rightArr.DataType()) @@ -592,6 +590,7 @@ func NewScalarFunction(function supportedFunctions, Argument Expression) *Scalar } func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow.Array, error) { + ctx := context.Background() switch s.Function { case Upper: arr, err := EvalExpression(s.Arguments, batch) @@ -611,7 +610,7 @@ func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow. 
if err != nil { return nil, err } - datum, err := compute.AbsoluteValue(context.TODO(), compute.ArithmeticOptions{}, compute.NewDatum(arr)) + datum, err := compute.AbsoluteValue(ctx, compute.ArithmeticOptions{}, compute.NewDatum(arr)) if err != nil { return nil, err } @@ -621,7 +620,7 @@ func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow. if err != nil { return nil, err } - datum, err := compute.Round(context.TODO(), compute.DefaultRoundOptions, compute.NewDatum(arr)) + datum, err := compute.Round(ctx, compute.DefaultRoundOptions, compute.NewDatum(arr)) if err != nil { return nil, err } @@ -656,7 +655,7 @@ func EvalCast(c *CastExpr, batch *operators.RecordBatch) (arrow.Array, error) { // Use Arrow compute kernel to cast castOpts := compute.SafeCastOptions(c.TargetType) - out, err := compute.CastArray(context.TODO(), arr, castOpts) + out, err := compute.CastArray(context.Background(), arr, castOpts) if err != nil { return nil, fmt.Errorf("cast error: cannot cast %s to %s: %w", arr.DataType(), c.TargetType, err) diff --git a/src/Backend/opti-sql-go/Expr/expr_test.go b/src/Backend/opti-sql-go/Expr/expr_test.go index 5941807..f0d2f43 100644 --- a/src/Backend/opti-sql-go/Expr/expr_test.go +++ b/src/Backend/opti-sql-go/Expr/expr_test.go @@ -1,7 +1,6 @@ package Expr import ( - "fmt" "log" "opti-sql-go/operators" "testing" @@ -1727,7 +1726,6 @@ func TestNullCheckExpr(t *testing.T) { defer maskArr.Release() boolMask := maskArr.(*array.Boolean) - fmt.Printf("boolean mask:\t%v\n", boolMask) if boolMask.Len() != 5 { t.Fatalf("expected length 5 mask, got %d", boolMask.Len()) } diff --git a/src/Backend/opti-sql-go/config/config.go b/src/Backend/opti-sql-go/config/config.go index 627136b..17154fe 100644 --- a/src/Backend/opti-sql-go/config/config.go +++ b/src/Backend/opti-sql-go/config/config.go @@ -92,7 +92,6 @@ var configInstance *Config = &Config{ EnableQueryStats: true, EnableMemoryStats: true, }, - // TODO: remove hardcoded secretes before 
production. we are just testing for now Secretes: secretesConfig{ AccessKey: "DO8013ZT6VDHJ2EM94RN", SecretKey: "kPvQSMt6naiwe/FhDnzXpYmVE5yzJUsIR0/OJpsUNzo", diff --git a/src/Backend/opti-sql-go/main.go b/src/Backend/opti-sql-go/main.go index f277de6..82e1eb8 100644 --- a/src/Backend/opti-sql-go/main.go +++ b/src/Backend/opti-sql-go/main.go @@ -6,8 +6,6 @@ import ( "os" ) -// TODO: in the project operators make sure the record batches account for the RowCount field properly. - func main() { if len(os.Args) > 1 { if err := config.Decode(os.Args[1]); err != nil { diff --git a/src/Backend/opti-sql-go/operators/Join/hashJoin.go b/src/Backend/opti-sql-go/operators/Join/hashJoin.go index e630a70..13a6969 100644 --- a/src/Backend/opti-sql-go/operators/Join/hashJoin.go +++ b/src/Backend/opti-sql-go/operators/Join/hashJoin.go @@ -17,10 +17,7 @@ import ( "github.com/apache/arrow/go/v17/arrow/memory" ) -// TODO: clean up PR and push again -// TODO: write intergration test for operators to work together // TODO: see ticket #27 -// TODO: take small break from this project to work on inverted index search for a couple days var ( ErrInvalidJoinClauseCount = func(l, r int) error { @@ -395,7 +392,7 @@ func (hj *HashJoinExec) buildOutputArrays( leftIdxArr arrow.Array, rightIdxArr arrow.Array, ) ([]arrow.Array, error) { - ctx := context.TODO() + ctx := context.Background() output := make([]arrow.Array, hj.schema.NumFields()) for i := range len(leftCols) { diff --git a/src/Backend/opti-sql-go/operators/OPERATORS.md b/src/Backend/opti-sql-go/operators/OPERATORS.md new file mode 100644 index 0000000..1302f6b --- /dev/null +++ b/src/Backend/opti-sql-go/operators/OPERATORS.md @@ -0,0 +1,158 @@ +# Operators — quick reference + +This document gives a concise overview of the operator model used in this repository, how to construct the most common operators, and what each operator's constructor expects and why. 
Placeholders like `Expr.Expression` and `RecordBatch` refer to the repository types found under `Expr` and `operators/record.go`. + +## What is an Operator? + +An operator implements the `operators.Operator` interface: + +- `Next(n uint16) (*operators.RecordBatch, error)` — return up to `n` rows (many operators ignore the exact n and read/produce what they need). Returns `io.EOF` when finished. +- `Schema() *arrow.Schema` — the operator's output schema. +- `Close() error` — release resources (files, network handles, etc.). + +The basic data unit is `operators.RecordBatch` (schema + Arrow arrays + rowcount). Operators compose: the output of one operator becomes the input (child) of the next. + +## Leaf (source) operators + +Leaf operators are the pipeline entry points. They read data from some storage and produce `RecordBatch` values. + +- CSV source + - Constructor: `project.NewProjectCSVLeaf(io.Reader)` + - Inputs: an `io.Reader` (file, buffer). Produces typed Arrow arrays from CSV columns. + - Notes: simple, fast for local CSVs. Use when you want a streaming CSV source. + +- Parquet source + - Constructor: (parquet reader; see project package) + - Inputs: parquet file handle. Produces Arrow arrays preserving parquet types. + +- In-memory source + - Constructor: `project.NewInMemoryProjectExec(names []string, columns []any)` + - Inputs: column names and Go slices (used heavily in unit tests). + - Notes: useful for deterministic test inputs and small-memory datasets. + +- S3 / NetworkResource + - Use `project.NewStreamReader` to create a network file reader; it allows chunked reading of files that are not on local disk. + - Notes: the repository supports reading remote files; a configuration option lets you download the full remote file first to avoid per-request network latency when the operator needs repeated random access (e.g., for Parquet or when sorting). This is exposed as a NetworkResource / download option in the project/source constructors.
+ - The result of `project.NewStreamReader(fileName)` can be passed directly to `project.NewProjectCSVLeaf(io.Reader)` and `project.NewParquetSource(readSeeker)`. This was intentional, so that working with S3 files is as seamless as possible. + +## How to construct operators — summary of common operators + +The pattern is consistent: each operator has a `NewXxx...` constructor that takes one or more child operators, expression descriptors, or configuration params. + +### Project (Select) +- Constructor: `project.NewProjectExec(child operators.Operator, exprs []Expr.Expression)` +- Purpose: evaluate a list of projection expressions (column refs, scalar functions, aliases) and return a batch with only the requested columns. +- What to pass in: + - `child` — the input operator to project from (leaf or intermediate op). + - `exprs` — expressions created with `Expr.NewColumnResolve`, `Expr.NewLiteralResolve`, `Expr.NewAlias`, `Expr.NewScalarFunction`, etc. +- Why: keeps expression evaluation centralized and lets downstream operators work with a narrow schema. + +### Filter +- Constructor: `filter.NewFilterExec(child operators.Operator, predicate Expr.Expression)` +- Purpose: apply boolean predicates to input rows and emit only matching rows. +- What to pass in: + - `child` — operator producing input rows. + - `predicate` — an `Expr.Expression` that evaluates to boolean (can combine binary operators, scalar functions, null checks). +- Why: decouples predicate evaluation from projection and other operators; filter may buffer results across batches to serve limit-like requests. + +### Limit +- Constructor: `filter.NewLimitExec(child operators.Operator, limit uint64)` +- Purpose: stop the pipeline after `limit` rows are emitted. +- What to pass in: the `child` operator and the numeric `limit`. +- Why: simple consumer-side cap; implemented as a thin operator above any child.
+ +### Distinct +- Constructor: `filter.NewDistinctExec(child operators.Operator, colExprs []Expr.Expression)` +- Purpose: remove duplicate rows on the selected key columns. +- What to pass in: `child` and the list of key column expressions. +- Why: used to produce unique values for a given set of columns; often followed by `Sort` for deterministic order. + +### Sort / TopK +- Constructors: + - `aggr.NewSortExec(child operators.Operator, sortKeys []aggr.SortKey)` — fully sorts input + - `aggr.NewTopKSortExec(child operators.Operator, sortKeys []aggr.SortKey, k uint16)` — keep top-k +- Purpose: order rows by one or more columns. +- What to pass in: + - `child` — input operator + - `sortKeys` — built with `aggr.NewSortKey(expr Expr.Expression, asc bool)`; multiple keys are combined with `aggr.CombineSortKeys(...)`. +- Why: some consumers require sorted input (ORDER BY) or only the top-k entries (TopK). +- Notes: current implementations read data into memory and sort; care must be taken for large datasets. + +### GroupBy / Aggregation +- Constructors: + - `aggr.NewGroupByExec(child operators.Operator, groupExpr []aggr.AggregateFunctions, groupBy []Expr.Expression)` — group-by with aggregates + - `aggr.NewGlobalAggrExec(child operators.Operator, aggExprs []aggr.AggregateFunctions)` — global aggregation (no GROUP BY) +- Purpose: compute aggregates (SUM, AVG, COUNT, MIN, MAX) grouped by one or more columns. +- What to pass in: + - `child` — input operator + - `groupExpr` / `aggExprs` — list of `aggr.AggregateFunctions` (built with `aggr.NewAggregateFunctions(aggr.AggrFunc, Expr.Expression)`) describing the aggregate function and its child expression (usually a column). + - `groupBy` — expressions for the group-by keys (column resolves). +- Why: central place for aggregator logic; constructors validate types (numeric types for SUM/AVG) and construct the output schema. 
+ +### Join (HashJoin) +- Constructor: `join.NewHashJoinExec(left, right operators.Operator, clause join.JoinClause, joinType join.JoinType, filters []Expr.Expression)` +- Purpose: perform hash-based joins (Inner, Left, Right). +- What to pass in: + - `left`, `right` — child operators for the two sides of the join (usually scans or projections) + - `clause` — `join.NewJoinClause(leftExprs []Expr.Expression, rightExprs []Expr.Expression)` describing which columns pair together (supports multiple equality clauses) + - `joinType` — `join.InnerJoin`, `join.LeftJoin`, etc. + - `filters` — optional post-join filters (not always used). These are not implemented yet and are not planned soon, because they can simply be treated as Filter operations. +- Why: joins combine rows from two inputs. The constructor validates schema compatibility and builds the combined output schema (prefixing duplicate column names with `left_`/`right_`). +- Implementation notes: the HashJoin reads the entirety of both children (current implementation) into memory and builds a hash table on the right side for probing. + +## Common constructor patterns & rationale + +- Child operator(s) always come first: most operators are constructed around one input (`child`) or two (`left`, `right`). This makes pipelines composable. +- Expressions are passed as `Expr.Expression` objects. Use the `Expr` package helpers to build column resolves, literals, scalar functions, binary operators and aliases. +- Constructors perform validation: type checking for aggregates, matching the number of join expressions, or validity of projection expressions — this fails fast at construction time instead of at runtime. +- Many blocking operators (Sort, GroupBy, Join) read the full input before producing output. Be careful with large inputs — these operators are not yet externalized (spill-to-disk) and may require configuration or chunking for large datasets.
+ +## Practical examples (pseudocode) + +- Project + Filter + Limit pipeline: + +```go +src := project.NewProjectCSVLeaf(fileReader) +pred := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 30)) +filt, _ := filter.NewFilterExec(src, pred) +projExprs := Expr.NewExpressions(Expr.NewColumnResolve("id"), Expr.NewColumnResolve("name")) +proj, _ := project.NewProjectExec(filt, projExprs) +lim, _ := filter.NewLimitExec(proj, 10) +batch, _ := lim.Next(10) +``` + +- GroupBy example: + +```go +col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } +aggs := []aggr.AggregateFunctions{{AggrFunc: aggr.Sum, Child: col("salary")}} +gb, _ := aggr.NewGroupByExec(src, aggs, []Expr.Expression{col("department")}) +result, _ := gb.Next(1000) +``` + +- HashJoin example (equality on `id`): + +```go +clause := join.NewJoinClause([]Expr.Expression{Expr.NewColumnResolve("id")}, []Expr.Expression{Expr.NewColumnResolve("id")}) +j, _ := join.NewHashJoinExec(leftSrc, rightSrc, clause, join.InnerJoin, nil) +batch, _ := j.Next(100) +``` + +## Notes & best practices + +- Always call `Close()` on the root operator when done (after `Next` returns `io.EOF`) to release files and network handles. +- Use `project.NewInMemoryProjectExec` for tests — it builds reproducible `RecordBatch` inputs quickly. +- When writing pipelines that may read remote files, prefer to configure the source to download the whole file if the operator will need random access or many read passes (sorting, joining, grouping). This avoids repeated network calls and unpredictable latency. +- Watch out for duplicate column names after joins: the join constructor prefixes with `left_`/`right_` when needed. + +## Where to look next in the codebase +- `operators/record.go` — `Operator` interface and `RecordBatch` helpers (builder, PrettyPrint). +- `operators/project/` — project implementations and CSV/parquet readers. 
+- `operators/filter/` — Filter, Limit, Distinct operator implementations. +- `operators/aggr/` — Sort, TopK, GroupBy and aggregate implementations. +- `operators/Join/` — HashJoin implementation. + +Reading the tests +----------------- + +For concrete examples of how SQL statements map to operator pipelines, read the integration/unit tests in `operators/test/` (and other test files under `operators/`). The tests build real pipelines (CSV/InMemory -> Filter/Project/Join/GroupBy/Sort/etc.) and show the exact constructor calls and expressions used to represent SQL queries. They are the best source of truth for small end-to-end examples. \ No newline at end of file diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index 07ba08f..7ca86ea 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -433,7 +433,6 @@ func buildDynamicArray(mem memory.Allocator, dt arrow.DataType, values []any) ar case arrow.BOOL: b := array.NewBooleanBuilder(mem) for _, v := range values { - fmt.Printf("values:%v type:%T\n", v, v) if v == nil { b.AppendNull() } else { diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go index 10756f0..41434ac 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go @@ -150,7 +150,6 @@ func TestNewGroupByExecAndSchema(t *testing.T) { if schema == nil { t.Fatalf("schema should not be nil") } - fmt.Println(schema) // group-by + 1 agg = 2 fields if got, want := schema.NumFields(), 2; got != want { @@ -198,7 +197,6 @@ func TestNewGroupByExecAndSchema(t *testing.T) { } schema := gb.Schema() - fmt.Printf("schema: %v\n", schema) wantFields := len(groupBy) + len(aggs) if schema.NumFields() != wantFields { t.Fatalf("expected %d fields, got %d", wantFields, schema.NumFields()) diff --git 
a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go index 1fcccdd..0f7c3b5 100644 --- a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -266,7 +266,7 @@ func validAggrType(dt arrow.DataType) bool { } func castArrayToFloat64(arr arrow.Array) (arrow.Array, error) { - outDatum, err := compute.CastArray(context.TODO(), arr, compute.NewCastOptions(&arrow.Float64Type{}, true)) + outDatum, err := compute.CastArray(context.Background(), arr, compute.NewCastOptions(&arrow.Float64Type{}, true)) if err != nil { return nil, err } diff --git a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index 18ca64a..1b731f8 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -119,7 +119,7 @@ func (s *SortExec) Next(n uint16) (*operators.RecordBatch, error) { idxArray := idxToArrowArray(idx, mem) defer idxArray.Release() for i := range len(allColumns) { - arr, err := compute.TakeArray(context.TODO(), allColumns[i], idxArray) + arr, err := compute.TakeArray(context.Background(), allColumns[i], idxArray) if err != nil { return nil, err } @@ -160,7 +160,7 @@ func (s *SortExec) Close() error { return s.input.Close() } func (s *SortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([]arrow.Array, error) { - ctx := context.TODO() + ctx := context.Background() resultColumns := make([]arrow.Array, len(s.schema.Fields())) offsetArray := genoffsetTakeIdx(s.consumedOffset, readsize, mem) defer offsetArray.Release() @@ -347,7 +347,7 @@ func joinArrays(existing, newarrs []arrow.Array, mem memory.Allocator) ([]arrow. 
} func (t *TopKSortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([]arrow.Array, error) { - ctx := context.TODO() + ctx := context.Background() resultColumns := make([]arrow.Array, len(t.schema.Fields())) offsetArray := genoffsetTakeIdx(t.consumedOffset, readsize, mem) defer offsetArray.Release() diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index 5493712..d09f4a2 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -122,7 +122,7 @@ func (f *FilterExec) Close() error { func ApplyBooleanMask(col arrow.Array, mask *array.Boolean) (arrow.Array, error) { datum, err := compute.Filter( - context.TODO(), + context.Background(), compute.NewDatum(col), compute.NewDatum(mask), *compute.DefaultFilterOptions(), @@ -163,13 +163,11 @@ func validPredicates(pred Expr.Expression, schema *arrow.Schema) bool { if err != nil { return false } - //TODO: allow for nulls to be comparable fmt.Printf("dt1:\t%v\ndt2:\t%v\n", dt1, dt2) if !arrow.TypeEqual(dt1, dt2) { return false } fmt.Printf("left:\t%v\nright:\t%v\n", p.Left, p.Right) - // recursively validate children return validPredicates(p.Left, schema) && validPredicates(p.Right, schema) @@ -218,16 +216,17 @@ func (f *FilterExec) sliceFilterCols(n int64, mem memory.Allocator) ([]arrow.Arr defer keepArr.Release() // For each column: materialize output slice + update buffer + ctx := context.Background() for i, col := range f.bufferedCols { // emit slice - sliceOut, err := compute.TakeArray(context.TODO(), col, emitArr) + sliceOut, err := compute.TakeArray(ctx, col, emitArr) if err != nil { return nil, err } out[i] = sliceOut // keep remaining slice - keepSlice, err := compute.TakeArray(context.TODO(), col, keepArr) + keepSlice, err := compute.TakeArray(ctx, col, keepArr) if err != nil { return nil, err } diff --git a/src/Backend/opti-sql-go/operators/filter/filter_test.go 
b/src/Backend/opti-sql-go/operators/filter/filter_test.go index 9facb8c..8e90489 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter_test.go +++ b/src/Backend/opti-sql-go/operators/filter/filter_test.go @@ -2,7 +2,6 @@ package filter import ( "errors" - "fmt" "io" "opti-sql-go/Expr" "testing" @@ -333,16 +332,14 @@ func TestFilterBuffer(t *testing.T) { if err != nil { t.Fatalf("failed to create filter exec: %v", err) } - rc, err := f.Next(5) + _, err = f.Next(5) if err != nil { t.Fatalf("unexpected error: %v", err) } - fmt.Printf("First Batch:\t%v\n", rc.PrettyPrint()) - rc, err = f.Next(5) + _, err = f.Next(5) if err != nil { t.Fatalf("unexpected error: %v", err) } - fmt.Printf("second Batch:\t%v\n", rc.PrettyPrint()) }) } diff --git a/src/Backend/opti-sql-go/operators/filter/limit.go b/src/Backend/opti-sql-go/operators/filter/limit.go index d25b848..6a5aa86 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit.go +++ b/src/Backend/opti-sql-go/operators/filter/limit.go @@ -110,6 +110,7 @@ func (d *DistinctExec) Next(n uint16) (*operators.RecordBatch, error) { return nil, io.EOF } mem := memory.NewGoAllocator() + ctx := context.Background() if !d.consumedInput { for { childBatch, err := d.input.Next(math.MaxUint16) @@ -161,7 +162,7 @@ func (d *DistinctExec) Next(n uint16) (*operators.RecordBatch, error) { takeArray := idxToArrowArray(idxTracker, mem) for i := range len(childBatch.Columns) { largeArray := childBatch.Columns[i] - uniqueElements, err := compute.TakeArray(context.TODO(), largeArray, takeArray) + uniqueElements, err := compute.TakeArray(ctx, largeArray, takeArray) if err != nil { return nil, err } @@ -209,7 +210,7 @@ func (d *DistinctExec) Close() error { return d.input.Close() } func (d *DistinctExec) consumeDistinctArrays(readSize uint64, mem memory.Allocator) ([]arrow.Array, error) { - ctx := context.TODO() + ctx := context.Background() resultColumns := make([]arrow.Array, len(d.schema.Fields())) offsetArray := 
genoffsetTakeIdx(d.consumedOffset, readSize, mem) defer offsetArray.Release() diff --git a/src/Backend/opti-sql-go/operators/project/parquet.go b/src/Backend/opti-sql-go/operators/project/parquet.go index 42d5c14..50aa856 100644 --- a/src/Backend/opti-sql-go/operators/project/parquet.go +++ b/src/Backend/opti-sql-go/operators/project/parquet.go @@ -49,7 +49,7 @@ func NewParquetSource(r parquet.ReaderAtSeeker) (*ParquetSource, error) { if err != nil { return nil, err } - rdr, err := arrowReader.GetRecordReader(context.TODO(), nil, nil) + rdr, err := arrowReader.GetRecordReader(context.Background(), nil, nil) if err != nil { return nil, err } @@ -98,7 +98,7 @@ func NewParquetSourcePushDown(r parquet.ReaderAtSeeker, columns []string) (*Parq wantedColumnsIDX = append(wantedColumnsIDX, idx_array...) } - rdr, err := arrowReader.GetRecordReader(context.TODO(), wantedColumnsIDX, nil) + rdr, err := arrowReader.GetRecordReader(context.Background(), wantedColumnsIDX, nil) if err != nil { return nil, err } diff --git a/src/Backend/opti-sql-go/operators/project/parquet_test.go b/src/Backend/opti-sql-go/operators/project/parquet_test.go index ff28535..c051d9f 100644 --- a/src/Backend/opti-sql-go/operators/project/parquet_test.go +++ b/src/Backend/opti-sql-go/operators/project/parquet_test.go @@ -34,7 +34,6 @@ schema: metadata: ["PARQUET:field_id": "-1"] - lon: type=float64, nullable */ -// TODO: more to their own files later down the line func existIn(str string, arr []string) bool { for _, a := range arr { if a == str { diff --git a/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go b/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go index 47435b7..3832a39 100644 --- a/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go +++ b/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go @@ -834,10 +834,3 @@ func TestProjectExec_FunctionExpr(t *testing.T) { } }) } - -/* -complex expr -ex: alias(function(column |operator| literal) 
|operator| literal) -TODO: not the most important thing right now since we know basic expression are fine -*/ -func TestProjectExec_ComplexExpr(t *testing.T) {} diff --git a/src/Backend/opti-sql-go/operators/test/intergration_test.go b/src/Backend/opti-sql-go/operators/test/intergration_test.go index 20b0a01..15786a9 100644 --- a/src/Backend/opti-sql-go/operators/test/intergration_test.go +++ b/src/Backend/opti-sql-go/operators/test/intergration_test.go @@ -456,7 +456,6 @@ func TestGroupByAggregation(t *testing.T) { // (5.B) SELECT is_active, COUNT(*) AS active_count, AVG(age_years) AS avg_age FROM source1 GROUP BY is_active; t.Run("5B", func(t *testing.T) { src := source1Project() - fmt.Printf("\t%v\n", src.Schema()) groupBy := []Expr.Expression{Expr.NewColumnResolve("is_active")} aggs := []aggr.AggregateFunctions{ aggr.NewAggregateFunctions(aggr.Count, Expr.NewColumnResolve("id")), @@ -568,7 +567,6 @@ func TestJoinFilterProjLimit(t *testing.T) { if err != nil { t.Fatalf("join init failed: %v", err) } - fmt.Printf("schema:%v\n", j.Schema()) pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age_years"), Expr.GreaterThan, diff --git a/src/Backend/opti-sql-go/operators/test/t1_test.go b/src/Backend/opti-sql-go/operators/test/t1_test.go index 71bf2b1..dd728fb 100644 --- a/src/Backend/opti-sql-go/operators/test/t1_test.go +++ b/src/Backend/opti-sql-go/operators/test/t1_test.go @@ -2,7 +2,6 @@ package test import ( "errors" - "fmt" "io" "math" "opti-sql-go/Expr" @@ -991,7 +990,6 @@ func TestHavingExec(t *testing.T) { ) hv, _ := aggr.NewHavingExec(gb, having) - fmt.Printf("\t%v\n", hv.Schema()) batch, err := hv.Next(500) if err != nil { t.Fatalf("having next failed: %v", err) diff --git a/src/Backend/opti-sql-go/substrait/substrait_test.go b/src/Backend/opti-sql-go/substrait/substrait_test.go index 122ad0b..fe23790 100644 --- a/src/Backend/opti-sql-go/substrait/substrait_test.go +++ b/src/Backend/opti-sql-go/substrait/substrait_test.go @@ -31,13 +31,13 @@ func 
TestDummyInput(t *testing.T) { dummyRequest := &QueryExecutionRequest{ SqlStatement: "SELECT * FROM table", SubstraitLogical: []byte("CgJTUxIMCgpTZWxlY3QgKiBGUk9NIHRhYmxl"), - Id: "GenerateDTODOHaasdavdasvasdvada", + Id: "GenerateDTMoneyOHaasdavdasvasdvada", Source: &SourceType{ S3Source: "s3://my-bucket/data/table.parquet", Mime: "application/vnd.apache.parquet", }, } - resp, err := ss.ExecuteQuery(context.TODO(), dummyRequest) + resp, err := ss.ExecuteQuery(context.Background(), dummyRequest) if err != nil { t.Errorf("Expected no error, got %v", err) }