diff --git a/.gitignore b/.gitignore index 3970ccd..6ea7b8b 100644 --- a/.gitignore +++ b/.gitignore @@ -107,6 +107,8 @@ src/Backend/test_data/json # Allow a specific CSV dataset that we want tracked despite the general csv ignores !src/Backend/test_data/csv/ !src/Backend/test_data/csv/Mental_Health_and_Social_Media_Balance_Dataset.csv +!src/Backend/test_data/csv/intergration_test_data_1.csv +!src/Backend/test_data/csv/intergration_test_data_2.csv # allow parquet file !src/Backend/test_data/parquet/ !src/Backend/test_data/parquet/capitals_clean.parquet \ No newline at end of file diff --git a/README.md b/README.md index 52dedf9..51f4799 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ Initial development is done in **Go** (`opti-sql-go`), which serves as the prima - `/operators` - SQL operator implementations (filter, join, aggregation, project) - `/physical-optimizer` - Query plan parsing and optimization - `/substrait` - Substrait plan integration +- `/operators/OPERATORS.md` - concise reference for operator constructors, behavior and examples ## Branching Model diff --git a/src/Backend/opti-sql-go/Expr/expr.go b/src/Backend/opti-sql-go/Expr/expr.go index b3eed34..4899a15 100644 --- a/src/Backend/opti-sql-go/Expr/expr.go +++ b/src/Backend/opti-sql-go/Expr/expr.go @@ -105,6 +105,8 @@ func EvalExpression(expr Expression, batch *operators.RecordBatch) (arrow.Array, return EvalScalarFunction(e, batch) case *CastExpr: return EvalCast(e, batch) + case *NullCheckExpr: + return EvalNullCheckMask(e.Expr, batch) default: return nil, ErrUnsupportedExpression(expr.String()) } @@ -146,6 +148,8 @@ func ExprDataType(e Expression, inputSchema *arrow.Schema) (arrow.DataType, erro return nil, err } return inferScalarFunctionType(ex.Function, argType), nil + case *NullCheckExpr: + return arrow.FixedWidthTypes.Boolean, nil default: return nil, ErrUnsupportedExpression(ex.String()) @@ -215,7 +219,50 @@ type LiteralResolve struct { } func NewLiteralResolve(Type arrow.DataType, 
Value any) *LiteralResolve { - return &LiteralResolve{Type: Type, Value: Value} + var castVal any + + switch v := Value.(type) { + + // ------------------------------------------------------ + // INT → cast based on Arrow integer type + // ------------------------------------------------------ + case int: + switch Type.ID() { + case arrow.INT8: + castVal = int8(v) + case arrow.INT16: + castVal = int16(v) + case arrow.INT32: + castVal = int32(v) + case arrow.INT64: + castVal = int64(v) + case arrow.UINT8: + castVal = uint8(v) + case arrow.UINT16: + castVal = uint16(v) + case arrow.UINT32: + castVal = uint32(v) + case arrow.UINT64: + castVal = uint64(v) + default: + // not an integer Arrow type → store original + castVal = v + } + case string: + castVal = string(v) + case bool: + castVal = bool(v) + case float64: + switch Type.ID() { + case arrow.FLOAT32: + castVal = float32(v) + case arrow.FLOAT64: + castVal = float64(v) + } + default: + castVal = Value + } + return &LiteralResolve{Type: Type, Value: castVal} } func EvalLiteral(l *LiteralResolve, batch *operators.RecordBatch) (arrow.Array, error) { n := int(batch.RowCount) @@ -355,6 +402,16 @@ func EvalLiteral(l *LiteralResolve, batch *operators.RecordBatch) (arrow.Array, b.Append(v) } return b.NewArray(), nil + // ------------------------------ + // Nulls + // ------------------------------ + case arrow.NULL: + b := array.NewNullBuilder(memory.DefaultAllocator) + defer b.Release() + for i := 0; i < n; i++ { + b.AppendNull() + } + return b.NewArray(), nil default: return nil, fmt.Errorf("literal type %s not supported", l.Type) @@ -389,37 +446,36 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error if err != nil { return nil, err } + ctx := context.Background() opt := compute.ArithmeticOptions{} switch b.Op { // arithmetic case Addition: - datum, err := compute.Add(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Add(ctx, opt, 
compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) case Subtraction: - datum, err := compute.Subtract(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Subtract(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) case Multiplication: - datum, err := compute.Multiply(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Multiply(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) case Division: - datum, err := compute.Divide(context.TODO(), opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) + datum, err := compute.Divide(ctx, opt, compute.NewDatum(leftArr), compute.NewDatum(rightArr)) if err != nil { return nil, err } return unpackDatum(datum) - // comparisions TODO: - // These return a boolean array case Equal: if leftArr.DataType() != rightArr.DataType() { return nil, ErrCantCompareDifferentTypes(leftArr.DataType(), rightArr.DataType()) @@ -495,7 +551,6 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error return unpackDatum(datum) case Like: if leftArr.DataType() != arrow.BinaryTypes.String || rightArr.DataType() != arrow.BinaryTypes.String { - // regEx runs only on strings return nil, errors.New("binary operator Like only works on arrays of strings") } var compiledRegEx = compileSqlRegEx(rightArr.ValueStr(0)) @@ -503,7 +558,6 @@ func EvalBinary(b *BinaryExpr, batch *operators.RecordBatch) (arrow.Array, error leftStrArray := leftArr.(*array.String) for i := 0; i < leftStrArray.Len(); i++ { valid := validRegEx(leftStrArray.Value(i), compiledRegEx) - fmt.Printf("does %s match %s: %v\n", leftStrArray.Value(i), compiledRegEx, valid) filterBuilder.Append(valid) } return filterBuilder.NewArray(), nil @@ -536,6 +590,7 @@ func 
NewScalarFunction(function supportedFunctions, Argument Expression) *Scalar } func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow.Array, error) { + ctx := context.Background() switch s.Function { case Upper: arr, err := EvalExpression(s.Arguments, batch) @@ -555,7 +610,7 @@ func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow. if err != nil { return nil, err } - datum, err := compute.AbsoluteValue(context.TODO(), compute.ArithmeticOptions{}, compute.NewDatum(arr)) + datum, err := compute.AbsoluteValue(ctx, compute.ArithmeticOptions{}, compute.NewDatum(arr)) if err != nil { return nil, err } @@ -565,7 +620,7 @@ func EvalScalarFunction(s *ScalarFunction, batch *operators.RecordBatch) (arrow. if err != nil { return nil, err } - datum, err := compute.Round(context.TODO(), compute.DefaultRoundOptions, compute.NewDatum(arr)) + datum, err := compute.Round(ctx, compute.DefaultRoundOptions, compute.NewDatum(arr)) if err != nil { return nil, err } @@ -600,9 +655,8 @@ func EvalCast(c *CastExpr, batch *operators.RecordBatch) (arrow.Array, error) { // Use Arrow compute kernel to cast castOpts := compute.SafeCastOptions(c.TargetType) - out, err := compute.CastArray(context.TODO(), arr, castOpts) + out, err := compute.CastArray(context.Background(), arr, castOpts) if err != nil { - // This is a runtime cast error return nil, fmt.Errorf("cast error: cannot cast %s to %s: %w", arr.DataType(), c.TargetType, err) } @@ -615,6 +669,39 @@ func (c *CastExpr) String() string { return fmt.Sprintf("Cast(%s AS %s)", c.Expr, c.TargetType) } +type NullCheckExpr struct { + Expr Expression +} + +func NewNullCheckExpr(expr Expression) *NullCheckExpr { + return &NullCheckExpr{Expr: expr} +} +func (n *NullCheckExpr) ExprNode() {} +func (n *NullCheckExpr) String() string { + return fmt.Sprintf("NullCheck(%s)", n.Expr.String()) +} +func EvalNullCheckMask(expr Expression, batch *operators.RecordBatch) (arrow.Array, error) { + // Step 1: Evaluate 
underlying expression + arr, err := EvalExpression(expr, batch) + if err != nil { + return nil, err + } + + length := arr.Len() + + // Step 2: Build boolean mask + builder := array.NewBooleanBuilder(memory.DefaultAllocator) + builder.Resize(length) + + for i := 0; i < length; i++ { + builder.Append(!arr.IsNull(i)) // true = not null + } + // Step 3: produce final Boolean array + mask := builder.NewArray() + builder.Release() + return mask, nil +} + func upperImpl(arr arrow.Array) (arrow.Array, error) { strArr, ok := arr.(*array.String) if !ok { diff --git a/src/Backend/opti-sql-go/Expr/expr_test.go b/src/Backend/opti-sql-go/Expr/expr_test.go index 487e8f2..f0d2f43 100644 --- a/src/Backend/opti-sql-go/Expr/expr_test.go +++ b/src/Backend/opti-sql-go/Expr/expr_test.go @@ -1124,7 +1124,7 @@ func TestInferScalarFunctionType(t *testing.T) { // test constructor methods for expressions func TestExprInitMethods(t *testing.T) { t.Run("New Alias", func(t *testing.T) { - literal := NewLiteralResolve(arrow.BinaryTypes.String, string("the golfer")) + literal := NewLiteralResolve(arrow.BinaryTypes.String, "the golfer") a := NewAlias(literal, "nickname") if a == nil { t.Fatalf("failed to create Alias expression") @@ -1137,35 +1137,35 @@ func TestExprInitMethods(t *testing.T) { } }) t.Run("New LiteralResolve", func(t *testing.T) { - lit := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(42)) + lit := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 42) if lit == nil { t.Fatalf("failed to create LiteralResolve expression") } }) t.Run("New BinaryExpr", func(t *testing.T) { - left := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(10)) - right := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(5)) + left := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 10) + right := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 5) be := NewBinaryExpr(left, Addition, right) if be == nil { t.Fatalf("failed to create BinaryExpr expression") } }) t.Run("New ScalarFunc", func(t *testing.T) { - 
arg := NewLiteralResolve(arrow.BinaryTypes.String, string("hello")) + arg := NewLiteralResolve(arrow.BinaryTypes.String, "hello") sf := NewScalarFunction(Upper, arg) if sf == nil { t.Fatalf("failed to create ScalarFunction expression") } }) t.Run("New CastExpr", func(t *testing.T) { - expr := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(100)) + expr := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 100) ce := NewCastExpr(expr, arrow.PrimitiveTypes.Float64) if ce == nil { t.Fatalf("failed to create CastExpr expression") } }) t.Run("New Expressions", func(t *testing.T) { - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(7)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, 7) cr := NewColumnResolve("score") left := NewBinaryExpr(literal, Multiplication, cr) sf := NewScalarFunction(Abs, left) @@ -1182,7 +1182,7 @@ func TestExprInitMethods(t *testing.T) { func TestFilterBinaryExpr(t *testing.T) { t.Run("age == 22", func(t *testing.T) { rc := generateTestColumns() //4 - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(22)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (22)) col := NewColumnResolve("age") be := NewBinaryExpr(col, Equal, literal) arr, err := EvalExpression(be, rc) @@ -1203,7 +1203,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age != 22", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(22)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (22)) col := NewColumnResolve("age") be := NewBinaryExpr(col, NotEqual, literal) @@ -1226,7 +1226,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age < 34", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(34)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (34)) col := NewColumnResolve("age") be := NewBinaryExpr(col, LessThan, literal) @@ -1249,7 +1249,7 @@ func TestFilterBinaryExpr(t 
*testing.T) { }) t.Run("age <= 34", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(34)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (34)) col := NewColumnResolve("age") be := NewBinaryExpr(col, LessThanOrEqual, literal) @@ -1272,7 +1272,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age > 30", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (30)) col := NewColumnResolve("age") be := NewBinaryExpr(col, GreaterThan, literal) @@ -1295,7 +1295,7 @@ func TestFilterBinaryExpr(t *testing.T) { }) t.Run("age >= 34", func(t *testing.T) { rc := generateTestColumns() - literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(34)) + literal := NewLiteralResolve(arrow.PrimitiveTypes.Int32, (34)) col := NewColumnResolve("age") be := NewBinaryExpr(col, GreaterThanOrEqual, literal) @@ -1322,7 +1322,7 @@ func TestFilterBinaryExpr(t *testing.T) { left := NewBinaryExpr( NewColumnResolve("age"), GreaterThan, - NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + NewLiteralResolve(arrow.PrimitiveTypes.Int32, (30)), ) right := NewBinaryExpr( @@ -1352,7 +1352,7 @@ func TestFilterBinaryExpr(t *testing.T) { left := NewBinaryExpr( NewColumnResolve("age"), LessThan, - NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + NewLiteralResolve(arrow.PrimitiveTypes.Int32, (30)), ) right := NewBinaryExpr( @@ -1549,7 +1549,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name starts with a", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "A%" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) 
if err != nil { t.Fatalf("unexpected error from EvalExpression") @@ -1571,7 +1571,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name contains li", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "%li%" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { @@ -1597,7 +1597,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name ends with d", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "%d" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, (sqlStatment))) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { @@ -1623,7 +1623,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name is exactly 5 letters", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "_____" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) if err != nil { @@ -1649,7 +1649,7 @@ func TestLikeOperatorSQL(t *testing.T) { t.Run("name starts with Ch", func(t *testing.T) { rc := generateTestColumns() sqlStatment := "Ch%" - whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, string(sqlStatment))) + whereStatment := NewBinaryExpr(NewColumnResolve("name"), Like, NewLiteralResolve(arrow.BinaryTypes.String, sqlStatment)) boolMask, err := EvalExpression(whereStatment, rc) if err != nil 
{ @@ -1673,3 +1673,196 @@ func TestLikeOperatorSQL(t *testing.T) { } }) } + +func TestNullCases(t *testing.T) { + t.Run("null Column literal", func(t *testing.T) { + v := NewLiteralResolve(arrow.Null, nil) + array, err := EvalExpression(v, &operators.RecordBatch{ + RowCount: 10, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + t.Logf("\t%v\n", array) + }) + +} +func makeBatch(schema *arrow.Schema, cols []arrow.Array) *operators.RecordBatch { + return &operators.RecordBatch{ + Schema: schema, + Columns: cols, + RowCount: uint64(cols[0].Len()), + } +} + +func TestNullCheckExpr(t *testing.T) { + + t.Run("int32_some_nulls_mask", func(t *testing.T) { + mem := memory.NewGoAllocator() + // col = [10, null, 30, null, 50] + b := array.NewInt32Builder(mem) + b.AppendValues( + []int32{10, 20, 30, 40, 50}, + []bool{true, false, true, false, true}, + ) + arr := b.NewArray() + b.Release() + defer arr.Release() + + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "col", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + }, + nil, + ) + batch := makeBatch(schema, []arrow.Array{arr}) + t.Logf("%v\n", batch.PrettyPrint()) + expr := NewColumnResolve("col") + + maskArr, err := EvalNullCheckMask(expr, batch) + if err != nil { + t.Fatalf("EvalNullCheckMask failed: %v", err) + } + defer maskArr.Release() + + boolMask := maskArr.(*array.Boolean) + if boolMask.Len() != 5 { + t.Fatalf("expected length 5 mask, got %d", boolMask.Len()) + } + + // expected mask: [true, false, true, false, true] + want := []bool{true, false, true, false, true} + + for i := 0; i < 5; i++ { + if boolMask.Value(i) != want[i] { + t.Fatalf("mask[%d]: expected %v, got %v", i, want[i], boolMask.Value(i)) + } + } + }) + + // ─────────────────────────────────────────────── + + t.Run("string_all_nulls_mask", func(t *testing.T) { + mem := memory.NewGoAllocator() + + b := array.NewStringBuilder(mem) + b.AppendValues([]string{"A", "B", "C"}, []bool{false, false, false}) + arr := b.NewArray() + 
b.Release() + defer arr.Release() + + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "name", Type: arrow.BinaryTypes.String, Nullable: true}, + }, + nil, + ) + + batch := makeBatch(schema, []arrow.Array{arr}) + + expr := NewColumnResolve("name") + + maskArr, err := EvalNullCheckMask(expr, batch) + if err != nil { + t.Fatalf("EvalNullCheckMask failed: %v", err) + } + defer maskArr.Release() + + boolMask := maskArr.(*array.Boolean) + + // expected: [false, false, false] + for i := 0; i < boolMask.Len(); i++ { + if boolMask.Value(i) != false { + t.Fatalf("expected all false, got true at row %d", i) + } + } + }) + + // ─────────────────────────────────────────────── + + t.Run("no_nulls_all_true_mask", func(t *testing.T) { + mem := memory.NewGoAllocator() + + b := array.NewFloat64Builder(mem) + b.AppendValues([]float64{1.1, 2.2, 3.3}, []bool{true, true, true}) + arr := b.NewArray() + b.Release() + defer arr.Release() + + schema := arrow.NewSchema( + []arrow.Field{ + {Name: "val", Type: arrow.PrimitiveTypes.Float64, Nullable: false}, + }, + nil, + ) + + batch := makeBatch(schema, []arrow.Array{arr}) + + expr := NewColumnResolve("val") + + maskArr, err := EvalNullCheckMask(expr, batch) + if err != nil { + t.Fatalf("EvalNullCheckMask failed: %v", err) + } + defer maskArr.Release() + + boolMask := maskArr.(*array.Boolean) + + // expected mask = [true, true, true] + for i := 0; i < boolMask.Len(); i++ { + if !boolMask.Value(i) { + t.Fatalf("expected true at %d, got false", i) + } + } + }) +} + +func TestLiteralCast(t *testing.T) { + + tests := []struct { + name string + dtype arrow.DataType + value any + rowCount uint64 + }{ + // ---- INT CASTS ---- + {"Int8 literal", arrow.PrimitiveTypes.Int8, 5, 3}, + {"Int16 literal", arrow.PrimitiveTypes.Int16, 5, 3}, + {"Int32 literal", arrow.PrimitiveTypes.Int32, 5, 3}, + {"Int64 literal", arrow.PrimitiveTypes.Int64, 5, 3}, + {"Uint8 literal", arrow.PrimitiveTypes.Uint8, 5, 3}, + {"Uint16 literal", arrow.PrimitiveTypes.Uint16, 5, 
3}, + {"Uint32 literal", arrow.PrimitiveTypes.Uint32, 5, 3}, + {"Uint64 literal", arrow.PrimitiveTypes.Uint64, 5, 3}, + + // ---- FLOAT CASTS ---- + {"Float32 literal", arrow.PrimitiveTypes.Float32, 23.5, 4}, + {"Float64 literal", arrow.PrimitiveTypes.Float64, 23.5, 4}, + + // ---- STRING ---- + {"String literal", arrow.BinaryTypes.String, "hello", 2}, + + // ---- BOOL ---- + {"Bool literal", arrow.FixedWidthTypes.Boolean, true, 5}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + lit := NewLiteralResolve(tt.dtype, tt.value) + + batch := &operators.RecordBatch{ + RowCount: tt.rowCount, + } + + arr, err := EvalExpression(lit, batch) + if err != nil { + t.Fatalf("EvalExpression failed: %v", err) + } + defer arr.Release() + + if !arrow.TypeEqual(arr.DataType(), tt.dtype) { + t.Fatalf("expected Arrow type %v but got %v", tt.dtype, arr.DataType()) + } + }) + } +} diff --git a/src/Backend/opti-sql-go/config/config.go b/src/Backend/opti-sql-go/config/config.go index 627136b..17154fe 100644 --- a/src/Backend/opti-sql-go/config/config.go +++ b/src/Backend/opti-sql-go/config/config.go @@ -92,7 +92,6 @@ var configInstance *Config = &Config{ EnableQueryStats: true, EnableMemoryStats: true, }, - // TODO: remove hardcoded secretes before production. 
we are just testing for now Secretes: secretesConfig{ AccessKey: "DO8013ZT6VDHJ2EM94RN", SecretKey: "kPvQSMt6naiwe/FhDnzXpYmVE5yzJUsIR0/OJpsUNzo", diff --git a/src/Backend/opti-sql-go/go.mod b/src/Backend/opti-sql-go/go.mod index c9ee239..5b872b6 100644 --- a/src/Backend/opti-sql-go/go.mod +++ b/src/Backend/opti-sql-go/go.mod @@ -1,6 +1,6 @@ module opti-sql-go -go 1.23 +go 1.24.0 require ( github.com/apache/arrow/go/v15 v15.0.2 @@ -28,6 +28,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect github.com/aws/smithy-go v1.23.2 // indirect github.com/go-ini/ini v1.67.0 // indirect + github.com/go-jose/go-jose/v4 v4.1.3 // indirect github.com/goccy/go-json v0.10.3 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v24.3.25+incompatible // indirect diff --git a/src/Backend/opti-sql-go/go.sum b/src/Backend/opti-sql-go/go.sum index 9c4220d..7c4ee5c 100644 --- a/src/Backend/opti-sql-go/go.sum +++ b/src/Backend/opti-sql-go/go.sum @@ -37,6 +37,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs= +github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= diff --git a/src/Backend/opti-sql-go/main.go b/src/Backend/opti-sql-go/main.go index f277de6..82e1eb8 100644 --- a/src/Backend/opti-sql-go/main.go +++ b/src/Backend/opti-sql-go/main.go @@ -6,8 +6,6 @@ import ( 
"os" ) -// TODO: in the project operators make sure the record batches account for the RowCount field properly. - func main() { if len(os.Args) > 1 { if err := config.Decode(os.Args[1]); err != nil { diff --git a/src/Backend/opti-sql-go/operators/Join/hashJoin.go b/src/Backend/opti-sql-go/operators/Join/hashJoin.go index 2502e5b..13a6969 100644 --- a/src/Backend/opti-sql-go/operators/Join/hashJoin.go +++ b/src/Backend/opti-sql-go/operators/Join/hashJoin.go @@ -1 +1,415 @@ package join + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "math" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "strings" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" +) + +// TODO: see ticket #27 + +var ( + ErrInvalidJoinClauseCount = func(l, r int) error { + return fmt.Errorf("mismatched number of join expressions between left and right, left: %d vs right: %d", l, r) + } +) + +var ( + _ = (operators.Operator)(&HashJoinExec{}) +) + +type JoinType int + +const ( + InnerJoin JoinType = iota + LeftJoin + RightJoin +) + +func (j JoinType) String() string { + switch j { + case InnerJoin: + return "INNER JOIN" + case LeftJoin: + return "LEFT JOIN" + case RightJoin: + return "RIGHT JOIN" + default: + return "UNKNOWN JOIN TYPE" + } +} + +// taking in arrays of expressions allows for multiple join clauses +// Example: JOIN t2 ON t1.region = t2.region AND t1.city = t2.city +type JoinClause struct { + leftS []Expr.Expression + rightS []Expr.Expression +} + +func (j *JoinClause) String() string { + var b bytes.Buffer + + // defensive: if lengths differ, print whatever pairs exist + n := len(j.leftS) + if len(j.rightS) < n { + n = len(j.rightS) + } + + for i := 0; i < n; i++ { + b.WriteString(j.leftS[i].String()) + b.WriteString(" = ") + b.WriteString(j.rightS[i].String()) + + // add separator between pairs + if i < n-1 { + b.WriteString(" AND ") + } 
+ } + + return b.String() +} + +func NewJoinClause(leftS, rightS []Expr.Expression) JoinClause { + return JoinClause{ + leftS: leftS, + rightS: rightS, + } +} + +// left schema + right schema, if left and right have same column name, prefix with left_ and right_ +func joinSchemas(left, right *arrow.Schema) (*arrow.Schema, error) { + // table1 : id , name , age + // table2 : id , dept , region + fields := []arrow.Field{} + + leftNames := map[string]bool{} + rightNames := map[string]bool{} + + for i := 0; i < left.NumFields(); i++ { + leftNames[left.Field(i).Name] = true + } + for i := 0; i < right.NumFields(); i++ { + rightNames[right.Field(i).Name] = true + } + // left side + for i := 0; i < left.NumFields(); i++ { + f := left.Field(i) + name := f.Name + if rightNames[name] { + name = "left_" + name + } + fields = append(fields, arrow.Field{ + Name: name, + Type: f.Type, + Nullable: f.Nullable, + Metadata: f.Metadata, + }) + } + + // right side + for i := 0; i < right.NumFields(); i++ { + f := right.Field(i) + name := f.Name + if leftNames[name] { + name = "right_" + name + } + fields = append(fields, arrow.Field{ + Name: name, + Type: f.Type, + Nullable: f.Nullable, + Metadata: f.Metadata, + }) + } + + return arrow.NewSchema(fields, nil), nil + // produces + // left_id ,name,age, right_id,dept,region +} + +// otherwise go with hash joins +type HashJoinExec struct { + leftSource operators.Operator + rightSource operators.Operator + clause JoinClause + joinType JoinType + filters []Expr.Expression //TODO: incorpoarte + schema *arrow.Schema + done bool + // internalState + outputBatch []arrow.Array // intermediate storage for output arrays + +} +type hashEntry struct { + row int +} +type joinPair struct { + leftRow int + rightRow int +} + +func NewHashJoinExec(left operators.Operator, right operators.Operator, clause JoinClause, joinType JoinType, filters []Expr.Expression) (*HashJoinExec, error) { + schema, err := joinSchemas(left.Schema(), right.Schema()) + if err 
!= nil { + return nil, err + } + if len(clause.leftS) != len(clause.rightS) { + return nil, ErrInvalidJoinClauseCount(len(clause.leftS), len(clause.rightS)) + } + return &HashJoinExec{ + leftSource: left, + rightSource: right, + clause: clause, + joinType: joinType, + filters: filters, + schema: schema, + outputBatch: make([]arrow.Array, schema.NumFields()), + }, nil +} + +func (hj *HashJoinExec) Next(_ uint16) (*operators.RecordBatch, error) { + if hj.done { + return nil, io.EOF + } + mem := memory.NewGoAllocator() + leftArr, err := consumeOperator(hj.leftSource, mem) + if err != nil { + return nil, err + } + rightArr, err := consumeOperator(hj.rightSource, mem) + if err != nil { + return nil, err + } + emptyCols := make([]arrow.Array, hj.schema.NumFields()) + if len(leftArr) == 0 || len(rightArr) == 0 { + hj.done = true + return &operators.RecordBatch{ + Schema: hj.Schema(), + Columns: emptyCols, + RowCount: uint64(0), + }, nil + } + leftRowCount := leftArr[0].Len() + rightRowCount := rightArr[0].Len() + leftComp, err := buildComptables(hj.clause.leftS, leftArr, hj.leftSource.Schema()) + if err != nil { + return nil, err + } + + rightComp, err := buildComptables(hj.clause.rightS, rightArr, hj.rightSource.Schema()) + if err != nil { + return nil, err + } + ht := buildRightHashTable(rightComp, rightRowCount) + pairs := probeJoin(leftComp, ht, leftRowCount) + if len(pairs) == 0 { + hj.done = true + return &operators.RecordBatch{ + Schema: hj.Schema(), + Columns: emptyCols, + RowCount: 0, + }, nil + } + leftIdxArr, rightIdxArr, err := buildIndexArrays(mem, pairs) + if err != nil { + return nil, err + } + + outArr, err := hj.buildOutputArrays(leftArr, rightArr, leftIdxArr, rightIdxArr) + if err != nil { + return nil, err + } + hj.done = true + return &operators.RecordBatch{ + Schema: hj.schema, + Columns: outArr, + RowCount: uint64(outArr[0].Len()), + }, nil +} +func (hj *HashJoinExec) Schema() *arrow.Schema { return hj.schema } +func (hj *HashJoinExec) Close() error 
{ + // do other clean up but for now just pass down to child + err1 := hj.leftSource.Close() + err2 := hj.rightSource.Close() + if err1 != nil { + return err1 + } + if err2 != nil { + return err2 + } + return nil +} + +func consumeOperator(o operators.Operator, mem memory.Allocator) ([]arrow.Array, error) { + + AllArrays := make([]arrow.Array, o.Schema().NumFields()) // concated columns + for { + childRecordBatch, err := o.Next(math.MaxUint16) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + for i := range childRecordBatch.Columns { + if AllArrays[i] == nil { + AllArrays[i] = childRecordBatch.Columns[i] + continue + } + largerArray, err := array.Concatenate([]arrow.Array{AllArrays[i], childRecordBatch.Columns[i]}, mem) + if err != nil { + return nil, err + } + AllArrays[i] = largerArray + } + } + return AllArrays, nil +} + +func buildComptables(exprs []Expr.Expression, cols []arrow.Array, schema *arrow.Schema) ([]arrow.Array, error) { + compArr := make([]arrow.Array, len(exprs)) + for i, expr := range exprs { + arr, err := Expr.EvalExpression(expr, &operators.RecordBatch{ + Schema: schema, + Columns: cols, + RowCount: uint64(cols[0].Len()), + }) + if err != nil { + return nil, err + } + compArr[i] = arr + } + return compArr, nil + +} + +func buildRowKey(cols []arrow.Array, row int) string { + var b strings.Builder + hasNull := false + + for i, col := range cols { + if i > 0 { + b.WriteByte('|') // separator between cols + } + + if col.IsNull(row) { + hasNull = true + // Keep a placeholder so non-null rows can’t collide with “all-null” rows. + b.WriteString("NULL") + continue + } + + b.WriteString(col.ValueStr(row)) + } + + // If there were no NULLs in this row, we’re done. + // Equal non-NULL rows on left/right will produce identical keys → join behaves as usual. + if !hasNull { + return b.String() + } + + // SQL semantics: any NULL in the join key means this row should not match + // anything from the other side. 
We “salt” the key with the identity of the + // `cols` slice so left and right sides will produce *different* full keys. + // + // This still lets rows *on the same side* share a bucket (doesn’t hurt), + // but probe from the other side will never see them as equal. + b.WriteByte('#') + b.WriteString(fmt.Sprintf("%p", cols)) + + return b.String() +}
+
+// buildRightHashTable indexes the build (right) side of the join: every row
+// r in [0, rowCount) is stored under buildRowKey(rightComp, r). Rows that
+// produce the same key share one bucket, in ascending row order.
+func buildRightHashTable(rightComp []arrow.Array, rowCount int) map[string][]hashEntry {
+	ht := make(map[string][]hashEntry, rowCount)
+
+	for r := 0; r < rowCount; r++ {
+		key := buildRowKey(rightComp, r)
+		ht[key] = append(ht[key], hashEntry{row: r})
+	}
+	return ht
+}
+
+// probeJoin looks up every left row in rightHT and returns one joinPair per
+// (left row, matching right row) combination. Left rows without a match
+// contribute nothing, which gives inner-join behavior.
+func probeJoin(
+	leftComp []arrow.Array,
+	rightHT map[string][]hashEntry,
+	leftRowCount int,
+) []joinPair {
+	var pairs []joinPair
+
+	for l := 0; l < leftRowCount; l++ {
+		key := buildRowKey(leftComp, l)
+		matches := rightHT[key]
+		if len(matches) == 0 {
+			// inner join: skip if no matching right row
+			continue
+		}
+		// emit all combinations
+		for _, m := range matches {
+			pairs = append(pairs, joinPair{
+				leftRow:  l,
+				rightRow: m.row,
+			})
+		}
+	}
+
+	return pairs
+}
+
+// buildIndexArrays converts the matched row pairs into two Int32 index arrays
+// (left indexes first, right indexes second) suitable for compute.TakeArray.
+// The caller owns both returned arrays and must Release them.
+//
+// An error is returned, and nothing is allocated for the caller, when a row
+// index does not fit into int32; converting it anyway would silently select
+// the wrong row.
+func buildIndexArrays(
+	mem memory.Allocator,
+	pairs []joinPair,
+) (arrow.Array, arrow.Array, error) {
+	// Largest index representable by the int32 index arrays built below.
+	const maxIndex = 1<<31 - 1
+
+	lb := array.NewInt32Builder(mem)
+	defer lb.Release()
+	rb := array.NewInt32Builder(mem)
+	defer rb.Release()
+
+	lb.Reserve(len(pairs))
+	rb.Reserve(len(pairs))
+
+	for _, p := range pairs {
+		if p.leftRow < 0 || p.leftRow > maxIndex || p.rightRow < 0 || p.rightRow > maxIndex {
+			return nil, nil, fmt.Errorf("join row index out of int32 range: left=%d right=%d", p.leftRow, p.rightRow)
+		}
+		lb.Append(int32(p.leftRow))
+		rb.Append(int32(p.rightRow))
+	}
+
+	return lb.NewArray(), rb.NewArray(), nil
+}
+
+// buildOutputArrays gathers the joined rows: every left column is taken at
+// leftIdxArr and every right column at rightIdxArr. The result has one slot
+// per output schema field, left columns first, then right columns.
+//
+// On failure every array taken so far is released before the error is
+// returned, so the caller never has to clean up a partial result.
+func (hj *HashJoinExec) buildOutputArrays(
+	leftCols []arrow.Array,
+	rightCols []arrow.Array,
+	leftIdxArr arrow.Array,
+	rightIdxArr arrow.Array,
+) ([]arrow.Array, error) {
+	ctx := context.Background()
+
+	output := make([]arrow.Array, hj.schema.NumFields())
+	if len(leftCols)+len(rightCols) > len(output) {
+		return nil, fmt.Errorf("join inputs have %d columns but output schema has %d fields",
+			len(leftCols)+len(rightCols), len(output))
+	}
+
+	// releaseTaken frees whatever has been produced so far (error path only).
+	releaseTaken := func() {
+		for _, arr := range output {
+			if arr != nil {
+				arr.Release()
+			}
+		}
+	}
+
+	for i, col := range leftCols {
+		taken, err := compute.TakeArray(ctx, col, leftIdxArr)
+		if err != nil {
+			releaseTaken()
+			return nil, err
+		}
+		output[i] = taken
+	}
+	for i, col := range rightCols {
+		taken, err := compute.TakeArray(ctx, col, rightIdxArr)
+		if err != nil {
+			releaseTaken()
+			return nil, err
+		}
+		output[len(leftCols)+i] = taken
+	}
+	return output, nil
+}
diff --git a/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go b/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go index 363da9e..e22872f 100644 --- a/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go +++ b/src/Backend/opti-sql-go/operators/Join/hashJoin_test.go @@ -1,7 +1,564 @@ package join -import "testing" +import ( + "errors" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "opti-sql-go/operators/project" + "strings" + "testing" -func TestHashJoin(t *testing.T) { - // Simple passing test + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" +) + +func generateDataset1WithNulls(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "name", "age", "salary"} + + // ----- id (int32) ----- + idB := array.NewInt32Builder(mem) + idVals := []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + idValid := []bool{ + true, true, false, true, true, + false, true, true, true, false, + } + idB.AppendValues(idVals, idValid) + idArr := idB.NewArray() + + // ----- name (string) ----- + nameB := array.NewStringBuilder(mem) + nameVals := []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + } + nameValid := []bool{ + true, true, true, false, true, + true, true, true, false, true, + } + nameB.AppendValues(nameVals, nameValid) + nameArr := nameB.NewArray() + + // ----- age (int32) ----- + ageB := array.NewInt32Builder(mem) + ageVals := []int32{28, 34, 45, 22, 31, 29, 40, 36, 50, 26} + ageValid := []bool{ + true, false, true, true, true, + true, false, true, true, true, + } + ageB.AppendValues(ageVals, ageValid) + ageArr := ageB.NewArray() + + // ----- salary (float64) ----- + salB := 
array.NewFloat64Builder(mem) + salVals := []float64{ + 70000, 82000, 54000, 91000, 60000, + 75000, 66000, 0, 45000, 99000, + } + salaryValid := []bool{ + true, true, true, true, true, + true, true, false, true, true, + } + salB.AppendValues(salVals, salaryValid) + salaryArr := salB.NewArray() + + return names, []arrow.Array{idArr, nameArr, ageArr, salaryArr} +} +// generateJoinDataset2 returns the column names and Arrow arrays of the right-hand join input: ten rows of (id, department, region), each column carrying some nulls. +func generateJoinDataset2(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "department", "region"} + + // ---- id (int32) ---- + // overlap on: 1,2,4,5 + // unique to dataset2: 11,12,13,14 + // and two nulls (rows 8 and 9) + idB := array.NewInt32Builder(mem) + idB.AppendValues( + []int32{1, 2, 4, 5, 11, 12, 13, 14, 3, 0}, + []bool{true, true, true, true, true, true, true, true, false, false}, // null at idx 8 and 9 + ) + idArr := idB.NewArray() + + // ---- department (string) ---- + deptB := array.NewStringBuilder(mem) + deptB.AppendValues( + []string{"HR", "Engineering", "Sales", "Finance", "Marketing", + "Support", "Research", "Security", "Unknown", "Unknown"}, + []bool{true, true, true, true, true, true, true, false, true, false}, // some nulls + ) + deptArr := deptB.NewArray() + + // ---- region (string) ---- + regionB := array.NewStringBuilder(mem) + regionB.AppendValues( + []string{"US", "EU", "EU", "APAC", "US", "US", "LATAM", "EU", "N/A", "N/A"}, + []bool{true, true, true, true, true, true, false, true, true, false}, + ) + regionArr := regionB.NewArray() + + return names, []arrow.Array{idArr, deptArr, regionArr} +} +// newSources wraps generateDataset1WithNulls (left) and generateJoinDataset2 (right) in in-memory sources backed by a Go allocator. Constructor errors are discarded here. +func newSources() (*project.InMemorySource, *project.InMemorySource) { + mem := memory.NewGoAllocator() + leftNames, leftCols := generateDataset1WithNulls(mem) + rightNames, rightCols := generateJoinDataset2(mem) + + leftSource, _ := project.NewInMemoryProjectExecFromArrays(leftNames, leftCols) + rightSource, _ := project.NewInMemoryProjectExecFromArrays(rightNames, rightCols) + return leftSource, rightSource +} + +// TestJoinSchemas checks the number and names of the fields joinSchemas produces for each case; other field metadata such as nullability is not asserted. +func TestJoinSchemas(t *testing.T) { + + makeField := 
func(name string, dt arrow.DataType) arrow.Field { + return arrow.Field{Name: name, Type: dt, Nullable: true} + } + + tests := []struct { + name string + left *arrow.Schema + right *arrow.Schema + wantFields []string + }{ + { + name: "No duplicate fields", + left: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + }, nil), + right: arrow.NewSchema([]arrow.Field{ + makeField("dept", arrow.BinaryTypes.String), + makeField("region", arrow.BinaryTypes.String), + }, nil), + wantFields: []string{"id", "name", "dept", "region"}, + }, + { + name: "Single duplicate (id)", + left: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + makeField("age", arrow.PrimitiveTypes.Int32), + }, nil), + right: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("dept", arrow.BinaryTypes.String), + }, nil), + wantFields: []string{"left_id", "name", "age", "right_id", "dept"}, + }, + { + name: "Multiple duplicates", + left: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + }, nil), + right: arrow.NewSchema([]arrow.Field{ + makeField("id", arrow.PrimitiveTypes.Int32), + makeField("name", arrow.BinaryTypes.String), + makeField("salary", arrow.PrimitiveTypes.Float64), + }, nil), + wantFields: []string{"left_id", "left_name", "right_id", "right_name", "salary"}, + }, + { + name: "Nullable metadata preserved", + left: arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: false}, + }, nil), + right: arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + }, nil), + wantFields: []string{"left_id", "right_id"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := joinSchemas(tt.left, tt.right) + if err != nil { + 
t.Fatalf("unexpected error: %v", err) + } + + if got.NumFields() != len(tt.wantFields) { + t.Fatalf("wrong number of fields: got %d want %d", + got.NumFields(), len(tt.wantFields)) + } + + for i, wantName := range tt.wantFields { + gotName := got.Field(i).Name + if gotName != wantName { + t.Fatalf("field %d mismatch: got %s want %s", i, gotName, wantName) + } + } + }) + } +} + +// collectAllRows drains op by calling Next(1024) until it returns io.EOF and returns the batches it produced; nil or zero-row batches are skipped and any other error fails the test. +func collectAllRows(t *testing.T, op operators.Operator) []*operators.RecordBatch { + t.Helper() + + var batches []*operators.RecordBatch + for { + b, err := op.Next(1024) + if errors.Is(err, io.EOF) { + break + } + if err != nil { + t.Fatalf("unexpected error from Next: %v", err) + } + if b == nil || b.RowCount == 0 { + continue + } + batches = append(batches, b) + } + return batches +} + +// flattenRowCount returns the sum of RowCount over all batches (0 for an empty slice). +func flattenRowCount(batches []*operators.RecordBatch) int { + total := 0 + for _, b := range batches { + total += int(b.RowCount) + } + return total +} + +// evalInt32Slice evaluates expr against batch, fails the test unless the result is an Int32 array, and returns its values together with a per-row validity slice; a null row has valid[i] == false and leaves values[i] at zero. +func evalInt32Slice(t *testing.T, expr Expr.Expression, batch *operators.RecordBatch) ([]int32, []bool) { + t.Helper() + + arr, err := Expr.EvalExpression(expr, batch) + if err != nil { + t.Fatalf("EvalExpression failed: %v", err) + } + defer arr.Release() + + intArr, ok := arr.(*array.Int32) + if !ok { + t.Fatalf("expected Int32 array, got %T", arr) + } + + n := intArr.Len() + values := make([]int32, n) + valid := make([]bool, n) + for i := 0; i < n; i++ { + if intArr.IsNull(i) { + valid[i] = false + continue + } + valid[i] = true + values[i] = intArr.Value(i) + } + return values, valid +} + +// +// Simple helpers to get your left/right sources for the id-join dataset. +// + +// +// Multi-attribute dataset: first_name + last_name join. 
+// + +// generateMultiAttrLeft returns the left input of the multi-attribute join test: four fully non-null rows of (first_name, last_name, emp_id). +func generateMultiAttrLeft(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"first_name", "last_name", "emp_id"} + + fnB := array.NewStringBuilder(mem) + fnB.AppendValues( + []string{"Alice", "Bob", "Charlie", "Diana"}, + []bool{true, true, true, true}, + ) + fnArr := fnB.NewArray() + + lnB := array.NewStringBuilder(mem) + lnB.AppendValues( + []string{"Smith", "Jones", "Stone", "Lopez"}, + []bool{true, true, true, true}, + ) + lnArr := lnB.NewArray() + + empB := array.NewInt32Builder(mem) + empB.AppendValues( + []int32{1, 2, 3, 4}, + []bool{true, true, true, true}, + ) + empArr := empB.NewArray() + + return names, []arrow.Array{fnArr, lnArr, empArr} +} + +// generateMultiAttrRight returns the right input of the multi-attribute join test: three fully non-null rows of (first_name, last_name, department). Only ("Alice","Smith") and ("Charlie","Stone") also appear in the left input. +func generateMultiAttrRight(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"first_name", "last_name", "department"} + + fnB := array.NewStringBuilder(mem) + fnB.AppendValues( + []string{"Alice", "Charlie", "Evan"}, + []bool{true, true, true}, + ) + fnArr := fnB.NewArray() + + lnB := array.NewStringBuilder(mem) + lnB.AppendValues( + []string{"Smith", "Stone", "Miller"}, + []bool{true, true, true}, + ) + lnArr := lnB.NewArray() + + deptB := array.NewStringBuilder(mem) + deptB.AppendValues( + []string{"HR", "Engineering", "Sales"}, + []bool{true, true, true}, + ) + deptArr := deptB.NewArray() + + return names, []arrow.Array{fnArr, lnArr, deptArr} +} + +// +// "Computed" key dataset: we simulate a computed join key by precomputing a normalized field. 
+// + +// generateEmailLeft returns the left input of the "computed key" join test: three non-null rows of (id, email_lower). +func generateEmailLeft(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "email_lower"} + + idB := array.NewInt32Builder(mem) + idB.AppendValues([]int32{1, 2, 3}, []bool{true, true, true}) + idArr := idB.NewArray() + + emailB := array.NewStringBuilder(mem) + emailB.AppendValues( + []string{"alice@example.com", "bob@example.com", "charlie@example.com"}, + []bool{true, true, true}, + ) + emailArr := emailB.NewArray() + + return names, []arrow.Array{idArr, emailArr} +} + +// generateEmailRight returns the right input of the "computed key" join test: three non-null rows of (id, email_lower, group). Its ids (10, 20, 30) differ from the left ids, so only email_lower can match. +func generateEmailRight(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "email_lower", "group"} + + idB := array.NewInt32Builder(mem) + idB.AppendValues([]int32{10, 20, 30}, []bool{true, true, true}) + idArr := idB.NewArray() + + emailB := array.NewStringBuilder(mem) + emailB.AppendValues( + []string{"alice@example.com", "notused@example.com", "charlie@example.com"}, + []bool{true, true, true}, + ) + emailArr := emailB.NewArray() + + groupB := array.NewStringBuilder(mem) + groupB.AppendValues([]string{"A", "B", "C"}, []bool{true, true, true}) + groupArr := groupB.NewArray() + + return names, []arrow.Array{idArr, emailArr, groupArr} +} + +// +// ---------- (1) Simple id join tests ---------- +// + +// TestHashJoin_OnSimpleKey joins the two newSources datasets on their id columns and also checks that the constructor rejects a join clause with unequal expression counts. +func TestHashJoin_OnSimpleKey(t *testing.T) { + t.Run("inner join on id with SQL NULL semantics", func(t *testing.T) { + left, right := newSources() + + leftExpr := Expr.NewExpressions(Expr.NewColumnResolve("id")) + rightExpr := Expr.NewExpressions(Expr.NewColumnResolve("id")) + clause := NewJoinClause(leftExpr, rightExpr) + + hj, err := NewHashJoinExec(left, right, clause, InnerJoin, nil) + if err != nil { + t.Fatalf("NewHashJoinExec failed: %v", err) + } + defer func() { + if err := hj.Close(); err != nil { + t.Fatalf("HashJoinExec Close failed: %v", err) + } + + }() + + batches := collectAllRows(t, hj) + totalRows := flattenRowCount(batches) + + // Overlap on non-NULL ids is: 1, 2, 4, 5 => 4 rows for inner join. 
+ if totalRows != 4 { + t.Fatalf("expected 4 joined rows, got %d", totalRows) + } + + if len(batches) == 0 { + t.Fatal("expected at least one output batch") + } + // Only the first output batch is inspected for id equality below. + first := batches[0] + + leftIDExpr := Expr.NewColumnResolve("left_id") + rightIDExpr := Expr.NewColumnResolve("right_id") + + leftVals, leftValid := evalInt32Slice(t, leftIDExpr, first) + rightVals, rightValid := evalInt32Slice(t, rightIDExpr, first) + + for i := range leftVals { + if !leftValid[i] || !rightValid[i] { + t.Fatalf("unexpected NULL id in joined row %d", i) + } + if leftVals[i] != rightVals[i] { + t.Fatalf("mismatched ids at row %d: left=%d right=%d", + i, leftVals[i], rightVals[i]) + } + } + }) + + t.Run("constructor error on mismatched join clause length", func(t *testing.T) { + left, right := newSources() + + // left has 1 expression, right has 2 → must error + leftExpr := Expr.NewExpressions(Expr.NewColumnResolve("id")) + rightExpr := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("department"), + ) + + clause := NewJoinClause(leftExpr, rightExpr) + _, err := NewHashJoinExec(left, right, clause, InnerJoin, nil) + if err == nil { + t.Fatal("expected error due to mismatched join expression counts, got nil") + } + if !strings.Contains(err.Error(), "mismatched number of join expressions") { + t.Fatalf("unexpected error: %v", err) + } + }) +} + +// +// ---------- (2) Multi-attribute join tests ---------- +// + +// TestHashJoin_MultiAttributeKey inner-joins the multi-attribute datasets on the (first_name, last_name) pair and expects two rows with a non-null department column. +func TestHashJoin_MultiAttributeKey(t *testing.T) { + mem := memory.NewGoAllocator() + leftNames, leftCols := generateMultiAttrLeft(mem) + rightNames, rightCols := generateMultiAttrRight(mem) + + leftSource, _ := project.NewInMemoryProjectExecFromArrays(leftNames, leftCols) + rightSource, _ := project.NewInMemoryProjectExecFromArrays(rightNames, rightCols) + + leftExprs := Expr.NewExpressions( + Expr.NewColumnResolve("first_name"), + Expr.NewColumnResolve("last_name"), + ) + rightExprs := Expr.NewExpressions( + Expr.NewColumnResolve("first_name"), + 
Expr.NewColumnResolve("last_name"), + ) + clause := NewJoinClause(leftExprs, rightExprs) + + hj, err := NewHashJoinExec(leftSource, rightSource, clause, InnerJoin, nil) + if err != nil { + t.Fatalf("NewHashJoinExec failed: %v", err) + } + + defer func() { + if err := hj.Close(); err != nil { + t.Fatalf("HashJoinExec Close failed: %v", err) + } + + }() + + batches := collectAllRows(t, hj) + totalRows := flattenRowCount(batches) + + // Matches: ("Alice","Smith") and ("Charlie","Stone") => 2 rows. + if totalRows != 2 { + t.Fatalf("expected 2 rows from multi-attribute join, got %d", totalRows) + } + + if len(batches) == 0 { + t.Fatal("expected at least one batch") + } + // Only the first output batch is inspected below. + first := batches[0] + + deptExpr := Expr.NewColumnResolve("department") + arr, err := Expr.EvalExpression(deptExpr, first) + if err != nil { + t.Fatalf("EvalExpression department failed: %v", err) + } + defer arr.Release() + + // Unchecked assertion: panics rather than failing cleanly if department is not a string column. + strArr := arr.(*array.String) + if strArr.Len() != totalRows { + t.Fatalf("expected department array len %d, got %d", totalRows, strArr.Len()) + } + for i := 0; i < strArr.Len(); i++ { + if strArr.IsNull(i) { + t.Fatalf("expected non-null department at row %d", i) + } + } +} + +// +// ---------- (3) "Computed" key join tests ---------- +// + +// TestHashJoin_ComputedKeySimulation inner-joins the email datasets on the precomputed email_lower column and expects the two shared addresses to match. +func TestHashJoin_ComputedKeySimulation(t *testing.T) { + mem := memory.NewGoAllocator() + leftNames, leftCols := generateEmailLeft(mem) + rightNames, rightCols := generateEmailRight(mem) + + leftSource, _ := project.NewInMemoryProjectExecFromArrays(leftNames, leftCols) + rightSource, _ := project.NewInMemoryProjectExecFromArrays(rightNames, rightCols) + + leftExprs := Expr.NewExpressions(Expr.NewColumnResolve("email_lower")) + rightExprs := Expr.NewExpressions(Expr.NewColumnResolve("email_lower")) + clause := NewJoinClause(leftExprs, rightExprs) + + hj, err := NewHashJoinExec(leftSource, rightSource, clause, InnerJoin, nil) + if err != nil { + t.Fatalf("NewHashJoinExec failed: %v", err) + } + + defer func() { + if err := hj.Close(); err != 
nil { + t.Fatalf("HashJoinExec Close failed: %v", err) + } + + }() + batches := collectAllRows(t, hj) + totalRows := flattenRowCount(batches) + + // Overlap on email_lower: alice + charlie => 2 rows. + if totalRows != 2 { + t.Fatalf("expected 2 joined rows on email_lower, got %d", totalRows) + } + + if len(batches) == 0 { + t.Fatal("expected at least one batch") + } + first := batches[0] + + leftEmailExpr := Expr.NewColumnResolve("left_email_lower") + rightEmailExpr := Expr.NewColumnResolve("right_email_lower") + + leftArr, err := Expr.EvalExpression(leftEmailExpr, first) + if err != nil { + t.Fatalf("EvalExpression left_email_lower failed: %v", err) + } + defer leftArr.Release() + + rightArr, err := Expr.EvalExpression(rightEmailExpr, first) + if err != nil { + t.Fatalf("EvalExpression right_email_lower failed: %v", err) + } + defer rightArr.Release() + + lStr := leftArr.(*array.String) + rStr := rightArr.(*array.String) + + if lStr.Len() != rStr.Len() { + t.Fatalf("expected same length for left/right email arrays, got %d vs %d", + lStr.Len(), rStr.Len()) + } + for i := 0; i < lStr.Len(); i++ { + if lStr.IsNull(i) || rStr.IsNull(i) { + t.Fatalf("unexpected NULL email at row %d", i) + } + } } diff --git a/src/Backend/opti-sql-go/operators/Join/nested.go b/src/Backend/opti-sql-go/operators/Join/nested.go deleted file mode 100644 index 2502e5b..0000000 --- a/src/Backend/opti-sql-go/operators/Join/nested.go +++ /dev/null @@ -1 +0,0 @@ -package join diff --git a/src/Backend/opti-sql-go/operators/Join/nested_test.go b/src/Backend/opti-sql-go/operators/Join/nested_test.go deleted file mode 100644 index 0428968..0000000 --- a/src/Backend/opti-sql-go/operators/Join/nested_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package join - -import "testing" - -func TestNested(t *testing.T) { - // Simple passing test -} diff --git a/src/Backend/opti-sql-go/operators/OPERATORS.md b/src/Backend/opti-sql-go/operators/OPERATORS.md new file mode 100644 index 0000000..1302f6b --- /dev/null +++ 
b/src/Backend/opti-sql-go/operators/OPERATORS.md @@ -0,0 +1,158 @@ +# Operators — quick reference + +This document gives a concise overview of the operator model used in this repository, how to construct the most common operators, and what each operator's constructor expects and why. Placeholders like `Expr.Expression` and `RecordBatch` refer to the repository types found under `Expr` and `operators/record.go`. + +## What is an Operator? + +An operator implements the `operators.Operator` interface: + +- `Next(n uint16) (*operators.RecordBatch, error)` — return up to `n` rows (many operators ignore the exact n and read/produce what they need). Returns `io.EOF` when finished. +- `Schema() *arrow.Schema` — the operator's output schema. +- `Close() error` — release resources (files, network handles, etc.). + +The basic data unit is `operators.RecordBatch` (schema + Arrow arrays + rowcount). Operators compose: the output of one operator becomes the input (child) of the next. + +## Leaf (source) operators + +Leaf operators are the pipeline entry points. They read data from some storage and produce `RecordBatch` values. + +- CSV source + - Constructor: `project.NewProjectCSVLeaf(io.Reader)` + - Inputs: an `io.Reader` (file, buffer). Produces typed Arrow arrays from CSV columns. + - Notes: simple, fast for local CSVs. Use when you want a streaming CSV source. + +- Parquet source + - Constructor: (parquet reader; see project package) + - Inputs: parquet file handle. Produces Arrow arrays preserving parquet types. + +- In-memory source + - Constructor: `project.NewInMemoryProjectExec(names []string, columns []any)` + - Inputs: column names and Go slices (used heavily in unit tests). + - Notes: useful for deterministic test inputs and small-memory datasets. + +- S3 / NetworkResource + - use `project.NewStreamReader` to create a network file reader. this just means it allows chunk reading of files not on local disk. 
+ - Notes: the repository supports reading remote files; a configuration option lets you download the full remote file first to avoid per-request network latency when the operator needs repeated random access (e.g., for Parquet or when sorting). This is exposed as a NetworkResource / download option in the project/source constructors. + - The result of `project.NewStreamReader(fileName)` can be passed directly to `project.NewProjectCSVLeaf(io.Reader)` and `project.NewParquetSource(readSeeker)`. This is intentional, so that working with S3 files is as seamless as possible. + +## How to construct operators — summary of common operators + +The pattern is consistent: each operator has a `NewXxx...` constructor that takes one or more child operators, expression descriptors, or configuration params. + +### Project (Select) +- Constructor: `project.NewProjectExec(child operators.Operator, exprs []Expr.Expression)` +- Purpose: evaluate a list of projection expressions (column refs, scalar functions, aliases) and return a batch with only the requested columns. +- What to pass in: + - `child` — the input operator to project from (leaf or intermediate op). + - `exprs` — expressions created with `Expr.NewColumnResolve`, `Expr.NewLiteralResolve`, `Expr.NewAlias`, `Expr.NewScalarFunction`, etc. +- Why: keeps expression evaluation centralized and lets downstream operators work with a narrow schema. + +### Filter +- Constructor: `filter.NewFilterExec(child operators.Operator, predicate Expr.Expression)` +- Purpose: apply boolean predicates to input rows and emit only matching rows. +- What to pass in: + - `child` — operator producing input rows. + - `predicate` — an `Expr.Expression` that evaluates to boolean (can combine binary operators, scalar functions, null checks). +- Why: decouples predicate evaluation from projection and other operators; filter may buffer results across batches to serve limit-like requests. 
+ +### Limit +- Constructor: `filter.NewLimitExec(child operators.Operator, limit uint64)` +- Purpose: stop the pipeline after `limit` rows are emitted. +- What to pass in: the `child` operator and the numeric `limit`. +- Why: simple consumer-side cap; implemented as a thin operator above any child. + +### Distinct +- Constructor: `filter.NewDistinctExec(child operators.Operator, colExprs []Expr.Expression)` +- Purpose: remove duplicate rows on the selected key columns. +- What to pass in: `child` and the list of key column expressions. +- Why: used to produce unique values for a given set of columns; often followed by `Sort` for deterministic order. + +### Sort / TopK +- Constructors: + - `aggr.NewSortExec(child operators.Operator, sortKeys []aggr.SortKey)` — fully sorts input + - `aggr.NewTopKSortExec(child operators.Operator, sortKeys []aggr.SortKey, k uint16)` — keep top-k +- Purpose: order rows by one or more columns. +- What to pass in: + - `child` — input operator + - `sortKeys` — built with `aggr.NewSortKey(expr Expr.Expression, asc bool)`; multiple keys are combined with `aggr.CombineSortKeys(...)`. +- Why: some consumers require sorted input (ORDER BY) or only the top-k entries (TopK). +- Notes: current implementations read data into memory and sort; care must be taken for large datasets. + +### GroupBy / Aggregation +- Constructors: + - `aggr.NewGroupByExec(child operators.Operator, groupExpr []aggr.AggregateFunctions, groupBy []Expr.Expression)` — group-by with aggregates + - `aggr.NewGlobalAggrExec(child operators.Operator, aggExprs []aggr.AggregateFunctions)` — global aggregation (no GROUP BY) +- Purpose: compute aggregates (SUM, AVG, COUNT, MIN, MAX) grouped by one or more columns. +- What to pass in: + - `child` — input operator + - `groupExpr` / `aggExprs` — list of `aggr.AggregateFunctions` (built with `aggr.NewAggregateFunctions(aggr.AggrFunc, Expr.Expression)`) describing the aggregate function and its child expression (usually a column). 
+ - `groupBy` — expressions for the group-by keys (column resolves). +- Why: central place for aggregator logic; constructors validate types (numeric types for SUM/AVG) and construct the output schema. + +### Join (HashJoin) +- Constructor: `join.NewHashJoinExec(left, right operators.Operator, clause join.JoinClause, joinType join.JoinType, filters []Expr.Expression)` +- Purpose: perform hash-based joins (Inner, Left, Right). +- What to pass in: + - `left`, `right` — child operators for the two sides of the join (usually scans or projections) + - `clause` — `join.NewJoinClause(leftExprs []Expr.Expression, rightExprs []Expr.Expression)` describing which columns pair together (supports multiple equality clauses) + - `joinType` — `join.InnerJoin`, `join.LeftJoin`, etc. + - `filters` — optional post-join filters (not always used). These are not implemented yet and are not planned soon, because the same effect can be achieved by placing a Filter operator on top of the join. +- Why: joins combine rows from two inputs. The constructor validates schema compatibility and builds the combined output schema (prefixing duplicate column names with `left_`/`right_`). +- Implementation notes: the HashJoin reads the entirety of both children (current implementation) into memory and builds a hash table on the right side for probing. + +## Common constructor patterns & rationale + +- Child operator(s) always come first: most operators are constructed around one input (`child`) or two (`left`, `right`). This makes pipelines composable. +- Expressions are passed as `Expr.Expression` objects. Use the `Expr` package helpers to build column resolves, literals, scalar functions, binary operators and aliases. +- Constructors perform validation: type checking for aggregates, matching # of join expressions, or validity of projection expressions — this fails fast at construction time instead of at runtime. +- Many blocking operators (Sort, GroupBy, Join) read the full input before producing output. 
Be careful with large inputs — these operators are not yet externalized (spill-to-disk) and may require configuration or chunking for large datasets. + +## Practical examples (pseudocode) + +- Project + Filter + Limit pipeline: + +```go +src := project.NewProjectCSVLeaf(fileReader) +pred := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 30)) +filt, _ := filter.NewFilterExec(src, pred) +projExprs := Expr.NewExpressions(Expr.NewColumnResolve("id"), Expr.NewColumnResolve("name")) +proj, _ := project.NewProjectExec(filt, projExprs) +lim, _ := filter.NewLimitExec(proj, 10) +batch, _ := lim.Next(10) +``` + +- GroupBy example: + +```go +col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } +aggs := []aggr.AggregateFunctions{{AggrFunc: aggr.Sum, Child: col("salary")}} +gb, _ := aggr.NewGroupByExec(src, aggs, []Expr.Expression{col("department")}) +result, _ := gb.Next(1000) +``` + +- HashJoin example (equality on `id`): + +```go +clause := join.NewJoinClause([]Expr.Expression{Expr.NewColumnResolve("id")}, []Expr.Expression{Expr.NewColumnResolve("id")}) +j, _ := join.NewHashJoinExec(leftSrc, rightSrc, clause, join.InnerJoin, nil) +batch, _ := j.Next(100) +``` + +## Notes & best practices + +- Always call `Close()` on the root operator when done (after `Next` returns `io.EOF`) to release files and network handles. +- Use `project.NewInMemoryProjectExec` for tests — it builds reproducible `RecordBatch` inputs quickly. +- When writing pipelines that may read remote files, prefer to configure the source to download the whole file if the operator will need random access or many read passes (sorting, joining, grouping). This avoids repeated network calls and unpredictable latency. +- Watch out for duplicate column names after joins: the join constructor prefixes with `left_`/`right_` when needed. 
+ +## Where to look next in the codebase +- `operators/record.go` — `Operator` interface and `RecordBatch` helpers (builder, PrettyPrint). +- `operators/project/` — project implementations and CSV/parquet readers. +- `operators/filter/` — Filter, Limit, Distinct operator implementations. +- `operators/aggr/` — Sort, TopK, GroupBy and aggregate implementations. +- `operators/Join/` — HashJoin implementation. + +Reading the tests +----------------- + +For concrete examples of how SQL statements map to operator pipelines, read the integration/unit tests in `operators/test/` (and other test files under `operators/`). The tests build real pipelines (CSV/InMemory -> Filter/Project/Join/GroupBy/Sort/etc.) and show the exact constructor calls and expressions used to represent SQL queries. They are the best source of truth for small end-to-end examples. \ No newline at end of file diff --git a/src/Backend/opti-sql-go/operators/aggr/avgExec.go b/src/Backend/opti-sql-go/operators/aggr/avgExec.go deleted file mode 100644 index abd1ad5..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/avgExec.go +++ /dev/null @@ -1 +0,0 @@ -package aggr diff --git a/src/Backend/opti-sql-go/operators/aggr/avgExec_test.go b/src/Backend/opti-sql-go/operators/aggr/avgExec_test.go deleted file mode 100644 index 67671d0..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/avgExec_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package aggr - -import "testing" - -func TestAvgExec(t *testing.T) { - // Simple passing test -} diff --git a/src/Backend/opti-sql-go/operators/aggr/basicAggr.go b/src/Backend/opti-sql-go/operators/aggr/basicAggr.go deleted file mode 100644 index 0ffa1f3..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/basicAggr.go +++ /dev/null @@ -1,5 +0,0 @@ -package aggr - -// Min -//Max -//Count diff --git a/src/Backend/opti-sql-go/operators/aggr/basicAggr_test.go b/src/Backend/opti-sql-go/operators/aggr/basicAggr_test.go deleted file mode 100644 index 7a59206..0000000 --- 
a/src/Backend/opti-sql-go/operators/aggr/basicAggr_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package aggr - -import "testing" - -func TestBasicAggr(t *testing.T) { - // Simple passing test -} diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy.go b/src/Backend/opti-sql-go/operators/aggr/groupBy.go index abd1ad5..7ca86ea 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy.go @@ -1 +1,458 @@ package aggr + +import ( + "errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "strings" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" +) + +/* +rules for group by: +1.Every non-aggregated column in SELECT must be in GROUP BY +2.You can group by multiple columns - creates groups for each unique combination +3.Use HAVING to filter groups (WHERE filters before grouping, HAVING filters after) +*/ +var ( + _ = (operators.Operator)(&GroupByExec{}) +)
+
+// GroupByExec is a blocking GROUP BY operator. The first call to Next drains
+// the whole input, keeps one accumulator per aggregate for every distinct
+// group key, and returns all groups in a single batch; later calls return
+// io.EOF.
+type GroupByExec struct {
+	input       operators.Operator
+	schema      *arrow.Schema
+	groupExpr   []AggregateFunctions
+	groupByExpr []Expr.Expression // column names
+
+	groups map[string][]accumulator // maps group by key to its accumulator
+	keys   map[string][]string      // key → original values for output
+	done   bool
+}
+
+// NewGroupByExec builds a GroupByExec over child. groupExpr lists the
+// aggregates to compute and groupBy the grouping expressions. The output
+// schema is derived (and the inputs validated) by buildGroupBySchema, whose
+// error is returned unchanged.
+func NewGroupByExec(child operators.Operator, groupExpr []AggregateFunctions, groupBy []Expr.Expression) (*GroupByExec, error) {
+	s, err := buildGroupBySchema(child.Schema(), groupBy, groupExpr)
+	if err != nil {
+		return nil, err
+	}
+
+	return &GroupByExec{
+		input:       child,
+		schema:      s,
+		groupExpr:   groupExpr,
+		groupByExpr: groupBy,
+		keys:        make(map[string][]string),
+		groups:      make(map[string][]accumulator),
+	}, nil
+}
+
+// Next consumes every batch of the input (requesting batchSize rows at a
+// time), updates the per-group accumulators, and returns one batch holding
+// all groups. Once that batch has been returned, Next returns io.EOF. Any
+// non-EOF error from the input or from expression evaluation aborts the call.
+func (g *GroupByExec) Next(batchSize uint16) (*operators.RecordBatch, error) {
+	if g.done {
+		return nil, io.EOF
+	}
+
+	for {
+		childBatch, err := g.input.Next(batchSize)
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				break
+			}
+			return nil, err
+		}
+
+		rowCount := int(childBatch.RowCount)
+
+		// 1. evaluate all group-by expressions into arrays
+		groupArrays := make([]arrow.Array, len(g.groupByExpr))
+		for i, expr := range g.groupByExpr {
+			arr, err := Expr.EvalExpression(expr, childBatch)
+			if err != nil {
+				operators.ReleaseArrays(groupArrays)
+				operators.ReleaseArrays(childBatch.Columns)
+				return nil, err
+			}
+			groupArrays[i] = arr
+		}
+
+		// 2. evaluate all aggregation child expressions
+		aggrArrays := make([]arrow.Array, len(g.groupExpr))
+		for i, agg := range g.groupExpr {
+			arr, err := Expr.EvalExpression(agg.Child, childBatch)
+			if err != nil {
+				operators.ReleaseArrays(aggrArrays)
+				operators.ReleaseArrays(groupArrays)
+				operators.ReleaseArrays(childBatch.Columns)
+				return nil, err
+			}
+			arr, err = castArrayToFloat64(arr)
+			if err != nil {
+				operators.ReleaseArrays(aggrArrays)
+				operators.ReleaseArrays(groupArrays)
+				operators.ReleaseArrays(childBatch.Columns)
+				return nil, err
+			}
+			aggrArrays[i] = arr
+		}
+
+		// 3. process rows
+		for row := 0; row < rowCount; row++ {
+
+			// Build group key
+			// NOTE(review): parts are joined with "|" and a null is encoded as the
+			// text "NULL", so a string value that contains "|" or equals "NULL"
+			// can land in another group's bucket — confirm whether this matters.
+			keyParts := make([]string, len(groupArrays))
+			for j, arr := range groupArrays {
+				if arr.IsNull(row) {
+					keyParts[j] = "NULL"
+				} else {
+					keyParts[j] = fmt.Sprintf("%v", getValue(arr, row))
+				}
+			}
+			key := strings.Join(keyParts, "|")
+			// Allocate accumulator list if new group
+			if _, exists := g.groups[key]; !exists {
+				g.groups[key] = make([]accumulator, len(g.groupExpr))
+				for i, agg := range g.groupExpr {
+					g.groups[key][i] = createAccumulator(agg.AggrFunc)
+				}
+				g.keys[key] = keyParts // store original values
+			}
+
+			// UPDATE accumulators (null inputs are skipped)
+			for i, arr := range aggrArrays {
+				if arr.IsNull(row) {
+					continue
+				}
+				val := arr.(*array.Float64).Value(row)
+				g.groups[key][i].Update(val)
+			}
+		}
+		// 4. release temp arrays
+		operators.ReleaseArrays(aggrArrays)
+		operators.ReleaseArrays(groupArrays)
+		operators.ReleaseArrays(childBatch.Columns)
+	}
+
+	// 5. Build output RecordBatch
+	batch := buildGroupByOutput(g)
+
+	g.done = true
+	return batch, nil
+}
+
+// Schema returns the output schema computed at construction time.
+func (g *GroupByExec) Schema() *arrow.Schema {
+	return g.schema
+}
+
+// Close closes the input operator and returns its error.
+func (g *GroupByExec) Close() error {
+	return g.input.Close()
+}
+
+// buildGroupBySchema validates the group-by and aggregate expressions against
+// childSchema and builds the output schema: one nullable "group_<expr>" field
+// per group-by expression (keeping the expression's type) followed by one
+// non-nullable float64 "<func>_<expr>" field per aggregate.
+//
+// It returns an error when an expression's type cannot be resolved, when an
+// aggregate input type is rejected by validAggrType, or when the aggregate
+// function is one createAccumulator does not support, so that bad plans fail
+// at construction time instead of panicking inside Next.
+func buildGroupBySchema(childSchema *arrow.Schema, groupByExpr []Expr.Expression, aggrExprs []AggregateFunctions) (*arrow.Schema, error) {
+
+	fields := make([]arrow.Field, 0, len(groupByExpr)+len(aggrExprs))
+
+	// 1. Add group-by columns
+	for _, expr := range groupByExpr {
+		dt, err := Expr.ExprDataType(expr, childSchema)
+		if err != nil {
+			return nil, fmt.Errorf("group-by expr %s has invalid type: %w", expr.String(), err)
+		}
+
+		fields = append(fields, arrow.Field{
+			Name:     fmt.Sprintf("group_%s", expr.String()),
+			Type:     dt,
+			Nullable: true,
+		})
+	}
+
+	// 2. Add aggregate columns
+	for _, agg := range aggrExprs {
+		switch agg.AggrFunc {
+		case Min, Max, Sum, Count, Avg:
+			// supported by createAccumulator
+		default:
+			return nil, fmt.Errorf("unsupported aggregate function: %v", agg.AggrFunc)
+		}
+
+		dt, err := Expr.ExprDataType(agg.Child, childSchema)
+		if err != nil {
+			// Keep the resolution error; dt is not meaningful here.
+			return nil, fmt.Errorf("aggregate expr %s has invalid type: %w", agg.Child.String(), err)
+		}
+		if !validAggrType(dt) {
+			return nil, ErrInvalidAggrColumnType(dt)
+		}
+		// All aggregates produce float64
+		fieldName := fmt.Sprintf("%s_%s",
+			strings.ToLower(aggrToString(int(agg.AggrFunc))),
+			agg.Child.String(),
+		)
+
+		fields = append(fields, arrow.Field{
+			Name:     fieldName,
+			Type:     arrow.PrimitiveTypes.Float64,
+			Nullable: false,
+		})
+	}
+
+	return arrow.NewSchema(fields, nil), nil
+}
+
+// getValue returns the value stored at row of arr. Int32, Int64, Float32,
+// Float64, String and Boolean arrays yield their native Go value; any other
+// array type yields the row's string form so that distinct values still
+// produce distinct group keys. The caller must check IsNull first.
+func getValue(arr arrow.Array, row int) any {
+	switch col := arr.(type) {
+	case *array.Int32:
+		return col.Value(row)
+	case *array.Int64:
+		return col.Value(row)
+	case *array.Float32:
+		return col.Value(row)
+	case *array.Float64:
+		return col.Value(row)
+	case *array.String:
+		return col.Value(row)
+	case *array.Boolean:
+		return col.Value(row)
+	default:
+		// Formatting the whole array here would give every row the same key.
+		return col.ValueStr(row)
+	}
+}
+
+// createAccumulator returns a fresh accumulator for fn. It panics for a
+// function other than Min, Max, Sum, Count or Avg; buildGroupBySchema rejects
+// those earlier, so the panic signals a programming error.
+func createAccumulator(fn AggrFunc) accumulator {
+	switch fn {
+	case Min:
+		return newMinAggr()
+	case Max:
+		return newMaxAggr()
+	case Sum:
+		return newSumAggr()
+	case Count:
+		return newCountAggr()
+	case Avg:
+		return newAvgAggr()
+	default:
+		panic(fmt.Sprintf("unsupported aggregate function: %v", fn))
+	}
+}
+
+func buildGroupByOutput(g *GroupByExec) *operators.RecordBatch { + alloc := memory.NewGoAllocator() + + rowCount := len(g.groups) + if rowCount == 0 { + return &operators.RecordBatch{ + Schema: g.schema, + Columns: []arrow.Array{}, + RowCount: 0, + } + } + + // Prepare column builders + colBuilders := make([]arrow.Array, len(g.schema.Fields())) + + // Temporary storage for columns + groupCols := make([][]any, len(g.groupByExpr)) // group columns + aggrCols := make([][]float64, len(g.groupExpr)) // aggregate columns + + for i := range groupCols { + groupCols[i] = make([]any, 0, rowCount) + } + for i := range aggrCols { + aggrCols[i] = make([]float64, 0, rowCount) + } + + for key, accs := range g.groups { + // Add 
group-by (dimension) values + dims := g.keys[key] + for j, v := range dims { + groupCols[j] = append(groupCols[j], v) + } + + // Add aggregated values + for j, acc := range accs { + aggrCols[j] = append(aggrCols[j], acc.Finalize()) + } + + } + + // Now build Arrow arrays in correct schema order + fieldIndex := 0 + + // Build group-by columns first + for j := range g.groupByExpr { + colBuilders[fieldIndex] = buildDynamicArray(alloc, g.schema.Field(fieldIndex).Type, groupCols[j]) + fieldIndex++ + } + + // Build aggregate columns + for j := range g.groupExpr { + colBuilders[fieldIndex] = buildFloatArray(alloc, aggrCols[j]) + fieldIndex++ + } + + return &operators.RecordBatch{ + Schema: g.schema, + Columns: colBuilders, + RowCount: uint64(rowCount), + } +} +func buildDynamicArray(mem memory.Allocator, dt arrow.DataType, values []any) arrow.Array { + switch dt.ID() { + + // =========================== + // STRING (UTF8) + // =========================== + case arrow.STRING: + sb := array.NewStringBuilder(mem) + for _, v := range values { + if v == nil { + sb.AppendNull() + } else { + sb.Append(fmt.Sprintf("%v", v)) + } + } + return sb.NewArray() + + // =========================== + // SIGNED INTEGERS + // =========================== + case arrow.INT8: + b := array.NewInt8Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int8)) + } + } + return b.NewArray() + + case arrow.INT16: + b := array.NewInt16Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int16)) + } + } + return b.NewArray() + + case arrow.INT32: + b := array.NewInt32Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int32)) + } + } + return b.NewArray() + + case arrow.INT64: + b := array.NewInt64Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(int64)) + } + } + return b.NewArray() + + // 
=========================== + // UNSIGNED INTEGERS + // =========================== + case arrow.UINT8: + b := array.NewUint8Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint8)) + } + } + return b.NewArray() + + case arrow.UINT16: + b := array.NewUint16Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint16)) + } + } + return b.NewArray() + + case arrow.UINT32: + b := array.NewUint32Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint32)) + } + } + return b.NewArray() + + case arrow.UINT64: + b := array.NewUint64Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(uint64)) + } + } + return b.NewArray() + + // =========================== + // FLOATS + // =========================== + case arrow.FLOAT32: + b := array.NewFloat32Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(float32)) + } + } + return b.NewArray() + + case arrow.FLOAT64: + b := array.NewFloat64Builder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(v.(float64)) + } + } + return b.NewArray() + + // =========================== + // UNSUPPORTED TYPE + // =========================== + case arrow.BOOL: + b := array.NewBooleanBuilder(mem) + for _, v := range values { + if v == nil { + b.AppendNull() + } else { + b.Append(castToBool(v)) + } + } + return b.NewArray() + default: + panic(fmt.Sprintf("unsupported dynamic array type: %v", dt)) + } +} + +func buildFloatArray(mem memory.Allocator, values []float64) arrow.Array { + b := array.NewFloat64Builder(mem) + b.AppendValues(values, nil) + return b.NewArray() +} +func castToBool(v any) bool { + if v == "true" || v == true { + return true + } + return false +} diff --git a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go 
b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go index 3313b3e..41434ac 100644 --- a/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/groupBy_test.go @@ -1,7 +1,678 @@ package aggr -import "testing" +import ( + "errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators/project" + "strings" + "testing" -func TestGroupBy(t *testing.T) { - // Simple passing test + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" +) + +func generateGroupByTestColumns() ([]string, []any) { + names := []string{ + "id", + "name", + "department", + "region", + "seniority", + "salary", + "age", + } + + // 40 IDs + ids := make([]int32, 40) + for i := range ids { + ids[i] = int32(i + 1) + } + + // Names – 40 names + namesArr := []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + "Karen", "Leo", "Mona", "Nate", "Olive", + "Paul", "Quinn", "Rita", "Sam", "Tina", + "Uma", "Victor", "Wendy", "Xavier", "Yara", + "Zane", "Becky", "Carlos", "Dora", "Elias", + "Fiona", "Gabe", "Helena", "Isaac", "Julia", + "Kevin", "Lara", "Miles", "Nora", "Owen", + } + + // Randomized but balanced departments (5 groups) + departments := []string{ + "Engineering", "HR", "Sales", "Engineering", "Finance", + "Support", "Sales", "Engineering", "Support", "Finance", + "HR", "Engineering", "Sales", "Support", "Finance", + "Engineering", "Sales", "HR", "Support", "Engineering", + "Finance", "Sales", "Engineering", "Support", "HR", + "Support", "Engineering", "Finance", "Sales", "HR", + "Engineering", "Support", "Finance", "Sales", "Engineering", + "HR", "Finance", "Support", "Engineering", "Sales", + } + + // Randomized but balanced regions (4 groups) + regions := []string{ + "North", "East", "South", "West", "South", + "North", "West", "East", "North", "South", + "West", "East", "North", "South", "West", + "North", 
"East", "West", "South", "North", + "East", "West", "South", "North", "East", + "South", "North", "West", "East", "South", + "West", "North", "East", "South", "West", + "North", "South", "East", "West", "North", + } + + // Randomized seniority (3 groups) + seniority := []string{ + "Junior", "Senior", "Mid", "Junior", "Mid", + "Senior", "Junior", "Mid", "Senior", "Junior", + "Mid", "Senior", "Junior", "Mid", "Senior", + "Junior", "Mid", "Senior", "Junior", "Mid", + "Senior", "Junior", "Mid", "Senior", "Junior", + "Mid", "Senior", "Junior", "Mid", "Senior", + "Junior", "Mid", "Senior", "Junior", "Mid", + "Senior", "Junior", "Mid", "Senior", "Junior", + } + + // Salaries (same as before) + salaries := []float64{ + 70000, 82000, 54000, 91000, 60000, + 75000, 66000, 88000, 45000, 99000, + 72000, 81000, 53000, 86000, 64000, + 93000, 68000, 76000, 89000, 71000, + 83000, 94000, 55000, 87000, 91500, + 72000, 69000, 58000, 84000, 79000, + 81000, 78000, 62000, 97000, 82000, + 95000, 76000, 88000, 91000, 64000, + } + + // Ages with some repetition + ages := []int32{ + 28, 34, 45, 22, 31, + 29, 40, 36, 50, 26, + 33, 41, 27, 38, 24, + 46, 30, 35, 43, 32, + 39, 48, 29, 37, 42, + 28, 34, 45, 22, 31, + 29, 40, 36, 50, 26, + 39, 48, 29, 37, 42, + } + + columns := []any{ + ids, + namesArr, + departments, + regions, + seniority, + salaries, + ages, + } + + return names, columns +} + +func groupByProject() *project.InMemorySource { + names, cols := generateGroupByTestColumns() + p, _ := project.NewInMemoryProjectExec(names, cols) + return p +} + +func TestGroupByInit(t *testing.T) { + p := groupByProject() + _, _ = p.Next(12) +} + +func TestNewGroupByExecAndSchema(t *testing.T) { + // convenience builder + col := func(name string) Expr.Expression { + return Expr.NewColumnResolve(name) + } + + t.Run("single group-by single aggregate", func(t *testing.T) { + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, 
Child: col("salary")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + schema := gb.Schema() + if schema == nil { + t.Fatalf("schema should not be nil") + } + + // group-by + 1 agg = 2 fields + if got, want := schema.NumFields(), 2; got != want { + t.Fatalf("expected %d fields, got %d", want, got) + } + + // group field + f0 := schema.Field(0) + expName := "group_" + groupBy[0].String() + if f0.Name != expName { + t.Fatalf("expected group field name %q, got %q", expName, f0.Name) + } + + // aggregate field + f1 := schema.Field(1) + properAggName := fmt.Sprintf("%s_%s", + strings.ToLower(aggrToString(int(aggs[0].AggrFunc))), + aggs[0].Child.String(), + ) + if f1.Name != properAggName { + t.Fatalf("expected agg field %q, got %q", properAggName, f1.Name) + } + + if gb.groups == nil { + t.Fatalf("groups map not initialized") + } + if gb.keys == nil { + t.Fatalf("keys map not initialized") + } + }) + + t.Run("multiple group-by and multiple aggregates", func(t *testing.T) { + child := groupByProject() + + groupBy := []Expr.Expression{col("region"), col("seniority")} + aggs := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + schema := gb.Schema() + wantFields := len(groupBy) + len(aggs) + if schema.NumFields() != wantFields { + t.Fatalf("expected %d fields, got %d", wantFields, schema.NumFields()) + } + + // group fields first + for i, gexpr := range groupBy { + f := schema.Field(i) + exp := "group_" + gexpr.String() + if f.Name != exp { + t.Fatalf("group field[%d] mismatch: want %q got %q", i, exp, f.Name) + } + } + + // aggregate fields next + offset := len(groupBy) + for j, agg := range aggs { + f := schema.Field(offset + j) + expAggName := fmt.Sprintf("%s_%s", + 
strings.ToLower(aggrToString(int(agg.AggrFunc))), + agg.Child.String(), + ) + if f.Name != expAggName { + t.Fatalf("agg field name mismatch: want %q got %q", expAggName, f.Name) + } + } + }) + + t.Run("invalid group-by column triggers error", func(t *testing.T) { + child := groupByProject() + + invalidGB := []Expr.Expression{col("not_a_col")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + // direct schema builder test + _, err := buildGroupBySchema(child.Schema(), invalidGB, aggs) + if err == nil { + t.Fatalf("expected error for invalid group-by expr") + } + + // NewGroupByExec should also fail + if _, err := NewGroupByExec(child, aggs, invalidGB); err == nil { + t.Fatalf("expected NewGroupByExec error for invalid group-by") + } + }) + + t.Run("no aggregates - schema should only contain group-by columns", func(t *testing.T) { + child := groupByProject() + + groupBy := []Expr.Expression{col("region")} + var aggs []AggregateFunctions + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + schema := gb.Schema() + + if schema.NumFields() != 1 { + t.Fatalf("expected 1 field, got %d", schema.NumFields()) + } + + f := schema.Field(0) + exp := "group_" + groupBy[0].String() + if f.Name != exp { + t.Fatalf("wrong group field name: want %q got %q", exp, f.Name) + } + }) + + t.Run("multiple aggregates produce float64 regardless of source type", func(t *testing.T) { + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Avg, Child: col("age")}, // int32 → float64 + {AggrFunc: Sum, Child: col("salary")}, // float64 → float64 + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + schema := gb.Schema() + + // group-by + 2 aggregates = 3 + if schema.NumFields() != 3 { + t.Fatalf("expected 3 fields, got %d", schema.NumFields()) + } + + for idx := 1; idx < 3; idx++ 
{ + f := schema.Field(idx) + if f.Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected field[%d] to be float64, got %v", idx, f.Type) + } + } + }) + + t.Run("schema names must match exact string() output of expressions", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + schema := gb.Schema() + + expected0 := "group_" + gbExpr[0].String() // group_Column(seniority) + expected1 := "group_" + gbExpr[1].String() // group_Column(region) + + if schema.Field(0).Name != expected0 { + t.Fatalf("wrong field[0] name: want %q got %q", expected0, schema.Field(0).Name) + } + if schema.Field(1).Name != expected1 { + t.Fatalf("wrong field[1] name: want %q got %q", expected1, schema.Field(1).Name) + } + + // count column + expectedAgg := "count_" + aggs[0].Child.String() + if schema.Field(2).Name != expectedAgg { + t.Fatalf("wrong agg field name: want %q got %q", expectedAgg, schema.Field(2).Name) + } + }) + t.Run("basic close check", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if gb.Close() != nil { + t.Fatalf("unexpected error on close") + } + + }) +} +func TestBasicOperatorCasesGroupBy(t *testing.T) { + + t.Run("basic close check", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: 
Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if gb.Close() != nil { + t.Fatalf("unexpected error on close") + } + + }) + t.Run("done case", func(t *testing.T) { + child := groupByProject() + + gbExpr := []Expr.Expression{ + Expr.NewColumnResolve("seniority"), + Expr.NewColumnResolve("region"), + } + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + gb.done = true + _, err = gb.Next(100) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF but received %v", err) + } + + }) +} +func TestGroupByNext_SingleColumnCount(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + gbExpr := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + batch, _ := gb.Next(1000) + + if batch == nil || batch.RowCount == 0 { + t.Fatalf("expected non-empty grouped result") + } + + // Validate schema + if batch.Schema.NumFields() != 2 { + t.Fatalf("expected 2 fields, got %d", batch.Schema.NumFields()) + } + + // Validate that group keys exist and aggregates exist + if batch.Columns[0].Len() == 0 { + t.Fatalf("expected region groups") + } + + if batch.Columns[1].Len() == 0 { + t.Fatalf("expected aggregated counts") + } +} + +func TestGroupByNext_MultipleGroupBy_MultipleAggs(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + gbExpr := []Expr.Expression{ + col("seniority"), + col("region"), + } + + aggs := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + {AggrFunc: Max, Child: col("salary")}, + 
{AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatal(err) + } + + batch, _ := gb.Next(50) + + if batch.RowCount == 0 { + t.Fatalf("expected non-zero grouped rows") + } + + if batch.Schema.NumFields() != 5 { + t.Fatalf("expected 5 fields (2 group-by + 3 aggr), got %d", batch.Schema.NumFields()) + } +} + +func TestGroupByNext_MultipleNextCalls(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + gbExpr := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatal(err) + } + + // First call returns batch + EOF + _, _ = gb.Next(100) + _, err = gb.Next(100) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF on second return, got %v", err) + } + +} + +func TestBuildGroupBySchema_AllBranches(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + + child := groupByProject() + + groupBy := []Expr.Expression{col("region"), col("seniority")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + {AggrFunc: Count, Child: col("id")}, + } + + schema, err := buildGroupBySchema(child.Schema(), groupBy, aggs) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if schema.NumFields() != 4 { + t.Fatalf("expected 4 fields got %d", schema.NumFields()) + } + + // test group-by fields + if schema.Field(0).Type.ID() != arrow.STRING { + t.Fatalf("expected STRING for region") + } + + // aggregated fields always float64 + if schema.Field(2).Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected FLOAT64 for aggregate field") + } +} + +func TestBuildGroupBySchema_InvalidColumn(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + child := groupByProject() + + _, err := buildGroupBySchema(child.Schema(), 
[]Expr.Expression{col("doesnotexist")}, nil) + if err == nil { + t.Fatalf("expected error but got none") + } +} + +func TestBuildGroupBySchema_InvalidAggType(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + child := groupByProject() + + aggs := []AggregateFunctions{ + // Boolean type or unsupported type + {AggrFunc: Sum, Child: col("name")}, + } + + _, err := buildGroupBySchema(child.Schema(), nil, aggs) + if err == nil { + t.Fatalf("expected invalid agg type error") + } +} +func TestGetValue_AllTypes(t *testing.T) { + mem := memory.NewGoAllocator() + + // int32 + i32 := array.NewInt32Builder(mem) + i32.Append(42) + arr32 := i32.NewArray() + if getValue(arr32, 0).(int32) != 42 { + t.Fatal("failed int32 case") + } + + // int64 + i64 := array.NewInt64Builder(mem) + i64.Append(99) + arr64 := i64.NewArray() + if getValue(arr64, 0).(int64) != 99 { + t.Fatal("failed int64 case") + } + + // float32 + f32 := array.NewFloat32Builder(mem) + f32.Append(3.5) + arrf32 := f32.NewArray() + if getValue(arrf32, 0).(float32) != 3.5 { + t.Fatal("failed float32 case") + } + + // float64 + f64 := array.NewFloat64Builder(mem) + f64.Append(9.1) + arrf64 := f64.NewArray() + if getValue(arrf64, 0).(float64) != 9.1 { + t.Fatal("failed float64 case") + } + + // string + sb := array.NewStringBuilder(mem) + sb.Append("hello") + sarr := sb.NewArray() + if getValue(sarr, 0).(string) != "hello" { + t.Fatal("failed string case") + } + + // boolean + bb := array.NewBooleanBuilder(mem) + bb.Append(true) + barr := bb.NewArray() + if getValue(barr, 0).(bool) != true { + t.Fatal("failed boolean case") + } +} + +func TestBuildDynamicArray_AllPrimitiveTypes(t *testing.T) { + mem := memory.NewGoAllocator() + + tests := []struct { + dt arrow.DataType + val []any + }{ + {arrow.PrimitiveTypes.Int8, []any{int8(1), nil, int8(3)}}, + {arrow.PrimitiveTypes.Int16, []any{int16(2), int16(5)}}, + {arrow.PrimitiveTypes.Int32, []any{int32(10), nil, int32(12)}}, + 
{arrow.PrimitiveTypes.Int64, []any{int64(20), int64(40)}}, + + {arrow.PrimitiveTypes.Uint8, []any{uint8(7), nil}}, + {arrow.PrimitiveTypes.Uint16, []any{uint16(100)}}, + {arrow.PrimitiveTypes.Uint32, []any{uint32(2000)}}, + {arrow.PrimitiveTypes.Uint64, []any{uint64(99999)}}, + + {arrow.PrimitiveTypes.Float32, []any{float32(2.2), nil}}, + {arrow.PrimitiveTypes.Float64, []any{float64(9.9)}}, + + {arrow.BinaryTypes.String, []any{"a", "b", nil}}, + } + + for _, tc := range tests { + arr := buildDynamicArray(mem, tc.dt, tc.val) + if arr.Len() != len(tc.val) { + t.Fatalf("wrong length for type %v", tc.dt) + } + } +} + +func TestCreateAccumulator_AllCases(t *testing.T) { + funcs := []AggrFunc{Min, Max, Sum, Count, Avg} + + for _, fn := range funcs { + acc := createAccumulator(fn) + if acc == nil { + t.Fatalf("expected accumulator for fn=%v", fn) + } + } +} + +func TestCreateAccumulator_PanicOnInvalid(t *testing.T) { + defer func() { + if recover() == nil { + t.Fatalf("expected panic for invalid function") + } + }() + + createAccumulator(AggrFunc(9999)) // invalid +} + +func TestBuildGroupByOutput_Basic(t *testing.T) { + col := func(n string) Expr.Expression { return Expr.NewColumnResolve(n) } + child := groupByProject() + + gbExpr := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, gbExpr) + if err != nil { + t.Fatal(err) + } + + // invoke Next (fills accumulators) + _, _ = gb.Next(100) + + batch := buildGroupByOutput(gb) + + if batch.RowCount == 0 { + t.Fatalf("expected grouped rows") + } + + if len(batch.Columns) != 2 { + t.Fatalf("expected 2 columns, got %d", len(batch.Columns)) + } } diff --git a/src/Backend/opti-sql-go/operators/aggr/having.go b/src/Backend/opti-sql-go/operators/aggr/having.go new file mode 100644 index 0000000..a2a559f --- /dev/null +++ b/src/Backend/opti-sql-go/operators/aggr/having.go @@ -0,0 +1,77 @@ +package aggr + +import ( + "errors" + "io" + 
"opti-sql-go/Expr" + "opti-sql-go/operators" + "opti-sql-go/operators/filter" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +// carbon copy of filter.go with minor changes to fit having semantics +var ( + _ = (operators.Operator)(&HavingExec{}) +) + +type HavingExec struct { + input operators.Operator + schema *arrow.Schema + + havingExpr Expr.Expression + done bool +} + +func NewHavingExec(input operators.Operator, havingFilter Expr.Expression) (*HavingExec, error) { + + return &HavingExec{ + input: input, + schema: input.Schema(), + havingExpr: havingFilter, + }, nil +} + +func (h *HavingExec) Next(n uint16) (*operators.RecordBatch, error) { + if h.done { + return nil, io.EOF + } + childBatch, err := h.input.Next(n) + if err != nil { + if errors.Is(err, io.EOF) { + h.done = true + } + return nil, err + } + booleanMask, err := Expr.EvalExpression(h.havingExpr, childBatch) + if err != nil { + return nil, err + } + boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should + if !ok { + return nil, errors.New("having predicate did not evaluate to boolean array") + } + filteredCol := make([]arrow.Array, len(childBatch.Columns)) + for i, col := range childBatch.Columns { + filteredCol[i], err = filter.ApplyBooleanMask(col, boolArr) + if err != nil { + return nil, err + } + } + // release old columns + operators.ReleaseArrays(childBatch.Columns) + size := uint64(filteredCol[0].Len()) + + return &operators.RecordBatch{ + Schema: childBatch.Schema, + Columns: filteredCol, + RowCount: size, + }, nil +} +func (h *HavingExec) Schema() *arrow.Schema { + return h.schema +} +func (h *HavingExec) Close() error { + return h.input.Close() +} diff --git a/src/Backend/opti-sql-go/operators/aggr/having_test.go b/src/Backend/opti-sql-go/operators/aggr/having_test.go new file mode 100644 index 0000000..45275b2 --- /dev/null +++ 
b/src/Backend/opti-sql-go/operators/aggr/having_test.go @@ -0,0 +1,213 @@ +package aggr + +import ( + "errors" + "io" + "strings" + "testing" + + "opti-sql-go/Expr" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +func TestHavingExec_OnGroupBy(t *testing.T) { + + // ============================================================= + // 1) HAVING SUM(salary) > 600000 + // ============================================================= + t.Run("having_sum_salary_gt_600k", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + t.Fatalf("unexpected GroupBy error: %v", err) + } + + sumCol := "sum_Column(salary)" + + // SUM(salary) > 600000 + havingExpr := Expr.NewBinaryExpr( + Expr.NewColumnResolve(sumCol), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 600000.0), + ) + + having, err := NewHavingExec(gb, havingExpr) + if err != nil { + t.Fatalf("unexpected HavingExec init error: %v", err) + } + + batch, err := having.Next(1024) + if err != nil { + t.Fatalf("unexpected error running Next: %v", err) + } + t.Logf("batch : %v\n", batch.PrettyPrint()) + sumValues := batch.Columns[1].(*array.Float64) + for i := 0; i < sumValues.Len(); i++ { + if sumValues.Value(i) <= 600000 { + t.Fatalf("expected sum(salary) > 600000, got %f", sumValues.Value(i)) + } + } + + }) + + // ============================================================= + // 2) HAVING COUNT(id) >= 10 + // ============================================================= + t.Run("having_count_id_ge_10", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, err := NewGroupByExec(child, aggs, groupBy) + if err != nil { + 
t.Fatalf("unexpected GroupBy err: %v", err) + } + + countCol := "count_Column(id)" + + havingExpr := Expr.NewBinaryExpr( + Expr.NewColumnResolve(countCol), + Expr.GreaterThanOrEqual, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 10.0), + ) + + having, err := NewHavingExec(gb, havingExpr) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + batch, err := having.Next(200) + if err != nil { + t.Fatalf("unexpected Next error: %v", err) + } + + if batch.RowCount != 3 { // North, South, West ≥ 10 + t.Fatalf("expected 3 regions with >=10 rows, got %d", batch.RowCount) + } + }) + + // ============================================================= + // 3) HAVING filters all groups out + // ============================================================= + t.Run("having_filters_all", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, _ := NewGroupByExec(child, aggs, groupBy) + + sumCol := "sum_Column(salary)" + + // Impossible condition + havingExpr := Expr.NewBinaryExpr( + Expr.NewColumnResolve(sumCol), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 1_000_000_000.0), + ) + + having, _ := NewHavingExec(gb, havingExpr) + + batch, err := having.Next(1024) + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + + if batch.RowCount != 0 { + t.Fatalf("expected all rows to be filtered out, got %d", batch.RowCount) + } + }) + + // ============================================================= + // 4) Non-boolean predicate → error + // ============================================================= + t.Run("having_non_boolean_predicate", func(t *testing.T) { + + child := groupByProject() + groupBy := []Expr.Expression{col("department")} + aggs := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("salary")}, + } + + gb, _ := NewGroupByExec(child, aggs, groupBy) + + // invalid: resolves to float, not 
boolean + invalidExpr := Expr.NewColumnResolve("sum_Column(salary)") + + having, _ := NewHavingExec(gb, invalidExpr) + + _, err := having.Next(100) + if err == nil { + t.Fatalf("expected non-boolean error, got nil") + } + if !strings.Contains(err.Error(), "boolean") { + t.Fatalf("expected boolean error, got: %v", err) + } + }) + + // ============================================================= + // 5) done = true returns EOF + // ============================================================= + t.Run("done_returns_eof", func(t *testing.T) { + + child := groupByProject() + + groupBy := []Expr.Expression{col("region")} + aggs := []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + } + + gb, _ := NewGroupByExec(child, aggs, groupBy) + + countCol := "count_Column(id)" + + havingExpr := Expr.NewBinaryExpr( + Expr.NewColumnResolve(countCol), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 0.0), + ) + + h, _ := NewHavingExec(gb, havingExpr) + h.done = true + + _, err := h.Next(10) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF, got: %v", err) + } + }) + + // ============================================================= + // 6) Close forwards to child.Close() + // ============================================================= + t.Run("close_propagates", func(t *testing.T) { + + child := groupByProject() + + gb, _ := NewGroupByExec(child, []AggregateFunctions{ + {AggrFunc: Count, Child: col("id")}, + }, []Expr.Expression{col("region")}) + + h, _ := NewHavingExec(gb, Expr.NewLiteralResolve(arrow.FixedWidthTypes.Boolean, true)) + + if err := h.Close(); err != nil { + t.Fatalf("Close returned error: %v", err) + } + t.Log(h.Schema()) + }) +} diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go new file mode 100644 index 0000000..0f7c3b5 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr.go @@ -0,0 +1,291 @@ +package aggr + +import ( + "context" + 
"errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" +) + +var ( + ErrUnsupportedAggrFunc = func(aggr int) error { + return fmt.Errorf("%d is an unsupported aggregate function", aggr) + } + ErrInvalidAggrColumnType = func(value any) error { + return fmt.Errorf("%v of type %T cannot be cast to float64 so it is not a valid column type to aggregate on", value, value) + } +) + +// AggrFunc represents the type of aggregation function to be performed. +type AggrFunc int + +const ( + Min AggrFunc = iota + Max + Count + Sum + Avg +) + +var ( + _ = (accumulator)(&minAggrAccumulator{}) + _ = (accumulator)(&maxAggrAccumulator{}) + _ = (accumulator)(&countAggrAccumulator{}) + _ = (accumulator)(&sumAggrAccumulator{}) + _ = (accumulator)(&avgAggrAccumulator{}) + _ = (operators.Operator)(&AggrExec{}) +) + +func NewAggregateFunctions(aggrFunc AggrFunc, child Expr.Expression) AggregateFunctions { + return AggregateFunctions{ + AggrFunc: aggrFunc, + Child: child, + } +} + +type AggregateFunctions struct { + AggrFunc AggrFunc // switch to deal with separate aggregate functions + Child Expr.Expression // resolves to a column generally +} +type accumulator interface { + Update(value float64) + Finalize() float64 +} + +func newMinAggr() accumulator { + return &minAggrAccumulator{} +} + +type minAggrAccumulator struct { + minV float64 + firstValue bool +} + +func (m *minAggrAccumulator) Update(value float64) { + if !m.firstValue { + m.minV = value + m.firstValue = true + return + } + m.minV = min(m.minV, value) + +} +func (m *minAggrAccumulator) Finalize() float64 { return m.minV } +func newMaxAggr() accumulator { + return &maxAggrAccumulator{} +} + +type maxAggrAccumulator struct { + maxV float64 + firstValue bool +} + +func (m *maxAggrAccumulator) Update(value float64) { + if !m.firstValue { + m.maxV = value + m.firstValue = true + 
return + } + m.maxV = max(m.maxV, value) +} +func (m *maxAggrAccumulator) Finalize() float64 { return m.maxV } + +func newCountAggr() accumulator { + return &countAggrAccumulator{} +} + +type countAggrAccumulator struct { + count float64 +} + +func (c *countAggrAccumulator) Update(_ float64) { + c.count++ +} +func (c *countAggrAccumulator) Finalize() float64 { return c.count } + +func newSumAggr() accumulator { + return &sumAggrAccumulator{} +} + +type sumAggrAccumulator struct { + summation float64 +} + +func (s *sumAggrAccumulator) Update(value float64) { + s.summation += value +} +func (s *sumAggrAccumulator) Finalize() float64 { return s.summation } +func newAvgAggr() accumulator { + return &avgAggrAccumulator{} +} + +type avgAggrAccumulator struct { + used bool + values float64 + count float64 +} + +func (a *avgAggrAccumulator) Update(value float64) { + a.used = true + a.values += value + a.count++ +} +func (a *avgAggrAccumulator) Finalize() float64 { + // handles divide by zero + if !a.used { + return 0.0 + } + return a.values / a.count +} + +// =================== +// Aggregator Operator +// =================== +// handles global aggregations without group by +type AggrExec struct { + input operators.Operator // child operator + schema *arrow.Schema // output schema + aggExpressions []AggregateFunctions // list of wanted aggregate expressions + accumulators []accumulator // list of accumulators corresponding to aggExpressions, these will actually work to compute the aggregation + done bool // know when to return io.EOF +} + +func NewGlobalAggrExec(child operators.Operator, aggExprs []AggregateFunctions) (*AggrExec, error) { + accs := make([]accumulator, len(aggExprs)) + fields := make([]arrow.Field, len(aggExprs)) + for i, agg := range aggExprs { + dt, err := Expr.ExprDataType(agg.Child, child.Schema()) + if err != nil || !validAggrType(dt) { + return nil, ErrInvalidAggrColumnType(dt) + } + var fieldName string + switch agg.AggrFunc { + case Min: + fieldName 
= fmt.Sprintf("min_%s", agg.Child.String()) + accs[i] = newMinAggr() + case Max: + fieldName = fmt.Sprintf("max_%s", agg.Child.String()) + accs[i] = newMaxAggr() + case Count: + fieldName = fmt.Sprintf("count_%s", agg.Child.String()) + accs[i] = newCountAggr() + case Sum: + fieldName = fmt.Sprintf("sum_%s", agg.Child.String()) + accs[i] = newSumAggr() + case Avg: + fieldName = fmt.Sprintf("avg_%s", agg.Child.String()) + accs[i] = newAvgAggr() + + default: + return nil, ErrUnsupportedAggrFunc(int(agg.AggrFunc)) + } + fields[i] = arrow.Field{ + Name: fieldName, + Type: arrow.PrimitiveTypes.Float64, + Nullable: true, + } + } + return &AggrExec{ + input: child, + schema: arrow.NewSchema(fields, nil), + aggExpressions: aggExprs, + accumulators: accs, + }, nil +} + +// Next consumes all batches from the child operator, evaluates the aggregate expressions, +// updates the accumulators for each value, and returns a single output batch containing +// the final aggregation results. It returns io.EOF after producing the result batch. 
+func (a *AggrExec) Next(n uint16) (*operators.RecordBatch, error) { + if a.done { + return nil, io.EOF + } + for { + childBatch, err := a.input.Next(n) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + for i, aggExpr := range a.aggExpressions { + agrArray, err := Expr.EvalExpression(aggExpr.Child, childBatch) + if err != nil { + return nil, err + } + agrArray, err = castArrayToFloat64(agrArray) + if err != nil { + return nil, err + } + valueArray := agrArray.(*array.Float64) + accumulator := a.accumulators[i] + for j := 0; j < valueArray.Len(); j++ { + if valueArray.IsNull(j) { + continue + } + accumulator.Update(valueArray.Value(j)) + } + + } + operators.ReleaseArrays(childBatch.Columns) + } + // build array with just the result of the column + resultColumns := make([]arrow.Array, len(a.accumulators)) + for i := range a.accumulators { + resultColumns[i] = operators.NewRecordBatchBuilder().GenFloatArray(a.accumulators[i].Finalize()) + } + a.done = true + return &operators.RecordBatch{ + Schema: a.schema, + Columns: resultColumns, + RowCount: 1, + }, nil +} + +func (a *AggrExec) Schema() *arrow.Schema { + return a.schema +} +func (a *AggrExec) Close() error { + return a.input.Close() +} + +func validAggrType(dt arrow.DataType) bool { + switch dt.ID() { + case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64, + arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64, arrow.FLOAT16, arrow.FLOAT32, arrow.FLOAT64: + return true + default: + return false + } +} + +func castArrayToFloat64(arr arrow.Array) (arrow.Array, error) { + outDatum, err := compute.CastArray(context.Background(), arr, compute.NewCastOptions(&arrow.Float64Type{}, true)) + if err != nil { + return nil, err + } + + return outDatum, nil +} +func aggrToString(t int) string { + switch AggrFunc(t) { + case Min: + return "MIN" + case Max: + return "MAX" + case Count: + return "COUNT" + case Sum: + return "SUM" + case Avg: + return "AVG" + default: + return 
"UNKNOWN_AGGREGATE_FUNCTION" + } +} diff --git a/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go new file mode 100644 index 0000000..9b5af24 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/aggr/singleAggr_test.go @@ -0,0 +1,606 @@ +package aggr + +import ( + "errors" + "fmt" + "io" + "math" + "opti-sql-go/Expr" + "opti-sql-go/operators/project" + "testing" + + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +func generateAggTestColumns() ([]string, []any) { + names := []string{ + "id", + "name", + "age", + "salary", + } + + columns := []any{ + // id: 1 to 25 + []int32{ + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + }, + + // name: 25 people + []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + "Karen", "Leo", "Mona", "Nate", "Olive", + "Paul", "Quinn", "Rita", "Sam", "Tina", + "Uma", "Victor", "Wendy", "Xavier", "Yara", + }, + + // age: 25 numeric values + []int32{ + 28, 34, 45, 22, 31, + 29, 40, 36, 50, 26, + 33, 41, 27, 38, 24, + 46, 30, 35, 43, 32, + 39, 48, 29, 37, 42, + }, + + // salary: 25 numeric values + []float64{ + 70000.0, 82000.5, 54000.0, 91000.0, 60000.0, + 75000.0, 66000.0, 88000.0, 45000.0, 99000.0, + 72000.0, 81000.0, 53000.0, 86000.0, 64000.0, + 93000.0, 68000.0, 76000.0, 89000.0, 71000.0, + 83000.0, 94000.0, 55000.0, 87000.0, 91500.0, + }, + } + + return names, columns +} +func generateAggTestColumnsWithNulls(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"id", "name", "age", "salary"} + + // ------------------------- + // id column (int32) + // ------------------------- + idB := array.NewInt32Builder(mem) + idVals := []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + idValid := []bool{ + true, true, false, true, true, + false, true, true, true, false, + } + 
idB.AppendValues(idVals, idValid) + idArr := idB.NewArray() + + // ------------------------- + // name column (string) + // ------------------------- + nameB := array.NewStringBuilder(mem) + nameVals := []string{ + "Alice", "Bob", "Charlie", "David", "Eve", + "Frank", "Grace", "Hannah", "Ivy", "Jake", + } + nameValid := []bool{ + true, true, true, false, true, + true, true, true, false, true, + } + nameB.AppendValues(nameVals, nameValid) + nameArr := nameB.NewArray() + + // ------------------------- + // age column (int32) + // ------------------------- + ageB := array.NewInt32Builder(mem) + ageVals := []int32{28, 34, 45, 22, 31, 29, 40, 36, 50, 26} + ageValid := []bool{ + true, false, true, true, true, + true, false, true, true, true, + } + ageB.AppendValues(ageVals, ageValid) + ageArr := ageB.NewArray() + + // ------------------------- + // salary column (float64) + // ------------------------- + salB := array.NewFloat64Builder(mem) + salVals := []float64{ + 70000, 82000, 54000, 91000, 60000, + 75000, 66000, 0, 45000, 99000, + } + + salaryValid := []bool{ + true, true, true, true, true, + true, true, false, true, true, + } + + salB.AppendValues(salVals, salaryValid) + salaryArr := salB.NewArray() + + return names, []arrow.Array{idArr, nameArr, ageArr, salaryArr} +} + +func aggProject() *project.InMemorySource { + names, cols := generateAggTestColumns() + p, _ := project.NewInMemoryProjectExec(names, cols) + return p +} + +func aggProjectNull() *project.InMemorySource { + names, arr := generateAggTestColumnsWithNulls(memory.NewGoAllocator()) + p, _ := project.NewInMemoryProjectExecFromArrays(names, arr) + return p +} + +func col(name string) Expr.Expression { + return Expr.NewColumnResolve(name) +} + +func TestNewAggrExec(t *testing.T) { + + // ----------------------------------------------------------------- + t.Run("valid_single_min", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + } + + exec, 
err := NewGlobalAggrExec(child, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if exec.Schema().NumFields() != 1 { + t.Fatalf("expected 1 schema field, got %d", exec.Schema().NumFields()) + } + + expectedName := "min_Column(age)" + if exec.Schema().Field(0).Name != expectedName { + t.Fatalf("expected name %s, got %s", + expectedName, exec.Schema().Field(0).Name) + } + }) + + // ----------------------------------------------------------------- + t.Run("multiple_aggregations_schema_names", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Avg, Child: col("age")}, + } + + exec, err := NewGlobalAggrExec(child, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + schema := exec.Schema() + + expected := []string{ + "min_Column(id)", + "max_Column(salary)", + "avg_Column(age)", + } + + for i, f := range schema.Fields() { + if f.Name != expected[i] { + t.Fatalf("expected field %s, got %s", expected[i], f.Name) + } + } + }) + + // ----------------------------------------------------------------- + t.Run("invalid_type_detection_string_column", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("name")}, // "name" is string → invalid + } + + _, err := NewGlobalAggrExec(child, agg) + if err == nil { + t.Fatalf("expected type error, got nil") + } + t.Logf("================\n invalid column err %v \n ============", err) + }) + + // ----------------------------------------------------------------- + t.Run("unsupported_aggregate_function", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: 9999, Child: col("age")}, + } + + _, err := NewGlobalAggrExec(child, agg) + if err == nil { + t.Fatalf("expected unsupported aggr error") + } + }) + + // ----------------------------------------------------------------- + 
t.Run("schema_type_float64_for_all_numeric_aggs", func(t *testing.T) { + child := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Sum, Child: col("age")}, + {AggrFunc: Avg, Child: col("salary")}, + {AggrFunc: Count, Child: col("age")}, + } + + exec, err := NewGlobalAggrExec(child, agg) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + for _, f := range exec.Schema().Fields() { + if f.Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected float64 output type, got %s", f.Type) + } + } + if err := exec.Close(); err != nil { + t.Fatalf("unexpected close error: %v", err) + } + }) + + // ----------------------------------------------------------------- + t.Run("check_all_valid_numeric_types_pass", func(t *testing.T) { + + // all numeric arrow types accepted by validAggrType() + validTypes := []arrow.DataType{ + arrow.PrimitiveTypes.Uint8, + arrow.PrimitiveTypes.Uint16, + arrow.PrimitiveTypes.Uint32, + arrow.PrimitiveTypes.Uint64, + arrow.PrimitiveTypes.Int8, + arrow.PrimitiveTypes.Int16, + arrow.PrimitiveTypes.Int32, + arrow.PrimitiveTypes.Int64, + arrow.PrimitiveTypes.Float32, + arrow.PrimitiveTypes.Float64, + } + + fieldNames := make([]string, len(validTypes)) + colData := make([]any, len(validTypes)) + + for i, dt := range validTypes { + name := fmt.Sprintf("col_%d", i) + fieldNames[i] = name + + switch dt.ID() { + case arrow.UINT8: + colData[i] = []uint8{1} + case arrow.UINT16: + colData[i] = []uint16{1} + case arrow.UINT32: + colData[i] = []uint32{1} + case arrow.UINT64: + colData[i] = []uint64{1} + case arrow.INT8: + colData[i] = []int8{1} + case arrow.INT16: + colData[i] = []int16{1} + case arrow.INT32: + colData[i] = []int32{1} + case arrow.INT64: + colData[i] = []int64{1} + case arrow.FLOAT16: + // float16 stored as float32 in Go + colData[i] = []float32{1} + case arrow.FLOAT32: + colData[i] = []float32{1} + case arrow.FLOAT64: + colData[i] = []float64{1} + } + } + + 
src, _ := project.NewInMemoryProjectExec(fieldNames, colData) + + for i := range fieldNames { + agg := []AggregateFunctions{ + {AggrFunc: Sum, Child: col(fieldNames[i])}, + } + + _, err := NewGlobalAggrExec(src, agg) + if err != nil { + t.Fatalf("unexpected error for type %s: %v", validTypes[i], err) + } + } + }) +} + +func TestCastArrayToFloat64(t *testing.T) { + + alloc := memory.NewGoAllocator + + // -------------------------------------------------------- + t.Run("cast_int32_to_float64", func(t *testing.T) { + b := array.NewInt32Builder(alloc()) + b.AppendValues([]int32{1, 2, 3, 4}, nil) + arr := b.NewArray() + + out, err := castArrayToFloat64(arr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + farr, ok := out.(*array.Float64) + if !ok { + t.Fatalf("expected Float64 array, got %T", out) + } + + expected := []float64{1, 2, 3, 4} + for i := range expected { + if farr.Value(i) != expected[i] { + t.Fatalf("expected %v at %d, got %v", expected[i], i, farr.Value(i)) + } + } + }) + + // -------------------------------------------------------- + t.Run("cast_float32_to_float64", func(t *testing.T) { + b := array.NewFloat32Builder(alloc()) + b.AppendValues([]float32{10.5, 20.5, 30.5}, nil) + arr := b.NewArray() + + out, err := castArrayToFloat64(arr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + farr, ok := out.(*array.Float64) + if !ok { + t.Fatalf("expected Float64 array, got %T", out) + } + + expected := []float64{10.5, 20.5, 30.5} + for i := range expected { + if farr.Value(i) != expected[i] { + t.Fatalf("expected %v at %d, got %v", expected[i], i, farr.Value(i)) + } + } + }) + + // -------------------------------------------------------- + t.Run("invalid_string_cast", func(t *testing.T) { + b := array.NewStringBuilder(alloc()) + b.AppendValues([]string{"a", "b", "c"}, nil) + arr := b.NewArray() + + _, err := castArrayToFloat64(arr) + if err == nil { + t.Fatalf("expected error when casting string array to float64") + } + }) + + 
// -------------------------------------------------------- + t.Run("empty_array_cast", func(t *testing.T) { + b := array.NewInt32Builder(alloc()) + // no values appended + arr := b.NewArray() + + out, err := castArrayToFloat64(arr) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + _, ok := out.(*array.Float64) + if !ok { + t.Fatalf("expected Float64 array for empty cast, got %T", out) + } + + if out.Len() != 0 { + t.Fatalf("expected empty array, got length %d", out.Len()) + } + }) + +} + +func TestAggregateExecNext(t *testing.T) { + t.Run("validating done case early", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}} + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + aggrExec.done = true + _, err = aggrExec.Next(10) + if err == nil || !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF error, got nil") + } + }) + t.Run("Aggr minimum value on age", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}} + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + resultBatch, _ := aggrExec.Next(100) + t.Logf("record batch: %v\n", resultBatch) + if resultBatch.Columns[0].(*array.Float64).Value(0) != 22 { + t.Fatalf("expected minimum age 22, got %v", resultBatch.Columns[0].(*array.Float64).Value(0)) + } + + }) + t.Run("Aggr maximum salary", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Max, Child: col("salary")}, + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, _ := aggrExec.Next(100) + + maxSalary := resultBatch.Columns[0].(*array.Float64).Value(0) + if maxSalary != 99000.0 && maxSalary != 94000.0 && maxSalary != 93000.0 { + // Real max is 99000 (Jake has 99000) + t.Fatalf("expected max salary 
99000, got %v", maxSalary) + } + }) + t.Run("Aggr sum of id column", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + {AggrFunc: Sum, Child: col("id")}, + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, _ := aggrExec.Next(200) + + sumIDs := resultBatch.Columns[0].(*array.Float64).Value(0) + expected := float64((25 * 26) / 2) // sum(1..25) = 325 + if sumIDs != expected { + t.Fatalf("expected sum 325, got %v", sumIDs) + } + }) + t.Run("Aggr count of age column", func(t *testing.T) { + proj := aggProject() + agg := []AggregateFunctions{ + NewAggregateFunctions(Count, col("age")), + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, _ := aggrExec.Next(300) + + count := resultBatch.Columns[0].(*array.Float64).Value(0) + if count != 25 { + t.Fatalf("expected count 25, got %v", count) + } + }) + t.Run("Aggr average of salary ", func(t *testing.T) { + proj := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Avg, Child: col("salary")}, + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, _ := aggrExec.Next(500) + + avg := resultBatch.Columns[0].(*array.Float64).Value(0) + expected := 75740.02 + + if math.Abs(avg-expected) > 0.001 { + t.Fatalf("expected avg %v, got %v", expected, avg) + } + + }) + t.Run("Multiple aggregators in a single request", func(t *testing.T) { + proj := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("age")}, + {AggrFunc: Max, Child: col("salary")}, + {AggrFunc: Count, Child: col("id")}, + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + resultBatch, _ := aggrExec.Next(1000) + + minAge := resultBatch.Columns[0].(*array.Float64).Value(0) + maxSalary := 
resultBatch.Columns[1].(*array.Float64).Value(0) + countIDs := resultBatch.Columns[2].(*array.Float64).Value(0) + + if minAge != 22 { + t.Fatalf("expected min age 22, got %v", minAge) + } + if maxSalary != 99000.0 { + t.Fatalf("expected max salary 99000, got %v", maxSalary) + } + if countIDs != 25 { + t.Fatalf("expected count 25, got %v", countIDs) + } + }) + + // ========================================================== + t.Run("Schema correctness for multiple aggregates", func(t *testing.T) { + proj := aggProject() + + agg := []AggregateFunctions{ + {AggrFunc: Min, Child: col("id")}, + {AggrFunc: Sum, Child: col("age")}, + {AggrFunc: Count, Child: col("salary")}, + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + s := aggrExec.Schema() + + expectedNames := []string{ + "min_Column(id)", + "sum_Column(age)", + "count_Column(salary)", + } + + for i, f := range s.Fields() { + if f.Name != expectedNames[i] { + t.Fatalf("expected field %s, got %s", expectedNames[i], f.Name) + } + if f.Type.ID() != arrow.FLOAT64 { + t.Fatalf("expected float64 fields only") + } + } + }) +} + +func TestAggregateExecNull(t *testing.T) { + + t.Run("Aggr count of age column", func(t *testing.T) { + proj := aggProjectNull() + agg := []AggregateFunctions{ + NewAggregateFunctions(Count, col("age")), + NewAggregateFunctions(Sum, col("id")), + } + + aggrExec, err := NewGlobalAggrExec(proj, agg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + resultBatch, _ := aggrExec.Next(100) + t.Logf("rb:%v\n", resultBatch) + count := resultBatch.Columns[0].(*array.Float64).Value(0) + if count != 8 { + t.Fatalf("expected count 7, got %v", count) + } + sumIDs := resultBatch.Columns[1].(*array.Float64).Value(0) + expectedSum := float64(1 + 2 + 4 + 5 + 7 + 8 + 9) // only non-null ids + if sumIDs != expectedSum { + t.Fatalf("expected sum %v, got %v", expectedSum, sumIDs) + } + }) +} diff --git 
a/src/Backend/opti-sql-go/operators/aggr/sort.go b/src/Backend/opti-sql-go/operators/aggr/sort.go index abd1ad5..1b731f8 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort.go @@ -1 +1,755 @@ package aggr + +import ( + "context" + "errors" + "fmt" + "io" + "math" + "opti-sql-go/Expr" + "opti-sql-go/operators" + "sort" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" +) + +// order by col asc, col 2 desc .... etc +var ( + _ = (operators.Operator)(&SortExec{}) + _ = (operators.Operator)(&TopKSortExec{}) +) + +type SortKey struct { + Expr Expr.Expression + Ascending bool // by default false -- DESC (highest values first -> smaller values) + NullFirst bool // by default false -- nulls last +} + +func NewSortKey(expr Expr.Expression, options ...bool) *SortKey { + var asc, nullF bool + switch len(options) { + case 2: + asc = options[0] + nullF = options[1] + case 1: + asc = options[0] + } + return &SortKey{ + Expr: expr, + Ascending: asc, + NullFirst: nullF, + } +} +func CombineSortKeys(sk ...*SortKey) []SortKey { + var res []SortKey + for _, s := range sk { + res = append(res, *s) + } + return res +} + +type SortExec struct { + input operators.Operator + schema *arrow.Schema + sortKeys []SortKey // resolves to columns + // internal book keeping + totalColumns []arrow.Array + consumedOffset uint64 + totalRows uint64 + consumed bool // did we finish reading all of the child record batches? + done bool // have we already produced all the sorted record batches? 
+} + +func NewSortExec(child operators.Operator, sortKeys []SortKey) (*SortExec, error) { + return &SortExec{ + input: child, + schema: child.Schema(), + sortKeys: sortKeys, + }, nil +} + +// for now read everything into memory and sort -- next steps will be to do external merge + +// n is the number of records we will return,sortExec will read in 2^16-1 column entries from its child, this is more efficient that trusting the caller to pass in a reasonable +// n so that we avoid small/frequent IO operations +func (s *SortExec) Next(n uint16) (*operators.RecordBatch, error) { + if s.done { + return nil, io.EOF + } + if !s.consumed { + allColumns := make([]arrow.Array, len(s.schema.Fields())) // concated columns + mem := memory.NewGoAllocator() + var count uint64 + for { + childBatch, err := s.input.Next(math.MaxUint16) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, err + } + for i := range childBatch.Columns { + if allColumns[i] == nil { + allColumns[i] = childBatch.Columns[i] + continue + } + largerArray, err := array.Concatenate([]arrow.Array{allColumns[i], childBatch.Columns[i]}, mem) + if err != nil { + return nil, err + } + allColumns[i] = largerArray + } + } + s.consumed = true + if len(allColumns) > 0 { + count = uint64(allColumns[0].Len()) + } + idx, err := sortBatches(&operators.RecordBatch{ + Schema: s.schema, + Columns: allColumns, + RowCount: count, + }, s.sortKeys) + if err != nil { + return nil, err + } + // now update all mappings + idxArray := idxToArrowArray(idx, mem) + defer idxArray.Release() + for i := range len(allColumns) { + arr, err := compute.TakeArray(context.Background(), allColumns[i], idxArray) + if err != nil { + return nil, err + } + allColumns[i] = arr + } + s.totalColumns = allColumns + s.totalRows = count + } + var readSize uint64 + remaining := s.totalRows - s.consumedOffset + if remaining == 0 { + return nil, io.EOF + } + if remaining < uint64(n) { + // if n is more than we have left just read up to 
remaining + readSize = uint64(remaining) + s.done = true + } else { + // remaining > n or remaining = n then just read n and return + readSize = uint64(n) + } + mem := memory.NewGoAllocator() + sortedColumns, err := s.consumeSortedBatch(readSize, mem) + if err != nil { + return nil, err + } + + return &operators.RecordBatch{ + Schema: s.schema, + Columns: sortedColumns, + RowCount: readSize, + }, nil +} +func (s *SortExec) Schema() *arrow.Schema { + return s.schema +} +func (s *SortExec) Close() error { + return s.input.Close() +} +func (s *SortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([]arrow.Array, error) { + ctx := context.Background() + resultColumns := make([]arrow.Array, len(s.schema.Fields())) + offsetArray := genoffsetTakeIdx(s.consumedOffset, readsize, mem) + defer offsetArray.Release() + for i := range s.totalColumns { + sortArr := s.totalColumns[i] + arr, err := compute.TakeArray(ctx, sortArr, offsetArray) + if err != nil { + return nil, err + } + resultColumns[i] = arr + + } + s.consumedOffset += readsize + return resultColumns, nil +} + +/* +only sort and keep the top k elements in memory +*/ +type TopKSortExec struct { + input operators.Operator + schema *arrow.Schema + sortKeys []SortKey // resolves to columns + k uint16 // top k + // internal book keeping + sortedColumns []arrow.Array + heap []heapRow // at any one point this will only hold k elements + totalRows uint64 + consumedOffset uint64 + consumed bool // did we finish reading all of the input record batches? 
+	done bool
+}
+
+// NewTopKSortExec builds an operator that yields the first k rows of child in
+// the order described by sortKeys. It returns an error when child is nil.
+func NewTopKSortExec(child operators.Operator, sortKeys []SortKey, k uint16) (*TopKSortExec, error) {
+	if child == nil {
+		return nil, errors.New("top-k sort: child operator is nil")
+	}
+	schema := child.Schema()
+	return &TopKSortExec{
+		input:    child,
+		schema:   schema,
+		sortKeys: sortKeys,
+		k:        k,
+		// one slot per output column; a slot stays nil until the first child batch is merged
+		sortedColumns: make([]arrow.Array, len(schema.Fields())),
+		heap:          make([]heapRow, 0, k),
+	}, nil
+}
+
+// Next returns up to n rows of the top-k result, in sort order.
+//
+// The first call drains the child operator completely, folding every child
+// batch into the running top-k through UpdateTopKSorted (everything is held in
+// memory for now; an external merge is the planned next step). Later calls
+// page through the retained rows. io.EOF is returned once every retained row
+// has been handed out, and immediately when the child produced no rows.
+func (t *TopKSortExec) Next(n uint16) (*operators.RecordBatch, error) {
+	if t.done {
+		return nil, io.EOF
+	}
+	mem := memory.NewGoAllocator()
+	if !t.consumed {
+		for {
+			childBatch, err := t.input.Next(math.MaxUint16)
+			if err != nil {
+				if errors.Is(err, io.EOF) {
+					break
+				}
+				return nil, err
+			}
+			if err := t.UpdateTopKSorted(childBatch, t.sortKeys, mem); err != nil {
+				return nil, err
+			}
+		}
+		t.consumed = true
+		// The slots are pre-sized with nil entries, so an empty child leaves
+		// slot 0 nil; calling Len on it would panic.
+		if len(t.sortedColumns) != 0 && t.sortedColumns[0] != nil {
+			t.totalRows = uint64(t.sortedColumns[0].Len())
+		}
+	}
+
+	remaining := t.totalRows - t.consumedOffset
+	if remaining == 0 {
+		t.done = true
+		return nil, io.EOF
+	}
+	// Never read past the retained rows; mark the operator finished as soon as
+	// the last row is handed out so no trailing zero-row batch is produced.
+	readSize := min(uint64(n), remaining)
+	if readSize == remaining {
+		t.done = true
+	}
+	sortedArr, err := t.consumeSortedBatch(readSize, mem)
+	if err != nil {
+		return nil, err
+	}
+	return &operators.RecordBatch{
+		Schema:   t.schema,
+		Columns:  sortedArr,
+		RowCount: readSize,
+	}, nil
+}
+
+// Schema returns the schema of the child operator; sorting does not change it.
+func (t *TopKSortExec) Schema() *arrow.Schema {
+	return t.schema
+}
+
+// Close closes the child operator and returns its error, if any.
+func (t *TopKSortExec) Close() error {
+	return t.input.Close()
+}
+
+// heapRow pairs a row position with the sort-key values extracted for it.
+type heapRow struct {
+	rowIdx uint64 // position of the row inside the merged columns
+	keys   []any  // one extracted value per sort key, in sort-key order
+}
+
+// UpdateTopKSorted folds newBatch into the running top-k result.
+//
+// The batch is merged with the rows retained so far (new rows first), the sort
+// keys are evaluated over the merged rows, and the best min(k, rows) rows are
+// written back to t.sortedColumns in sort order.
+func (t *TopKSortExec) UpdateTopKSorted(newBatch *operators.RecordBatch, sortKeys []SortKey, mem memory.Allocator) error {
+	allColumns, err := joinArrays(newBatch.Columns, t.sortedColumns, mem)
+	if err != nil {
+		return err
+	}
+	if len(allColumns) == 0 || allColumns[0] == nil {
+		return nil // no column data to rank
+	}
+	rowCount := allColumns[0].Len()
+
+	// Evaluate the keys over the merged rows, not over newBatch alone: the row
+	// index used below addresses allColumns, which also holds the rows kept
+	// from earlier batches.
+	merged := &operators.RecordBatch{
+		Schema:   newBatch.Schema,
+		Columns:  allColumns,
+		RowCount: uint64(rowCount),
+	}
+	keyCols := make([]arrow.Array, len(sortKeys))
+	for i, sk := range sortKeys {
+		arr, err := Expr.EvalExpression(sk.Expr, merged)
+		if err != nil {
+			return err
+		}
+		keyCols[i] = arr
+	}
+
+	rows := make([]heapRow, 0, rowCount)
+	for i := 0; i < rowCount; i++ {
+		keys := make([]any, len(keyCols))
+		for k, col := range keyCols {
+			keys[k] = extractValue(col, i)
+		}
+		rows = append(rows, heapRow{rowIdx: uint64(i), keys: keys})
+	}
+	sortBySortKeys(rows, sortKeys)
+
+	tk := min(int(t.k), len(rows)) // in case k > len(rows)
+	idxArr := make([]uint64, 0, tk)
+	for _, r := range rows[:tk] {
+		idxArr = append(idxArr, r.rowIdx)
+	}
+	takeArray := idxToArrowArray(idxArr, mem)
+	defer takeArray.Release()
+
+	ctx := context.Background()
+	for i := range newBatch.Schema.NumFields() {
+		sc, err := compute.TakeArray(ctx, allColumns[i], takeArray)
+		if err != nil {
+			return err
+		}
+		t.sortedColumns[i] = sc
+	}
+	return nil
+}
+
+// joinArrays concatenates two column sets position by position, placing the
+// values of existing[i] before those of newarrs[i]. When one side is empty the
+// other side is returned unchanged, and a nil column on one side yields the
+// column of the other side as is. An error is returned when newarrs has fewer
+// columns than existing.
+func joinArrays(existing, newarrs []arrow.Array, mem memory.Allocator) ([]arrow.Array, error) {
+	if len(existing) == 0 {
+		return newarrs, nil
+	}
+	if len(newarrs) == 0 {
+		return existing, nil
+	}
+	if len(newarrs) < len(existing) {
+		return nil, fmt.Errorf("join arrays: expected %d columns, got %d", len(existing), len(newarrs))
+	}
+	result := make([]arrow.Array, len(existing))
+	for i, v1 := range existing {
+		v2 := newarrs[i]
+		switch {
+		case v1 == nil:
+			result[i] = v2
+		case v2 == nil:
+			result[i] = v1
+		default:
+			combined, err := array.Concatenate([]arrow.Array{v1, v2}, mem)
+			if err != nil {
+				return nil, err
+			}
+			result[i] = combined
+		}
+	}
+	return result, nil
+}
+
+// consumeSortedBatch copies the next readsize rows, starting at
+// t.consumedOffset, out of every retained column and advances the offset.
+func (t *TopKSortExec) consumeSortedBatch(readsize uint64, mem memory.Allocator) ([]arrow.Array, error) {
+	ctx := context.Background()
+	resultColumns := make([]arrow.Array, len(t.schema.Fields()))
+	offsetArray := genoffsetTakeIdx(t.consumedOffset, readsize, mem)
+	defer offsetArray.Release()
+	for i, sortArr := range t.sortedColumns {
+		arr, err := compute.TakeArray(ctx, sortArr, offsetArray)
+		if err != nil {
+			return nil, err
+		}
+		resultColumns[i] = arr
+	}
+	t.consumedOffset += readsize
+	return resultColumns, nil
+}
+
+/*
+shared functions
+*/
+
+// sortBatches evaluates every sort key over fullRC and returns the row indices
+// of fullRC arranged in sort order.
+func sortBatches(fullRC *operators.RecordBatch, sortKeys []SortKey) ([]uint64, error) {
+	keyColumns := make([]arrow.Array, len(sortKeys))
+	for i, sk := range sortKeys {
+		arr, err := Expr.EvalExpression(sk.Expr, fullRC)
+		if err != nil {
+			// %w keeps the cause available to errors.Is / errors.As
+			return nil, fmt.Errorf("sort batches: failed to eval sort expression: %w", err)
+		}
+		keyColumns[i] = arr
+	}
+	idVector := make([]uint64, fullRC.RowCount)
+	for i := range idVector {
+		idVector[i] = uint64(i)
+	}
+	sortIndexVector(idVector, keyColumns, sortKeys)
+	return idVector, nil
+}
+
+// sortIndexVector sorts idVec based on keyColumns + sortKeys.
+// keyColumns[i] corresponds to sortKeys[i].
+func sortIndexVector(idVec []uint64, keyColumns []arrow.Array, sortKeys []SortKey) {
+	// Stable sort: rows that tie on every key keep their incoming order, which
+	// makes the result deterministic.
+	sort.SliceStable(idVec, func(a, b int) bool {
+		i, j := idVec[a], idVec[b]
+
+		// lexicographic: the first key that differs decides
+		for k, col := range keyColumns {
+			cmp := compareArrowValues(col, i, j)
+			if cmp == 0 {
+				continue // equal → move to next key
+			}
+			if sortKeys[k].Ascending {
+				return cmp < 0
+			}
+			return cmp > 0
+		}
+
+		// completely equal for all keys
+		return false
+	})
+}
+
+// compareArrowValues three-way compares rows i and j of col and returns -1, 0
+// or 1. Nulls compare equal to each other and lower than any value; float NaN
+// is ordered as described on compareFloat. It panics for Arrow types that are
+// not handled below.
+func compareArrowValues(col arrow.Array, i, j uint64) int {
+	a, b := int(i), int(j)
+
+	// Handle nulls (treat as lowest value for now)
+	aNull, bNull := col.IsNull(a), col.IsNull(b)
+	switch {
+	case aNull && bNull:
+		return 0
+	case aNull:
+		return -1
+	case bNull:
+		return 1
+	}
+
+	switch arr := col.(type) {
+	case *array.String:
+		vi, vj := arr.Value(a), arr.Value(b)
+		switch {
+		case vi < vj:
+			return -1
+		case vi > vj:
+			return 1
+		default:
+			return 0
+		}
+
+	case *array.Int8:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+	case *array.Int16:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+	case *array.Int32:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+	case *array.Int64:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+
+	case *array.Uint8:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+	case *array.Uint16:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+	case *array.Uint32:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+	case *array.Uint64:
+		return compareNumeric(arr.Value(a), arr.Value(b))
+
+	case *array.Float32:
+		return compareFloat(arr.Value(a), arr.Value(b))
+	case *array.Float64:
+		return compareFloat(arr.Value(a), arr.Value(b))
+
+	case *array.Boolean:
+		vi, vj := arr.Value(a), arr.Value(b)
+		if vi == vj {
+			return 0
+		}
+		if !vi && vj {
+			return -1
+		}
+		return 1
+
+	default:
+		panic("unsupported Arrow type in compareArrowValues")
+	}
+}
+
+// compareNumeric three-way compares two integers of the same type.
+func compareNumeric[T int | int64 | int32 | int16 | int8 | uint | uint64 | uint32 | uint16 | uint8](a, b T) int {
+	switch {
+	case a < b:
+		return -1
+	case a > b:
+		return 1
+	default:
+		return 0
+	}
+}
+
+// compareFloat three-way compares two floats. NaN is given a fixed place so
+// the comparison stays a consistent ordering for sorting: NaN equals NaN and
+// is lower than every other value.
+func compareFloat[T float32 | float64](a, b T) int {
+	aNaN, bNaN := math.IsNaN(float64(a)), math.IsNaN(float64(b))
+	switch {
+	case aNaN && bNaN:
+		return 0
+	case aNaN:
+		return -1
+	case bNaN:
+		return 1
+	case a < b:
+		return -1
+	case a > b:
+		return 1
+	default:
+		return 0
+	}
+}
+
+// extractValue returns row idx of col as a plain Go value for use with
+// comparePrimitive. Signed integers are widened to int64, unsigned integers to
+// uint64 and float32 to float64; a null row is returned as nil. It panics for
+// Arrow types that are not handled below.
+func extractValue(col arrow.Array, idx int) interface{} {
+	if col.IsNull(idx) {
+		return nil // the value buffer holds no meaningful data for a null row
+	}
+	switch arr := col.(type) {
+
+	case *array.String:
+		return arr.Value(idx)
+
+	case *array.Int8:
+		return int64(arr.Value(idx))
+	case *array.Int16:
+		return int64(arr.Value(idx))
+	case *array.Int32:
+		return int64(arr.Value(idx))
+	case *array.Int64:
+		return arr.Value(idx)
+
+	case *array.Uint8:
+		return uint64(arr.Value(idx))
+	case *array.Uint16:
+		return uint64(arr.Value(idx))
+	case *array.Uint32:
+		return uint64(arr.Value(idx))
+	case *array.Uint64:
+		return arr.Value(idx)
+
+	case *array.Float32:
+		return float64(arr.Value(idx))
+	case *array.Float64:
+		return arr.Value(idx)
+
+	case *array.Boolean:
+		return arr.Value(idx)
+
+	default:
+		panic("unsupported type")
+	}
+}
+
+// sortBySortKeys orders rows by their extracted keys; keys[k] of every row is
+// compared according to sortKeys[k]. The sort is stable, so rows that tie on
+// every key keep their incoming order.
+func sortBySortKeys(rows []heapRow, sortKeys []SortKey) {
+	sort.SliceStable(rows, func(i, j int) bool {
+		ri, rj := rows[i], rows[j]
+
+		for k, sk := range sortKeys {
+			cmp := comparePrimitive(ri.keys[k], rj.keys[k])
+			if cmp == 0 {
+				continue // move to next key
+			}
+			if sk.Ascending {
+				return cmp < 0
+			}
+			return cmp > 0
+		}
+
+		return false
+	})
+}
+
+// comparePrimitive three-way compares two values of the same dynamic type and
+// returns -1, 0 or 1. nil stands for a null row: it equals nil and is lower
+// than any value, matching compareArrowValues. It panics when b does not have
+// the dynamic type of a, or when the type is not handled below.
+func comparePrimitive(a, b any) int {
+	switch {
+	case a == nil && b == nil:
+		return 0
+	case a == nil:
+		return -1
+	case b == nil:
+		return 1
+	}
+
+	switch va := a.(type) {
+
+	case int:
+		return compareNumeric(va, b.(int))
+	case int8:
+		return compareNumeric(va, b.(int8))
+	case int16:
+		return compareNumeric(va, b.(int16))
+	case int32:
+		return compareNumeric(va, b.(int32))
+	case int64:
+		return compareNumeric(va, b.(int64))
+
+	case uint:
+		return compareNumeric(va, b.(uint))
+	case uint8:
+		return compareNumeric(va, b.(uint8))
+	case uint16:
+		return compareNumeric(va, b.(uint16))
+	case uint32:
+		return compareNumeric(va, b.(uint32))
+	case uint64:
+		return compareNumeric(va, b.(uint64))
+
+	case float32:
+		return compareFloat(va, b.(float32))
+	case float64:
+		return compareFloat(va, b.(float64))
+
+	case string:
+		vb := b.(string)
+		switch {
+		case va < vb:
+			return -1
+		case va > vb:
+			return 1
+		default:
+			return 0
+		}
+
+	case bool:
+		vb := b.(bool)
+		if va == vb {
+			return 0
+		}
+		if !va && vb {
+			return -1
+		}
+		return 1
+
+	default:
+		panic("unsupported primitive type")
+	}
+}
+
+// idxToArrowArray copies v into a new Arrow uint64 array, e.g. for use as the
+// index argument of compute.TakeArray. The caller must release the result.
+func idxToArrowArray(v []uint64, mem memory.Allocator) arrow.Array {
+	b := array.NewUint64Builder(mem)
+	defer b.Release()
+	b.AppendValues(v, nil) // nil validity: every index is non-null
+	return b.NewArray()
+}
+
+// genoffsetTakeIdx builds the index array offset, offset+1, ..., offset+size-1.
+func genoffsetTakeIdx(offset, size uint64, mem memory.Allocator) arrow.Array {
+	b := array.NewUint64Builder(mem)
+	defer b.Release()
+	for i := range size {
+		b.Append(offset + i)
+	}
+	return
b.NewArray() +} diff --git a/src/Backend/opti-sql-go/operators/aggr/sort_test.go b/src/Backend/opti-sql-go/operators/aggr/sort_test.go index b919b31..20b8afe 100644 --- a/src/Backend/opti-sql-go/operators/aggr/sort_test.go +++ b/src/Backend/opti-sql-go/operators/aggr/sort_test.go @@ -1,7 +1,619 @@ package aggr -import "testing" +import ( + "errors" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators/project" + "testing" -func TestSort(t *testing.T) { + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/go-jose/go-jose/v4/testutils/require" +) + +func TestSortInit(t *testing.T) { // Simple passing test + t.Run("sort Exec init", func(t *testing.T) { + proj := aggProject() + sortExec, err := NewSortExec(proj, nil) + if err != nil { + t.Fatal(err) + } + if !sortExec.Schema().Equal(proj.Schema()) { + t.Fatalf("expected schema %v, got %v", proj.Schema(), sortExec.schema) + } + sortExec.done = true + _, err = sortExec.Next(100) + if err != io.EOF { + t.Fatalf("expected io.EOF error on done sortExec but got %v", err) + } + if sortExec.Close() != nil { + t.Fatalf("expected nil error on close but got %v", sortExec.Close()) + } + + }) + t.Run("SortKey options", func(t *testing.T) { + proj := aggProject() + _, err := NewSortExec(proj, []SortKey{*NewSortKey(col("id"), false, false)}) + if err != nil { + t.Fatal(err) + } + + }) + t.Run("top k sort exec init", func(t *testing.T) { + proj := aggProject() + topKVal := 5 + topK, err := NewTopKSortExec(proj, nil, uint16(topKVal)) + if err != nil { + t.Fatal(err) + } + if !topK.Schema().Equal(proj.Schema()) { + t.Fatalf("expected schema %v, got %v", proj.Schema(), topK.schema) + } + if topK.k != 5 { + t.Fatalf("expected %v for top k but got %v", topKVal, topK.k) + } + topK.done = true + _, err = topK.Next(100) + if err != io.EOF { + t.Fatalf("expected io.EOF error on done topK but got %v", err) + } + if topK.Close() != nil { + 
t.Fatalf("expected nil error on close but got %v", topK.Close()) + } + + }) +} + +func TestBasicSortExpr(t *testing.T) { + t.Run("Sort", func(t *testing.T) { + proj := aggProject() + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + _, err := NewSortExec(proj, CombineSortKeys(nameSK, ageSK)) + if err != nil { + t.Fatalf("unexpected error from NewSortExec : %v\n", err) + } + //t.Logf("%v\n", sortExec) + }) + t.Run("Basic Next operation", func(t *testing.T) { + proj := aggProject() + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + sortExec, err := NewSortExec(proj, CombineSortKeys(ageSK, nameSK)) + if err != nil { + t.Fatalf("unexpected error from NewSortExec : %v\n", err) + } + for { + sortedBatch, err := sortExec.Next(5) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + t.Fatalf("unexpected error from sortExec Next : %v\n", err) + } + t.Logf("%v\n", sortedBatch.PrettyPrint()) + } + }) +} +func TestFullSortOverNetwork(t *testing.T) { + t.Run("Full Sort of large file", func(t *testing.T) { + const fileName = "country_full.csv" + nr, err := project.NewStreamReader(fileName) + if err != nil { + t.Fatalf("failed to create s3 object: %v", err) + } + pj, err := project.NewProjectCSVLeaf(nr.Stream()) + if err != nil { + t.Fatalf("failed to create csv project source from s3 object: %v", err) + } + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + sortExec, err := NewSortExec(pj, CombineSortKeys(nameSK)) + if err != nil { + t.Fatalf("unexpected error %v\n", err) + } + rc, err := sortExec.Next(10) + if err != nil { + t.Fatalf("unexpected error %v\n", err) + } + t.Logf("%v\n", rc.PrettyPrint()) + + }) + +} + +func TestFullSortExec_Next(t *testing.T) { + t.Parallel() + + t.Run("sort_age_DESC", func(t 
*testing.T) { + proj := aggProject() + + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) // DESC + + sortExec, err := NewSortExec(proj, CombineSortKeys(ageSK)) + require.NoError(t, err) + + batch, err := sortExec.Next(5) + require.NoError(t, err) + require.Equal(t, uint64(5), batch.RowCount) + + ages := batch.Columns[2].(*array.Int32) + got := []int32{ + ages.Value(0), + ages.Value(1), + ages.Value(2), + ages.Value(3), + ages.Value(4), + } + + expected := []int32{50, 48, 46, 45, 43} + for i, v := range expected { + if got[i] != v { + t.Fatalf("expected %v at index %d, but got %v", v, i, got[i]) + } + } + }) + + t.Run("sort_name_ASC", func(t *testing.T) { + proj := aggProject() + + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + + sortExec, err := NewSortExec(proj, CombineSortKeys(nameSK)) + require.NoError(t, err) + + batch, err := sortExec.Next(3) + require.NoError(t, err) + + names := batch.Columns[1].(*array.String) + got := []string{ + names.Value(0), + names.Value(1), + names.Value(2), + } + + expected := []string{"Alice", "Bob", "Charlie"} + for i, v := range expected { + if got[i] != v { + t.Fatalf("expected %v at index %d, but got %v", v, i, got[i]) + } + } + }) +} + +// ----------------------------------------------------------------------------- +// TEST 2: sortIndexVector() +// ----------------------------------------------------------------------------- + +func TestSortIndexVector(t *testing.T) { + t.Parallel() + + mem := memory.NewGoAllocator() + + t.Run("single_key_int", func(t *testing.T) { + b := array.NewInt32Builder(mem) + b.AppendValues([]int32{30, 10, 20}, nil) + arr := b.NewArray() + defer arr.Release() + + keys := []arrow.Array{arr} + idVec := []uint64{0, 1, 2} + + sks := []SortKey{ + {Expr: nil, Ascending: true}, + } + + sortIndexVector(idVec, keys, sks) + + expected := []uint64{1, 2, 0} + for i, v := range expected { + if idVec[i] != v { + t.Fatalf("expected %v at index %d, but 
got %v", v, i, idVec[i]) + } + } + }) + + t.Run("single_key_string", func(t *testing.T) { + b := array.NewStringBuilder(mem) + b.AppendValues([]string{"Charlie", "Alice", "Bob"}, nil) + arr := b.NewArray() + defer arr.Release() + + keys := []arrow.Array{arr} + idVec := []uint64{0, 1, 2} + + sks := []SortKey{{Ascending: true}} + + sortIndexVector(idVec, keys, sks) + + expected := []uint64{1, 2, 0} + for i, v := range expected { + if idVec[i] != v { + t.Fatalf("expected %v at index %d, but got %v", v, i, idVec[i]) + } + } + }) +} + +// ----------------------------------------------------------------------------- +// TEST 3: compareArrowValues() +// ----------------------------------------------------------------------------- + +func TestCompareArrowValues(t *testing.T) { + t.Parallel() + + mem := memory.NewGoAllocator() + + t.Run("int", func(t *testing.T) { + b := array.NewInt32Builder(mem) + b.AppendValues([]int32{10, 20}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + require.Equal(t, 0, compareArrowValues(arr, 0, 0)) + }) + + t.Run("uint", func(t *testing.T) { + b := array.NewUint32Builder(mem) + b.AppendValues([]uint32{5, 7}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) + + t.Run("float", func(t *testing.T) { + b := array.NewFloat64Builder(mem) + b.AppendValues([]float64{1.5, 1.7}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) + + t.Run("string", func(t *testing.T) { + b := array.NewStringBuilder(mem) + b.AppendValues([]string{"a", "b"}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) + + t.Run("bool", func(t 
*testing.T) { + b := array.NewBooleanBuilder(mem) + b.AppendValues([]bool{false, true}, nil) + arr := b.NewArray() + defer arr.Release() + + require.Equal(t, -1, compareArrowValues(arr, 0, 1)) + require.Equal(t, 1, compareArrowValues(arr, 1, 0)) + }) +} +func TestCompareArrowValues_AllTypes(t *testing.T) { + mem := memory.NewGoAllocator() + + // helper to assert cmp result + assert := func(name string, got, want int) { + if got != want { + t.Fatalf("%s: expected %d, got %d", name, want, got) + } + } + + // ---- STRING ---- + strB := array.NewStringBuilder(mem) + strB.Append("apple") + strB.Append("banana") + strArr := strB.NewArray().(*array.String) + + assert("string lt", compareArrowValues(strArr, 0, 1), -1) + assert("string gt", compareArrowValues(strArr, 1, 0), 1) + assert("string eq", compareArrowValues(strArr, 0, 0), 0) + + strArr.Release() + strB.Release() + + // ---- INT TYPES ---- + int8Arr := buildInt8(mem, []int8{1, 3}) + assert("int8 lt", compareArrowValues(int8Arr, 0, 1), -1) + assert("int8 gt", compareArrowValues(int8Arr, 1, 0), 1) + assert("int8 eq", compareArrowValues(int8Arr, 0, 0), 0) + int8Arr.Release() + + int16Arr := buildInt16(mem, []int16{5, 2}) + assert("int16 gt", compareArrowValues(int16Arr, 0, 1), 1) + int16Arr.Release() + + int32Arr := buildInt32(mem, []int32{10, 10}) + assert("int32 eq", compareArrowValues(int32Arr, 0, 1), 0) + int32Arr.Release() + + int64Arr := buildInt64(mem, []int64{-5, 7}) + assert("int64 lt", compareArrowValues(int64Arr, 0, 1), -1) + int64Arr.Release() + + // ---- UINT TYPES ---- + u8Arr := buildUint8(mem, []uint8{9, 3}) + assert("uint8 gt", compareArrowValues(u8Arr, 0, 1), 1) + u8Arr.Release() + + u16Arr := buildUint16(mem, []uint16{3, 3}) + assert("uint16 eq", compareArrowValues(u16Arr, 0, 1), 0) + u16Arr.Release() + + u32Arr := buildUint32(mem, []uint32{3, 10}) + assert("uint32 lt", compareArrowValues(u32Arr, 0, 1), -1) + u32Arr.Release() + + u64Arr := buildUint64(mem, []uint64{100, 2}) + assert("uint64 gt", 
compareArrowValues(u64Arr, 0, 1), 1) + u64Arr.Release() + + // ---- FLOAT TYPES ---- + f32Arr := buildFloat32(mem, []float32{1.5, 1.5}) + assert("float32 eq", compareArrowValues(f32Arr, 0, 1), 0) + f32Arr.Release() + + f64Arr := buildFloat64(mem, []float64{-1.0, 2.3}) + assert("float64 lt", compareArrowValues(f64Arr, 0, 1), -1) + f64Arr.Release() + + // ---- BOOLEAN ---- + boolArr := buildBool(mem, []bool{false, true}) + assert("bool lt", compareArrowValues(boolArr, 0, 1), -1) + assert("bool gt", compareArrowValues(boolArr, 1, 0), 1) + assert("bool eq", compareArrowValues(boolArr, 1, 1), 0) + boolArr.Release() + + // ---- NULL CASES ---- + nullB := array.NewInt32Builder(mem) + nullB.AppendNull() + nullB.Append(10) + nullArr := nullB.NewArray().(*array.Int32) + + assert("null < value", compareArrowValues(nullArr, 0, 1), -1) + assert("value > null", compareArrowValues(nullArr, 1, 0), 1) + assert("null == null", compareArrowValues(nullArr, 0, 0), 0) + + nullArr.Release() + nullB.Release() + + // ---- UNSUPPORTED TYPE PANIC ---- + // Build a fixed-size binary array to trigger panic + fsb := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 2}) + fsb.Append([]byte{1, 2}) + fsb.Append([]byte{3, 4}) + fsArr := fsb.NewArray() + + didPanic := false + func() { + defer func() { + if recover() != nil { + didPanic = true + } + }() + _ = compareArrowValues(fsArr, 0, 1) + }() + if !didPanic { + t.Fatalf("expected panic for unsupported Arrow type") + } + + fsArr.Release() + fsb.Release() +} + +// Top-K sort tests kept simple and grouped into two test functions +func buildInt8(mem memory.Allocator, vals []int8) *array.Int8 { + b := array.NewInt8Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Int8) + b.Release() + return arr +} + +func buildInt16(mem memory.Allocator, vals []int16) *array.Int16 { + b := array.NewInt16Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Int16) + 
b.Release() + return arr +} + +func buildInt32(mem memory.Allocator, vals []int32) *array.Int32 { + b := array.NewInt32Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Int32) + b.Release() + return arr +} + +func buildInt64(mem memory.Allocator, vals []int64) *array.Int64 { + b := array.NewInt64Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Int64) + b.Release() + return arr +} + +func buildUint8(mem memory.Allocator, vals []uint8) *array.Uint8 { + b := array.NewUint8Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint8) + b.Release() + return arr +} + +func buildUint16(mem memory.Allocator, vals []uint16) *array.Uint16 { + b := array.NewUint16Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint16) + b.Release() + return arr +} + +func buildUint32(mem memory.Allocator, vals []uint32) *array.Uint32 { + b := array.NewUint32Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint32) + b.Release() + return arr +} + +func buildUint64(mem memory.Allocator, vals []uint64) *array.Uint64 { + b := array.NewUint64Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Uint64) + b.Release() + return arr +} + +func buildFloat32(mem memory.Allocator, vals []float32) *array.Float32 { + b := array.NewFloat32Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Float32) + b.Release() + return arr +} + +func buildFloat64(mem memory.Allocator, vals []float64) *array.Float64 { + b := array.NewFloat64Builder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Float64) + b.Release() + return arr +} + +func buildBool(mem memory.Allocator, vals []bool) *array.Boolean { + b := array.NewBooleanBuilder(mem) + for _, v := range vals { + b.Append(v) + } + arr := b.NewArray().(*array.Boolean) + b.Release() + 
return arr +} + +// Consolidated TopK tests: two functions with multiple subtests, placed at file bottom. +func TestTopKSort_BasicAndValues(t *testing.T) { + t.Run("AgeDesc_Top5", func(t *testing.T) { + proj := aggProject() + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + + sortExec, err := NewTopKSortExec(proj, CombineSortKeys(ageSK), 5) + if err != nil { + t.Fatalf("NewTopKSortExec error: %v", err) + } + rb, err := sortExec.Next(5) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + if rb.RowCount != 5 { + t.Fatalf("expected 5 rows, got %d", rb.RowCount) + } + ages := rb.Columns[2].(*array.Int32) + expected := []int32{50, 48, 46, 45, 43} + for i := range expected { + if ages.Value(i) != expected[i] { + t.Fatalf("age mismatch at %d: expected %v got %v", i, expected[i], ages.Value(i)) + } + } + for _, c := range rb.Columns { + c.Release() + } + if err := sortExec.Close(); err != nil { + t.Fatalf("close error: %v", err) + } + }) + + t.Run("KGreaterThanRows_ReturnsAll", func(t *testing.T) { + proj := aggProject() + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + sortExec, err := NewTopKSortExec(proj, CombineSortKeys(ageSK), 100) + if err != nil { + t.Fatalf("NewTopKSortExec error: %v", err) + } + rb, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("Next error: %v", err) + } + if rb.RowCount == 0 { + t.Fatalf("expected rows when K > total rows") + } + for _, c := range rb.Columns { + c.Release() + } + if err := sortExec.Close(); err != nil { + t.Fatalf("close error: %v", err) + } + }) +} + +func TestTopKSort_CombinedAndPagination(t *testing.T) { + t.Run("CombinedKeys_Pagination_TotalMatchesK", func(t *testing.T) { + proj := aggProject() + nameExpr := Expr.NewColumnResolve("name") + nameSK := NewSortKey(nameExpr, true) + ageExpr := Expr.NewColumnResolve("age") + ageSK := NewSortKey(ageExpr, false) + sortExec, err := NewTopKSortExec(proj, CombineSortKeys(ageSK, 
nameSK), 7) + if err != nil { + t.Fatalf("NewTopKSortExec error: %v", err) + } + total := uint64(0) + for _, sz := range []uint16{3, 3, 3} { + rb, err := sortExec.Next(sz) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("Next error: %v", err) + } + total += rb.RowCount + for _, c := range rb.Columns { + c.Release() + } + if errors.Is(err, io.EOF) { + break + } + } + if total != 7 { + t.Fatalf("expected total 7 rows, got %d", total) + } + if err := sortExec.Close(); err != nil { + t.Fatalf("close error: %v", err) + } + }) } diff --git a/src/Backend/opti-sql-go/operators/aggr/sum.go b/src/Backend/opti-sql-go/operators/aggr/sum.go deleted file mode 100644 index abd1ad5..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/sum.go +++ /dev/null @@ -1 +0,0 @@ -package aggr diff --git a/src/Backend/opti-sql-go/operators/aggr/sum_test.go b/src/Backend/opti-sql-go/operators/aggr/sum_test.go deleted file mode 100644 index 485b9bb..0000000 --- a/src/Backend/opti-sql-go/operators/aggr/sum_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package aggr - -import "testing" - -func TestSum(t *testing.T) { - // Simple passing test -} diff --git a/src/Backend/opti-sql-go/operators/filter/filter.go b/src/Backend/opti-sql-go/operators/filter/filter.go index ddd8c1b..d09f4a2 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter.go +++ b/src/Backend/opti-sql-go/operators/filter/filter.go @@ -3,6 +3,7 @@ package filter import ( "context" "errors" + "fmt" "io" "opti-sql-go/Expr" "opti-sql-go/operators" @@ -10,6 +11,7 @@ import ( "github.com/apache/arrow/go/v17/arrow" "github.com/apache/arrow/go/v17/arrow/array" "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" ) var ( @@ -22,6 +24,9 @@ type FilterExec struct { schema *arrow.Schema predicate Expr.Expression done bool + // + bufferedCols []arrow.Array // not yet returned + bufferedSize int64 } func NewFilterExec(input operators.Operator, pred Expr.Expression) (*FilterExec, error) { @@ -29,49 
+34,84 @@ func NewFilterExec(input operators.Operator, pred Expr.Expression) (*FilterExec, return nil, errors.New("predicates passed to FilterExec are invalid") } return &FilterExec{ - input: input, - predicate: pred, - schema: input.Schema(), + input: input, + predicate: pred, + schema: input.Schema(), + bufferedCols: make([]arrow.Array, input.Schema().NumFields()), }, nil } func (f *FilterExec) Next(n uint16) (*operators.RecordBatch, error) { - if n == 0 { - return nil, errors.New("must pass in wanted batch size > 0") - } - if f.done { + if f.done && f.bufferedSize == 0 { return nil, io.EOF } - batch, err := f.input.Next(n) - if err != nil { - return nil, err - } - booleanMask, err := Expr.EvalExpression(f.predicate, batch) - if err != nil { - return nil, err - } - boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should - if !ok { - return nil, errors.New("predicate did not evaluate to boolean array") - } - filteredCol := make([]arrow.Array, len(batch.Columns)) - for i, col := range batch.Columns { - filteredCol[i], err = applyBooleanMask(col, boolArr) + mem := memory.NewGoAllocator() + for f.bufferedSize < int64(n) && !f.done { + childBatch, err := f.input.Next(n) + if err != nil { + if errors.Is(err, io.EOF) { + f.done = true + break // might be some in the buffer still + } + return nil, err + } + booleanMask, err := Expr.EvalExpression(f.predicate, childBatch) if err != nil { return nil, err } + boolArr, ok := booleanMask.(*array.Boolean) // impossible for this to not be a boolean array,assuming validPredicates works as it should + if !ok { + return nil, errors.New("predicate did not evaluate to boolean array") + } + filteredCol := make([]arrow.Array, len(childBatch.Columns)) + for i, col := range childBatch.Columns { + filteredCol[i], err = ApplyBooleanMask(col, boolArr) + if err != nil { + return nil, err + } + } + booleanMask.Release() + // combine with buffered columns + for 
i, col := range f.bufferedCols { + if col == nil { + f.bufferedCols[i] = filteredCol[i] + continue + } + // otherwise concate old + new + combined, err := array.Concatenate([]arrow.Array{col, filteredCol[i]}, mem) + if err != nil { + return nil, err + } + + // Release old buffer column + col.Release() + + f.bufferedCols[i] = combined + } + if len(childBatch.Columns) > 0 { + size := int64(filteredCol[0].Len()) + f.bufferedSize += int64(size) + } + } + if f.bufferedSize == 0 { + return nil, io.EOF } - // release old columns - for _, c := range batch.Columns { - c.Release() + toEmit := min(int64(n), f.bufferedSize) + out, err := f.sliceFilterCols(toEmit, mem) + if err != nil { + return nil, err } - size := uint64(filteredCol[0].Len()) + // subtract emitted rows from buffer; guard against accidental negative values + + size := uint64(out[0].Len()) - return &operators.RecordBatch{ - Schema: batch.Schema, - Columns: filteredCol, + rc := &operators.RecordBatch{ + Schema: f.schema, + Columns: out, RowCount: size, - }, nil + } + return rc, nil } + func (f *FilterExec) Schema() *arrow.Schema { return f.schema } @@ -80,9 +120,9 @@ func (f *FilterExec) Close() error { return f.input.Close() } -func applyBooleanMask(col arrow.Array, mask *array.Boolean) (arrow.Array, error) { +func ApplyBooleanMask(col arrow.Array, mask *array.Boolean) (arrow.Array, error) { datum, err := compute.Filter( - context.TODO(), + context.Background(), compute.NewDatum(col), compute.NewDatum(mask), *compute.DefaultFilterOptions(), @@ -123,17 +163,83 @@ func validPredicates(pred Expr.Expression, schema *arrow.Schema) bool { if err != nil { return false } + fmt.Printf("dt1:\t%v\ndt2:\t%v\n", dt1, dt2) if !arrow.TypeEqual(dt1, dt2) { return false } - // recursively validate children + fmt.Printf("left:\t%v\nright:\t%v\n", p.Left, p.Right) return validPredicates(p.Left, schema) && validPredicates(p.Right, schema) case *Expr.LiteralResolve: return true + case *Expr.NullCheckExpr: + return 
validPredicates(p.Expr, schema) + case *Expr.ScalarFunction: + return true default: return false } } + +func (f *FilterExec) sliceFilterCols(n int64, mem memory.Allocator) ([]arrow.Array, error) { + out := make([]arrow.Array, len(f.bufferedCols)) + + // Build index arrays for: + // 1) rows to emit: 0 .. n-1 + // 2) rows to keep: n .. f.bufferedSize-1 + emitIdx := array.NewInt64Builder(mem) + keepIdx := array.NewInt64Builder(mem) + + total := f.bufferedSize + limit := n + if limit > total { + limit = total + } + + // emit rows [0 , limit) + for i := int64(0); i < limit; i++ { + emitIdx.Append(i) + } + + // keep rows [limit , total) + for i := limit; i < total; i++ { + keepIdx.Append(i) + } + + emitArr := emitIdx.NewArray() + keepArr := keepIdx.NewArray() + emitIdx.Release() + keepIdx.Release() + defer emitArr.Release() + defer keepArr.Release() + + // For each column: materialize output slice + update buffer + ctx := context.Background() + for i, col := range f.bufferedCols { + // emit slice + sliceOut, err := compute.TakeArray(ctx, col, emitArr) + if err != nil { + return nil, err + } + out[i] = sliceOut + + // keep remaining slice + keepSlice, err := compute.TakeArray(ctx, col, keepArr) + if err != nil { + return nil, err + } + + // release old buffer column + col.Release() + + // store updated buffer + f.bufferedCols[i] = keepSlice + } + + // update size + f.bufferedSize = total - limit + + return out, nil +} diff --git a/src/Backend/opti-sql-go/operators/filter/filter_test.go b/src/Backend/opti-sql-go/operators/filter/filter_test.go index 8e531c9..8e90489 100644 --- a/src/Backend/opti-sql-go/operators/filter/filter_test.go +++ b/src/Backend/opti-sql-go/operators/filter/filter_test.go @@ -24,7 +24,7 @@ func TestFilterInit_1(t *testing.T) { predicate := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), ) _, err := 
NewFilterExec(proj, predicate) if err != nil { @@ -50,7 +50,7 @@ func TestFilterInit_1(t *testing.T) { predicate := Expr.NewBinaryExpr( Expr.NewColumnResolve("does_not_exist"), Expr.Equal, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(1)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 1), ) _, err := NewFilterExec(proj, predicate) if err == nil { @@ -75,7 +75,7 @@ func TestFilterExec_BasicPredicates(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), ) f, _ := NewFilterExec(proj, pred) @@ -136,7 +136,7 @@ func TestFilterExec_BasicPredicates(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("salary"), Expr.LessThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(60000)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(60000.0)), ) f, _ := NewFilterExec(proj, pred) @@ -199,7 +199,7 @@ func TestFilterExec_EdgeCases(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(20)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 20), ) f, _ := NewFilterExec(proj, pred) @@ -216,7 +216,7 @@ func TestFilterExec_EdgeCases(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.GreaterThan, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(0)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 0), ) f, _ := NewFilterExec(proj, pred) @@ -235,18 +235,17 @@ func TestFilterExec_EdgeCases(t *testing.T) { pred := Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.Equal, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(-1)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, -1), ) f, _ := NewFilterExec(proj, pred) - rb, err := f.Next(20) - if err != nil { - t.Fatalf("unexpected: %v", err) + _, err := f.Next(20) 
+ if err == nil { + t.Fatalf("expected EOF error but got nil") } - - if rb.RowCount != 0 { - t.Fatalf("expected 0 rows, got %d", rb.RowCount) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF error but got %v", err) } }) @@ -269,7 +268,7 @@ func TestFilterExec_EdgeCases(t *testing.T) { func TestFilterExecVariantCase(t *testing.T) { t.Run("filter done", func(t *testing.T) { proj := basicProject() - predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) f, _ := NewFilterExec(proj, predicate) _, err := f.Next(1) if err != nil { @@ -284,7 +283,7 @@ func TestFilterExecVariantCase(t *testing.T) { }) t.Run("filter schema ", func(t *testing.T) { proj := basicProject() - predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) f, _ := NewFilterExec(proj, predicate) t.Logf("%s", f.Schema()) if !f.schema.Equal(proj.Schema()) { @@ -294,7 +293,7 @@ func TestFilterExecVariantCase(t *testing.T) { }) t.Run("filter close ", func(t *testing.T) { proj := basicProject() - predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.GreaterThan, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) f, _ := NewFilterExec(proj, predicate) if f.Close() != nil { t.Fatalf("expected nil error on close") @@ -302,7 +301,7 @@ func TestFilterExecVariantCase(t *testing.T) { }) t.Run("filter unsupported binary operator ", func(t *testing.T) { proj := basicProject() - predicate := 
Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.Addition, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(30))) + predicate := Expr.NewBinaryExpr(Expr.NewColumnResolve("age"), Expr.Addition, Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30)) _, err := NewFilterExec(proj, predicate) if err == nil { t.Fatalf("expected error for unsupported binary operator") @@ -319,3 +318,28 @@ func TestFilterExecVariantCase(t *testing.T) { }) } + +func TestFilterBuffer(t *testing.T) { + t.Run("test", func(t *testing.T) { + + proj := basicProject() + predicate := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), + ) + f, err := NewFilterExec(proj, predicate) + if err != nil { + t.Fatalf("failed to create filter exec: %v", err) + } + _, err = f.Next(5) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + _, err = f.Next(5) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + }) +} diff --git a/src/Backend/opti-sql-go/operators/filter/limit.go b/src/Backend/opti-sql-go/operators/filter/limit.go index e4c93a5..6a5aa86 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit.go +++ b/src/Backend/opti-sql-go/operators/filter/limit.go @@ -1,14 +1,23 @@ package filter import ( + "context" + "errors" "io" + "math" + "opti-sql-go/Expr" "opti-sql-go/operators" + "strings" "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/compute" + "github.com/apache/arrow/go/v17/arrow/memory" ) var ( _ = (operators.Operator)(&LimitExec{}) + _ = (operators.Operator)(&DistinctExec{}) ) type LimitExec struct { @@ -69,3 +78,178 @@ func (l *LimitExec) Schema() *arrow.Schema { func (l *LimitExec) Close() error { return l.input.Close() } + +type DistinctExec struct { + input operators.Operator + schema *arrow.Schema + colExpr []Expr.Expression // resolves to column that we want distinct values of + seenValues 
map[string]struct{} // arrow.Array.value(i) (stored and compared as string , structs occupie no space + distinctValuesArray []arrow.Array // hold arrays of distinct values + consumedOffset uint64 // where did we leave off at when returning the distinct arrays to the caller + consumedInput bool // did we consume all the input record batches? + totalRows uint64 + done bool +} + +func NewDistinctExec(input operators.Operator, colExpr []Expr.Expression) (*DistinctExec, error) { + if len(colExpr) == 0 { + return nil, errors.New("distinct operator requires at least one column expression") + } + return &DistinctExec{ + input: input, + schema: input.Schema(), + colExpr: colExpr, + seenValues: make(map[string]struct{}), + distinctValuesArray: make([]arrow.Array, len(input.Schema().Fields())), + }, nil +} + +// pipeline breaker. consume all, if row combonation is already seen, dont include in output +func (d *DistinctExec) Next(n uint16) (*operators.RecordBatch, error) { + if d.done { + return nil, io.EOF + } + mem := memory.NewGoAllocator() + ctx := context.Background() + if !d.consumedInput { + for { + childBatch, err := d.input.Next(math.MaxUint16) + if err != nil { + if errors.Is(err, io.EOF) { + d.consumedInput = true + if d.distinctValuesArray[0] != nil { // nill check in case of no distict elements being found or even just input operator doesnt return anything + d.totalRows = uint64(d.distinctValuesArray[0].Len()) + } + break + } + return nil, err + } + // resolve the columns we care about + evaluatedArrays := make([]arrow.Array, len(d.colExpr)) + for i := range d.colExpr { + arr, err := Expr.EvalExpression(d.colExpr[i], childBatch) + if err != nil { + return nil, err + } + evaluatedArrays[i] = arr + } + var idxTracker []int32 + // Now iterate through each row in the batch + numRows := int(childBatch.RowCount) + for rowIdx := 0; rowIdx < numRows; rowIdx++ { + // Build a key from the combination of values in this row + var keyBuilder strings.Builder + for colIdx, arr 
:= range evaluatedArrays { + if colIdx > 0 { + keyBuilder.WriteString("|") // separator between columns + } + // Check if value is null + if arr.IsNull(rowIdx) { + keyBuilder.WriteString("NULL") + } else { + keyBuilder.WriteString(arr.ValueStr(rowIdx)) + } + } + + key := keyBuilder.String() + if _, seen := d.seenValues[key]; !seen { + d.seenValues[key] = struct{}{} + idxTracker = append(idxTracker, int32(rowIdx)) + // check if its been seen, if it hasnt been add it to the table, + // and keep track of the index so we can grab the value from the array + } + } + takeArray := idxToArrowArray(idxTracker, mem) + for i := range len(childBatch.Columns) { + largeArray := childBatch.Columns[i] + uniqueElements, err := compute.TakeArray(ctx, largeArray, takeArray) + if err != nil { + return nil, err + } + joinedArray, err := joinArrays(d.distinctValuesArray[i], uniqueElements, mem) + if err != nil { + return nil, err + } + // uniqueElements.Release() + d.distinctValuesArray[i] = joinedArray + } + } + } + var readsize uint64 + remaining := d.totalRows - d.consumedOffset + if remaining == 0 { // we've consumed all the distinct arrays, operator is finished + d.done = true + return nil, io.EOF + } + // If remaining >= n, read n. Otherwise read what's left. 
+ if remaining >= uint64(n) { + readsize = uint64(n) + } else { + readsize = remaining + } + distinctArraySlice, err := d.consumeDistinctArrays(readsize, mem) + if err != nil { + return nil, err + } + + var rc uint64 + if len(distinctArraySlice) == 0 { + rc = 0 + } else { + rc = uint64(distinctArraySlice[0].Len()) + } + return &operators.RecordBatch{ + Schema: d.schema, + Columns: distinctArraySlice, + RowCount: rc, + }, nil +} +func (d *DistinctExec) Schema() *arrow.Schema { return d.schema } +func (d *DistinctExec) Close() error { + operators.ReleaseArrays(d.distinctValuesArray) + return d.input.Close() +} +func (d *DistinctExec) consumeDistinctArrays(readSize uint64, mem memory.Allocator) ([]arrow.Array, error) { + ctx := context.Background() + resultColumns := make([]arrow.Array, len(d.schema.Fields())) + offsetArray := genoffsetTakeIdx(d.consumedOffset, readSize, mem) + defer offsetArray.Release() + for i := range d.distinctValuesArray { + col := d.distinctValuesArray[i] + slice, err := compute.TakeArray(ctx, col, offsetArray) + if err != nil { + return nil, err + } + resultColumns[i] = slice + } + d.consumedOffset += readSize + return resultColumns, nil +} + +func idxToArrowArray(v []int32, mem memory.Allocator) arrow.Array { + // turn to array first + b := array.NewInt32Builder(mem) + defer b.Release() + for _, val := range v { + b.Append(val) + } + arr := b.NewArray() + return arr +} +func joinArrays(a1, a2 arrow.Array, mem memory.Allocator) (arrow.Array, error) { + if a1 == nil || a1.Len() == 0 { + return a2, nil + } + if a2 == nil || a2.Len() == 0 { + return a1, nil + } + return array.Concatenate([]arrow.Array{a1, a2}, mem) +} +func genoffsetTakeIdx(offset, size uint64, mem memory.Allocator) arrow.Array { + b := array.NewUint64Builder(mem) + defer b.Release() + for i := range size { + b.Append(offset + i) + } + return b.NewArray() +} diff --git a/src/Backend/opti-sql-go/operators/filter/limit_test.go 
b/src/Backend/opti-sql-go/operators/filter/limit_test.go index 64cd006..9516a89 100644 --- a/src/Backend/opti-sql-go/operators/filter/limit_test.go +++ b/src/Backend/opti-sql-go/operators/filter/limit_test.go @@ -9,6 +9,7 @@ import ( "github.com/apache/arrow/go/v17/arrow" "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" ) func generateTestColumns() ([]string, []any) { @@ -45,11 +46,44 @@ func generateTestColumns() ([]string, []any) { return names, columns } +func generateTestColumnsDistinct() ([]string, []any) { + names := []string{ + "city", + "state", + "product", + } + columns := []any{ + // city - lots of repeated values + []string{ + "Boston", "Boston", "New York", "Boston", "Chicago", + "New York", "Boston", "Chicago", "New York", "Boston", + "Chicago", "Boston", "New York", "Chicago", "Boston", + }, + // state - corresponds to cities + []string{ + "MA", "MA", "NY", "MA", "IL", + "NY", "MA", "IL", "NY", "MA", + "IL", "MA", "NY", "IL", "MA", + }, + // product - repeated products + []string{ + "Laptop", "Phone", "Laptop", "Mouse", "Laptop", + "Phone", "Laptop", "Phone", "Tablet", "Mouse", + "Laptop", "Phone", "Laptop", "Tablet", "Mouse", + }, + } + return names, columns +} func basicProject() *project.InMemorySource { names, col := generateTestColumns() v, _ := project.NewInMemoryProjectExec(names, col) return v } +func distinctProject() *project.InMemorySource { + names, col := generateTestColumnsDistinct() + v, _ := project.NewInMemoryProjectExec(names, col) + return v +} func maskAny(t *testing.T, src *project.InMemorySource, expr Expr.Expression, expected []bool) { t.Helper() @@ -536,3 +570,259 @@ func TestLikeEdgeCases(t *testing.T) { maskAny(t, src, expr, expected) }) } + +// Distinct test cases + +func TestDistinctInit(t *testing.T) { + t.Run("distinct no expressions", func(t *testing.T) { + + proj := distinctProject() + exprs := []Expr.Expression{} + _, err := NewDistinctExec(proj, exprs) + if err == nil { + 
t.Fatalf("expected error from passing in no expressions to distinct operator but got nil") + } + }) + t.Run("distinct init and interface check", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + s := distinctExec.Schema() + if !s.Equal(proj.Schema()) { + t.Fatalf("distinct schema should be the exact same as input but recieved %v instead of %v", s, proj.Schema()) + } + t.Logf("distinct operator %v\n", distinctExec) + if err := distinctExec.Close(); err != nil { + t.Fatalf("unexpected error occured closing operator %v\n", err) + } + distinctExec.done = true + _, err = distinctExec.Next(3) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF but got %v\n", err) + } + }) + t.Run("Basic Next operator test", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + rc, err := distinctExec.Next(5) + if err != nil { + t.Fatalf("error occured grabbing next values from distinct operator %v", err) + } + t.Logf("rc:\t%v\n", rc.PrettyPrint()) + + }) + t.Run("BasicNextOperatorWithMultipleDistinctColumns", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + Expr.NewColumnResolve("state"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + rc, err := distinctExec.Next(5) + if err != nil { + t.Fatalf("error occured grabbing next values from distinct operator %v", err) + } + t.Logf("rc:\t%v\n", rc.PrettyPrint()) + + }) +} +func TestDistinctNext(t *testing.T) { + t.Run("return limited columns", func(t *testing.T) { + proj := 
distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + Expr.NewColumnResolve("state"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + batchsize := 1 + count := 0 + for { + rc, err := distinctExec.Next(uint16(batchsize)) + if err != nil { + if errors.Is(err, io.EOF) { + break + } + t.Fatalf("error occured grabbing next values from distinct operator %v", err) + } + t.Logf("\t%v\n", rc.PrettyPrint()) + if rc.RowCount != uint64(batchsize) { + t.Fatalf("expected record batch of size %d but got %d", batchsize, rc.RowCount) + } + count += int(rc.RowCount) + } + // distinctProject has 3 distinct (city,state) combinations + if count != 3 { + t.Fatalf("expected total distinct rows 3, got %d", count) + } + }) + + t.Run("single column distinct returns expected order", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + // request all in one go + rc, err := distinctExec.Next(10) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + if rc.RowCount != 3 { + t.Fatalf("expected 3 distinct cities, got %d", rc.RowCount) + } + // Expect first-seen order: Boston, New York, Chicago + cityArr := rc.Columns[0].(*array.String) + expect := []string{"Boston", "New York", "Chicago"} + for i := 0; i < int(rc.RowCount); i++ { + if cityArr.Value(i) != expect[i] { + t.Fatalf("expected city %s at idx %d, got %s", expect[i], i, cityArr.Value(i)) + } + } + for _, c := range rc.Columns { + c.Release() + } + }) + + t.Run("Next returns EOF after consumption and Close works", func(t *testing.T) { + proj := distinctProject() + exprs := []Expr.Expression{ + Expr.NewColumnResolve("city"), + Expr.NewColumnResolve("state"), + } + distinctExec, err := NewDistinctExec(proj, exprs) + if 
err != nil { + t.Fatalf("unexpected error creating new distinct operator") + } + // consume all + _, err = distinctExec.Next(10) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error while consuming distinct results: %v", err) + // it's ok if we got results; call Next again until EOF + } + // subsequent Next should return EOF + _, err = distinctExec.Next(1) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected EOF after consuming distinct results, got %v", err) + } + if err := distinctExec.Close(); err != nil { + t.Fatalf("unexpected error on Close: %v", err) + } + }) +} + +func TestJoinArrays(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("first array nil or empty - returns second", func(t *testing.T) { + builder := array.NewInt32Builder(mem) + defer builder.Release() + builder.AppendValues([]int32{1, 2, 3}, nil) + a2 := builder.NewArray() + defer a2.Release() + + // Test with nil + result, err := joinArrays(nil, a2, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + + // Test with empty array + emptyBuilder := array.NewInt32Builder(mem) + defer emptyBuilder.Release() + a1Empty := emptyBuilder.NewArray() + defer a1Empty.Release() + + result, err = joinArrays(a1Empty, a2, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + }) + + t.Run("second array nil or empty - returns first", func(t *testing.T) { + builder := array.NewInt32Builder(mem) + defer builder.Release() + builder.AppendValues([]int32{4, 5, 6}, nil) + a1 := builder.NewArray() + defer a1.Release() + + // Test with nil + result, err := joinArrays(a1, nil, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + + // Test with empty array + emptyBuilder := array.NewInt32Builder(mem) + 
defer emptyBuilder.Release() + a2Empty := emptyBuilder.NewArray() + defer a2Empty.Release() + + result, err = joinArrays(a1, a2Empty, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 3 { + t.Fatalf("expected length 3, got %d", result.Len()) + } + }) + + t.Run("both arrays have data - concatenates", func(t *testing.T) { + builder1 := array.NewInt32Builder(mem) + defer builder1.Release() + builder1.AppendValues([]int32{1, 2, 3}, nil) + a1 := builder1.NewArray() + defer a1.Release() + + builder2 := array.NewInt32Builder(mem) + defer builder2.Release() + builder2.AppendValues([]int32{4, 5, 6}, nil) + a2 := builder2.NewArray() + defer a2.Release() + + result, err := joinArrays(a1, a2, mem) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result.Len() != 6 { + t.Fatalf("expected length 6, got %d", result.Len()) + } + + // Verify concatenated values + int32Result := result.(*array.Int32) + expectedValues := []int32{1, 2, 3, 4, 5, 6} + for i := 0; i < int32Result.Len(); i++ { + if int32Result.Value(i) != expectedValues[i] { + t.Fatalf("at index %d: expected %d, got %d", i, expectedValues[i], int32Result.Value(i)) + } + } + }) +} diff --git a/src/Backend/opti-sql-go/operators/project/custom.go b/src/Backend/opti-sql-go/operators/project/custom.go index e36fa0c..0816600 100644 --- a/src/Backend/opti-sql-go/operators/project/custom.go +++ b/src/Backend/opti-sql-go/operators/project/custom.go @@ -73,6 +73,35 @@ func (ms *InMemorySource) withFields(names ...string) error { ms.columns = cols return nil } +func NewInMemoryProjectExecFromArrays(names []string, arrays []arrow.Array) (*InMemorySource, error) { + if len(names) != len(arrays) { + return nil, operators.ErrInvalidSchema("number of column names and arrays do not match") + } + + fields := make([]arrow.Field, len(names)) + fieldToColIdx := make(map[string]int, len(names)) + + for i, arr := range arrays { + if arr == nil { + return nil, 
operators.ErrInvalidSchema(fmt.Sprintf("nil array for column %s", names[i])) + } + + fields[i] = arrow.Field{ + Name: names[i], + Type: arr.DataType(), + Nullable: true, // Arrow arrays may have null bitmaps + } + + fieldToColIdx[names[i]] = i + } + + return &InMemorySource{ + schema: arrow.NewSchema(fields, nil), + columns: arrays, + fieldToColIDx: fieldToColIdx, + }, nil +} + func (ms *InMemorySource) Next(n uint16) (*operators.RecordBatch, error) { if len(ms.columns) == 0 || ms.pos >= uint16(ms.columns[0].Len()) { return nil, io.EOF // EOF diff --git a/src/Backend/opti-sql-go/operators/project/parquet.go b/src/Backend/opti-sql-go/operators/project/parquet.go index 94b6e1d..50aa856 100644 --- a/src/Backend/opti-sql-go/operators/project/parquet.go +++ b/src/Backend/opti-sql-go/operators/project/parquet.go @@ -22,12 +22,10 @@ var ( ) type ParquetSource struct { - // existing fields schema *arrow.Schema projectionPushDown []string // columns to project up reader pqarrow.RecordReader - // for internal reading - done bool // if set to true always return io.EOF + done bool // if set to true always return io.EOF } func NewParquetSource(r parquet.ReaderAtSeeker) (*ParquetSource, error) { @@ -45,13 +43,13 @@ func NewParquetSource(r parquet.ReaderAtSeeker) (*ParquetSource, error) { arrowReader, err := pqarrow.NewFileReader( filerReader, - pqarrow.ArrowReadProperties{Parallel: true, BatchSize: int64(Config.Batch.Size)}, // TODO: Read in from config for this stuff + pqarrow.ArrowReadProperties{Parallel: true, BatchSize: int64(Config.Batch.Size)}, allocator, ) if err != nil { return nil, err } - rdr, err := arrowReader.GetRecordReader(context.TODO(), nil, nil) + rdr, err := arrowReader.GetRecordReader(context.Background(), nil, nil) if err != nil { return nil, err } @@ -84,7 +82,7 @@ func NewParquetSourcePushDown(r parquet.ReaderAtSeeker, columns []string) (*Parq arrowReader, err := pqarrow.NewFileReader( filerReader, - pqarrow.ArrowReadProperties{Parallel: true, BatchSize: 
int64(Config.Batch.Size)}, // TODO: Read in from config for this stuff + pqarrow.ArrowReadProperties{Parallel: true, BatchSize: int64(Config.Batch.Size)}, allocator, ) if err != nil { @@ -100,7 +98,7 @@ func NewParquetSourcePushDown(r parquet.ReaderAtSeeker, columns []string) (*Parq wantedColumnsIDX = append(wantedColumnsIDX, idx_array...) } - rdr, err := arrowReader.GetRecordReader(context.TODO(), wantedColumnsIDX, nil) + rdr, err := arrowReader.GetRecordReader(context.Background(), wantedColumnsIDX, nil) if err != nil { return nil, err } diff --git a/src/Backend/opti-sql-go/operators/project/parquet_test.go b/src/Backend/opti-sql-go/operators/project/parquet_test.go index ff28535..c051d9f 100644 --- a/src/Backend/opti-sql-go/operators/project/parquet_test.go +++ b/src/Backend/opti-sql-go/operators/project/parquet_test.go @@ -34,7 +34,6 @@ schema: metadata: ["PARQUET:field_id": "-1"] - lon: type=float64, nullable */ -// TODO: more to their own files later down the line func existIn(str string, arr []string) bool { for _, a := range arr { if a == str { diff --git a/src/Backend/opti-sql-go/operators/project/projectExec.go b/src/Backend/opti-sql-go/operators/project/projectExec.go index 9d93d96..abd3da8 100644 --- a/src/Backend/opti-sql-go/operators/project/projectExec.go +++ b/src/Backend/opti-sql-go/operators/project/projectExec.go @@ -20,7 +20,7 @@ var ( ) type ProjectExec struct { - child operators.Operator + input operators.Operator outputschema arrow.Schema expr []Expr.Expression done bool @@ -41,6 +41,17 @@ func NewProjectExec(input operators.Operator, exprs []Expr.Expression) (*Project Type: tp, Nullable: true, } + case *Expr.ColumnResolve: + tp, err := Expr.ExprDataType(ex, input.Schema()) + if err != nil { + return nil, fmt.Errorf("project exec: failed to get expression data type for expr %d: %w", i, err) + } + fields[i] = arrow.Field{ + Name: ex.Name, + Type: tp, + Nullable: true, + } + default: name := fmt.Sprintf("col_%d", i) Type, err := 
Expr.ExprDataType(e, input.Schema()) @@ -60,7 +71,7 @@ func NewProjectExec(input operators.Operator, exprs []Expr.Expression) (*Project outputschema := arrow.NewSchema(fields, nil) // return new exec return &ProjectExec{ - child: input, + input: input, outputschema: *outputschema, expr: exprs, }, nil @@ -73,7 +84,7 @@ func (p *ProjectExec) Next(n uint16) (*operators.RecordBatch, error) { return nil, io.EOF } - childBatch, err := p.child.Next(n) + childBatch, err := p.input.Next(n) if err != nil { return nil, err } @@ -94,9 +105,7 @@ func (p *ProjectExec) Next(n uint16) (*operators.RecordBatch, error) { outPutCols[i] = arr arr.Retain() } - for _, c := range childBatch.Columns { - c.Release() - } + operators.ReleaseArrays(childBatch.Columns) return &operators.RecordBatch{ Schema: &p.outputschema, Columns: outPutCols, @@ -104,7 +113,7 @@ func (p *ProjectExec) Next(n uint16) (*operators.RecordBatch, error) { }, nil } func (p *ProjectExec) Close() error { - return p.child.Close() + return p.input.Close() } func (p *ProjectExec) Schema() *arrow.Schema { return &p.outputschema diff --git a/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go b/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go index 354db56..3832a39 100644 --- a/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go +++ b/src/Backend/opti-sql-go/operators/project/projectExecExpr_test.go @@ -420,7 +420,7 @@ func TestProjectExec_CastLiteral_Column(t *testing.T) { exprs := []Expr.Expression{ Expr.NewCastExpr( - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, int64(4)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 4), arrow.PrimitiveTypes.Float64, ), } @@ -486,7 +486,7 @@ func TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.Addition, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, int8(10)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, 10), ), ) @@ -511,7 +511,7 @@ func 
TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("score"), Expr.Subtraction, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(5.0)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, 5.0), ), ) @@ -536,7 +536,7 @@ func TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("id"), Expr.Multiplication, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, int64(2)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 2), ), ) @@ -558,7 +558,7 @@ func TestProjectExec_Column_Literal(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("score"), Expr.Division, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(2)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, 2.0), ), ) @@ -607,7 +607,7 @@ func TestProjectExec_AliasExpr(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("age"), Expr.Addition, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, int8(10)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int8, 10), ), "boosted_age", ), @@ -635,7 +635,7 @@ func TestProjectExec_AliasExpr(t *testing.T) { exprs := Expr.NewExpressions( Expr.NewAlias( - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(7)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 7), "constant_value", ), ) @@ -659,9 +659,9 @@ func TestProjectExec_AliasExpr(t *testing.T) { memSrc, _ := NewInMemoryProjectExec(names, cols) inner := Expr.NewBinaryExpr( - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(2)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 2), Expr.Addition, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, int32(3)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 3), ) exprs := Expr.NewExpressions( @@ -741,7 +741,7 @@ func TestProjectExec_FunctionExpr(t *testing.T) { t.Run("LOWER('MonKey_x')", func(t *testing.T) { memSrc, _ := NewInMemoryProjectExec(names, cols) - expr := Expr.NewLiteralResolve(arrow.BinaryTypes.String, 
string("MoNKey_X")) + expr := Expr.NewLiteralResolve(arrow.BinaryTypes.String, "MoNKey_X") exprs := Expr.NewExpressions( Expr.NewScalarFunction( @@ -779,7 +779,7 @@ func TestProjectExec_FunctionExpr(t *testing.T) { Expr.NewBinaryExpr( Expr.NewColumnResolve("score"), Expr.Subtraction, - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(100)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float32, float32(100.0)), ), ) @@ -811,7 +811,7 @@ func TestProjectExec_FunctionExpr(t *testing.T) { expr := Expr.NewScalarFunction( Expr.Round, Expr.NewBinaryExpr( - Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(2.5)), + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 2.5), Expr.Multiplication, Expr.NewColumnResolve("score"), ), @@ -834,10 +834,3 @@ func TestProjectExec_FunctionExpr(t *testing.T) { } }) } - -/* -complex expr -ex: alias(function(column |operator| literal) |operator| literal) -TODO: not the most important thing right now since we know basic expression are fine -*/ -func TestProjectExec_ComplexExpr(t *testing.T) {} diff --git a/src/Backend/opti-sql-go/operators/record.go b/src/Backend/opti-sql-go/operators/record.go index 60f695b..6678ef4 100644 --- a/src/Backend/opti-sql-go/operators/record.go +++ b/src/Backend/opti-sql-go/operators/record.go @@ -24,7 +24,7 @@ type Operator interface { type RecordBatch struct { Schema *arrow.Schema Columns []arrow.Array - RowCount uint64 // TODO: update to actually use this, in all operators + RowCount uint64 // } type SchemaBuilder struct { @@ -129,6 +129,7 @@ func (rb *RecordBatch) ColumnByName(name string) (arrow.Array, error) { } return rb.Columns[indices[0]], nil } + func (rbb *RecordBatchBuilder) GenIntArray(values ...int) arrow.Array { mem := memory.NewGoAllocator() builder := array.NewInt32Builder(mem) @@ -289,3 +290,122 @@ func (rbb *RecordBatchBuilder) GenLargeBinaryArray(values ...[]byte) arrow.Array } return builder.NewArray() } +func ReleaseArrays(a []arrow.Array) { + for _, col := range a 
{ + if col != nil { + col.Release() + } + } +} + +func (rb *RecordBatch) PrettyPrint() string { + if rb == nil { + return "" + } + + // ------------------------------- + // 1. Extract column names + // ------------------------------- + colNames := make([]string, len(rb.Schema.Fields())) + for i, f := range rb.Schema.Fields() { + colNames[i] = f.Name + } + + // ------------------------------- + // 2. Extract rows into [][]string + // ------------------------------- + rows := make([][]string, rb.RowCount) + for r := 0; r < int(rb.RowCount); r++ { + row := make([]string, len(rb.Columns)) + for c, arr := range rb.Columns { + row[c] = formatValue(arr, r) + } + rows[r] = row + } + + // ------------------------------- + // 3. Compute column widths + // ------------------------------- + colWidths := make([]int, len(colNames)) + for i, name := range colNames { + colWidths[i] = len(name) + } + for _, row := range rows { + for i, v := range row { + if len(v) > colWidths[i] { + colWidths[i] = len(v) + } + } + } + + // ------------------------------- + // 4. Build horizontal border line + // ------------------------------- + border := "+" + for _, w := range colWidths { + border += strings.Repeat("-", w+2) + "+" + } + + // ------------------------------- + // 5. 
Build the final output + // ------------------------------- + var b strings.Builder + + b.WriteString(border + "\n") + + // Header + b.WriteString("|") + for i, name := range colNames { + b.WriteString(" " + padRight(name, colWidths[i]) + " |") + } + b.WriteString("\n") + + b.WriteString(border + "\n") + + // Rows + for _, row := range rows { + b.WriteString("|") + for i, v := range row { + b.WriteString(" " + padRight(v, colWidths[i]) + " |") + } + b.WriteString("\n") + } + + b.WriteString(border) + + return b.String() +} + +// ------------------------------- +// Helper Functions +// ------------------------------- + +func padRight(s string, width int) string { + if len(s) >= width { + return s + } + return s + strings.Repeat(" ", width-len(s)) +} + +func formatValue(arr arrow.Array, row int) string { + if arr.IsNull(row) { + return "NULL" + } + + switch col := arr.(type) { + case *array.Int32: + return fmt.Sprintf("%d", col.Value(row)) + case *array.Int64: + return fmt.Sprintf("%d", col.Value(row)) + case *array.Float32: + return fmt.Sprintf("%g", col.Value(row)) + case *array.Float64: + return fmt.Sprintf("%g", col.Value(row)) + case *array.String: + return col.Value(row) + case *array.Boolean: + return fmt.Sprintf("%t", col.Value(row)) + default: + return "" + } +} diff --git a/src/Backend/opti-sql-go/operators/test/intergration_test.go b/src/Backend/opti-sql-go/operators/test/intergration_test.go new file mode 100644 index 0000000..15786a9 --- /dev/null +++ b/src/Backend/opti-sql-go/operators/test/intergration_test.go @@ -0,0 +1,864 @@ +package test + +import ( + "errors" + "fmt" + "io" + "opti-sql-go/Expr" + "opti-sql-go/operators" + join "opti-sql-go/operators/Join" + aggr "opti-sql-go/operators/aggr" + "opti-sql-go/operators/filter" + "opti-sql-go/operators/project" + "os" + "testing" + + "github.com/apache/arrow/go/v17/arrow" +) + +/* +composes individual operators into one another to test multiple together +*/ +const ( + source1Path = 
"../../../test_data/csv/intergration_test_data_1.csv" + source2Path = "../../../test_data/csv/intergration_test_data_2.csv" +) + +/* +column names: +id,username,email_address,is_active,age_years,account_balance_usd,average_session_minutes,favorite_color +*/ +func source1Project() operators.Operator { + f, err := os.Open(source1Path) + if err != nil { + panic(fmt.Sprintf("failed to open source file: %v", err)) + } + p, _ := project.NewProjectCSVLeaf(f) + return p +} + +/* +colunn names: +id,department_name,manager_name,manager_email +*/ +func source2Project() operators.Operator { + f, err := os.Open(source2Path) + if err != nil { + panic(fmt.Sprintf("failed to open source file: %v", err)) + } + p, _ := project.NewProjectCSVLeaf(f) + return p +} +func TestPrettyPrintSources(t *testing.T) { + p1, p2 := source1Project(), source2Project() + rc1, _ := p1.Next(5) + rc2, _ := p2.Next(5) + + t.Logf("source 1 batch: %v\n", rc1.PrettyPrint()) + t.Logf("source 2 batch: %v\n", rc2.PrettyPrint()) +} + +// TestSelectFilterLimit contains two subtests that build pipelines +// combining Select (project), Filter, and Limit for source1 CSV. +// Each subtest constructs the pipeline, calls Next once, and prints the +// resulting batch via PrettyPrint. 
+/* +(1) +Operators : Select, Filter, Limit +sql query: +(1.A)SELECT id, username, age_years FROM source1 WHERE age_years > 30 LIMIT 10; +(1.B)SELECT username, age_years +FROM source1 +WHERE is_active = true AND age_years < 25 +LIMIT 3; +(1.C)SELECT id, favorite_color +FROM source1 +WHERE favorite_color = 'Red' +LIMIT 7; +*/ + +func TestSelectFilterLimit(t *testing.T) { + // (1.A) SELECT id, username, age_years FROM source1 WHERE age_years > 30 LIMIT 10; + t.Run("1A", func(t *testing.T) { + // (1.A) SELECT id, username, age_years FROM source1 WHERE age_years > 30 LIMIT 10; + src := source1Project() + t.Logf("\t%v\n", src.Schema()) + + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age_years"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 30), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + projExprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(filt, projExprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 10) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(10) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Logf("(1A) got nil batch (possibly EOF)") + return + } + + t.Logf("(1A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (1.B) SELECT username, age_years FROM source1 WHERE is_active = true AND age_years < 25 LIMIT 3; + t.Run("1B", func(t *testing.T) { + src := source1Project() + + left := Expr.NewBinaryExpr( + Expr.NewColumnResolve("is_active"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.FixedWidthTypes.Boolean, true), + ) + right := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age_years"), + Expr.LessThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 25), 
+ ) + pred := Expr.NewBinaryExpr(left, Expr.And, right) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + projExprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(filt, projExprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 3) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Logf("(1B) got nil batch (possibly EOF)") + return + } + + t.Logf("(1B) batch:\n%v\n", batch.PrettyPrint()) + }) + // (1.C) SELECT id, favorite_color FROM source1 WHERE favorite_color = 'Red' LIMIT 7; + t.Run("1C", func(t *testing.T) { + src := source1Project() + + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("favorite_color"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "Red"), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + projExprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("favorite_color"), + ) + proj, err := project.NewProjectExec(filt, projExprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 7) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Logf("(1C) got nil batch (possibly EOF)") + return + } + + t.Logf("(1C) batch:\n%v\n", batch.PrettyPrint()) + }) + +} + +// ------------------------------------------------------------------------- +// (2) Operators: Filter, Scalar functions +// (2.A) SELECT id, username, LOWER(favorite_color) as fav_color_lower FROM 
source1 WHERE UPPER(favorite_color) = 'BLUE'; +// (2.B) SELECT username, LOWER(email_address) AS email_lower FROM source1 WHERE UPPER(username) = 'ALICE'; +func TestFilterScalarFunctions(t *testing.T) { + // (2.A) SELECT id, username, LOWER(favorite_color) as fav_color_lower FROM source1 WHERE UPPER(favorite_color) = 'BLUE'; + t.Run("2A", func(t *testing.T) { + src := source1Project() + + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Upper, Expr.NewColumnResolve("favorite_color")), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "BLUE"), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("username"), + Expr.NewAlias(Expr.NewScalarFunction(Expr.Lower, Expr.NewColumnResolve("favorite_color")), "fav_color_lower"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(2A) got nil batch (possibly EOF)") + return + } + t.Logf("(2A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (2.B) SELECT username, LOWER(email_address) AS email_lower FROM source1 WHERE UPPER(username) = 'ALICE'; + t.Run("2B", func(t *testing.T) { + src := source1Project() + + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Upper, Expr.NewColumnResolve("username")), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "ALICE"), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewAlias(Expr.NewScalarFunction(Expr.Lower, Expr.NewColumnResolve("email_address")), "email_lower"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != 
nil { + t.Fatalf("project init failed: %v", err) + } + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch != nil { + t.Fatalf("was expecting an empty batch but recieved %s\n", batch.PrettyPrint()) + return + } + }) +} + +// ------------------------------------------------------------------------- +// (3) Operators: select, Sort +// (3.A) SELECT id, account_balance_usd, username FROM source1 ORDER BY account_balance_usd ASC +// (3.B) SELECT id, favorite_color FROM source1 ORDER BY favorite_color ASC; +func TestSelectSort(t *testing.T) { + // (3.A) SELECT id, account_balance_usd, username FROM source1 ORDER BY account_balance_usd ASC + t.Run("3A", func(t *testing.T) { + src := source1Project() + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("account_balance_usd"), + Expr.NewColumnResolve("username"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + sk := aggr.NewSortKey(Expr.NewColumnResolve("account_balance_usd"), true) + sortExec, err := aggr.NewSortExec(proj, aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(3A) got nil batch (possibly EOF)") + return + } + t.Logf("(3A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (3.B) SELECT id, favorite_color FROM source1 ORDER BY favorite_color ASC; + t.Run("3B", func(t *testing.T) { + src := source1Project() + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("favorite_color"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + sk := aggr.NewSortKey(Expr.NewColumnResolve("favorite_color"), true) + sortExec, err := aggr.NewSortExec(proj, 
aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(3B) got nil batch (possibly EOF)") + return + } + t.Logf("(3B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// ------------------------------------------------------------------------- +// (4) Operators: Join(INNER), Select +// (4.A) SELECT s1.id, s1.username, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id; +// (4.B) SELECT s1.id, s1.email_address, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id; +func TestJoinSelect(t *testing.T) { + // (4.A) SELECT s1.id, s1.username, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.favorite_color = s2.manager_name; + t.Run("4A", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + exprs := Expr.NewExpressions( + Expr.NewAlias(Expr.NewColumnResolve("left_id"), "id"), + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("department_name"), + ) + t.Logf("\t%v\n", j.Schema()) + proj, err := project.NewProjectExec(j, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(4A) got nil batch (possibly EOF)") + return + } + t.Logf("(4A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (4.B) SELECT s1.id, s1.email_address, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id; + t.Run("4B", func(t *testing.T) { + src1 := 
source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + exprs := Expr.NewExpressions( + Expr.NewAlias(Expr.NewColumnResolve("left_id"), "cool_guy_id"), + Expr.NewColumnResolve("email_address"), + Expr.NewColumnResolve("department_name"), + ) + proj, err := project.NewProjectExec(j, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(4B) got nil batch (possibly EOF)") + return + } + t.Logf("(4B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +func TestGroupByAggregation(t *testing.T) { + // (5.A) SELECT favorite_color, AVG(age_years) AS avg_age, SUM(account_balance_usd) AS total_balance FROM source1 GROUP BY favorite_color order by avg_age; + t.Run("5A", func(t *testing.T) { + src := source1Project() + + groupBy := []Expr.Expression{Expr.NewColumnResolve("favorite_color")} + aggs := []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions(aggr.Avg, Expr.NewColumnResolve("age_years")), + aggr.NewAggregateFunctions(aggr.Sum, Expr.NewColumnResolve("account_balance_usd")), + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupBy) + if err != nil { + t.Fatalf("groupby init failed: %v", err) + } + sortExec, err := aggr.NewSortExec(gb, aggr.CombineSortKeys(aggr.NewSortKey(Expr.NewColumnResolve("avg_Column(age_years)"), true))) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(5A) got nil batch (possibly EOF)") + return + } + t.Logf("(5A) batch:\n%v\n", batch.PrettyPrint()) + }) + + 
// (5.B) SELECT is_active, COUNT(*) AS active_count, AVG(age_years) AS avg_age FROM source1 GROUP BY is_active; + t.Run("5B", func(t *testing.T) { + src := source1Project() + groupBy := []Expr.Expression{Expr.NewColumnResolve("is_active")} + aggs := []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions(aggr.Count, Expr.NewColumnResolve("id")), + aggr.NewAggregateFunctions(aggr.Avg, Expr.NewColumnResolve("age_years")), + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupBy) + if err != nil { + t.Fatalf("groupby init failed: %v", err) + } + + batch, err := gb.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(5B) got nil batch (possibly EOF)") + return + } + t.Logf("(5B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// TestDistinctSort runs DISTINCT + Sort pipelines for source1 +// (6.A)SELECT DISTINCT favorite_color +// FROM source1 +// ORDER BY favorite_color DESC; +// (6.B)SELECT DISTINCT is_active +// FROM source1 +// ORDER BY is_active DESC; +func TestDistinctSort(t *testing.T) { + // (6.A) SELECT DISTINCT favorite_color FROM source1 ORDER BY favorite_color DESC; + t.Run("6A", func(t *testing.T) { + src := source1Project() + + cols := []Expr.Expression{Expr.NewColumnResolve("favorite_color")} + distinct, err := filter.NewDistinctExec(src, cols) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + sk := aggr.NewSortKey(Expr.NewColumnResolve("favorite_color"), false) // DESC + sortExec, err := aggr.NewSortExec(distinct, aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + proj, err := project.NewProjectExec(sortExec, Expr.NewExpressions(Expr.NewColumnResolve("favorite_color"))) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(6A) got nil batch (possibly EOF)") 
+ return + } + t.Logf("(6A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (6.B) SELECT DISTINCT is_active FROM source1 ORDER BY is_active DESC; + t.Run("6B", func(t *testing.T) { + src := source1Project() + + cols := []Expr.Expression{Expr.NewColumnResolve("is_active")} + distinct, err := filter.NewDistinctExec(src, cols) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + sk := aggr.NewSortKey(Expr.NewColumnResolve("is_active"), false) // DESC + sortExec, err := aggr.NewSortExec(distinct, aggr.CombineSortKeys(sk)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + proj, err := project.NewProjectExec(sortExec, Expr.NewExpressions(Expr.NewColumnResolve("is_active"))) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(6B) got nil batch (possibly EOF)") + return + } + t.Logf("(6B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// TestJoinFilterProjLimit runs join + filter + project + limit pipelines +// (7.A)SELECT s1.id, s1.username, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id WHERE s1.age_years > 30 LIMIT 5; +// (7.B)SELECT s1.username, s2.manager_email FROM source1 AS s1 JOIN source2 AS s2 ON s1.id = s2.id WHERE s2.department_name = 'Engineering' LIMIT 3; +// (7.C)SELECT s1.id, s2.manager_name FROM source1 s1 JOIN source2 s2 ON s1.id = s2.id WHERE s1.account_balance_usd > 10000 LIMIT 2; +func TestJoinFilterProjLimit(t *testing.T) { + // (7.A)SELECT s1.id, s1.username, s2.department_name FROM source1 AS s1 INNER JOIN source2 AS s2 ON s1.id = s2.id WHERE s1.age_years > 30 LIMIT 5; + t.Run("7A", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := 
join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age_years"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int64, 30), + ) + + filt, err := filter.NewFilterExec(j, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewAlias(Expr.NewColumnResolve("left_id"), "id"), + Expr.NewColumnResolve("username"), + Expr.NewAlias(Expr.NewColumnResolve("department_name"), "deptartment"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 5) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(7A) got nil batch (possibly EOF)") + return + } + t.Logf("(7A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (7.B)SELECT s1.username, s2.manager_email FROM source1 AS s1 JOIN source2 AS s2 ON s1.id = s2.id WHERE s2.department_name = 'Engineering' LIMIT 3; + t.Run("7B", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("department_name"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "Engineering"), + ) + + filt, err := filter.NewFilterExec(j, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("manager_email"), + ) + proj, err := 
project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 3) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(7B) got nil batch (possibly EOF)") + return + } + t.Logf("(7B) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (7.C)SELECT s1.id, s2.manager_name FROM source1 s1 JOIN source2 s2 ON s1.id = s2.id WHERE s1.account_balance_usd > 10000 LIMIT 2; + t.Run("7C", func(t *testing.T) { + src1 := source1Project() + src2 := source2Project() + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("id")}, + []Expr.Expression{Expr.NewColumnResolve("id")}, + ) + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("account_balance_usd"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 10000.0), + ) + + filt, err := filter.NewFilterExec(j, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("left_id"), + Expr.NewColumnResolve("manager_name"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + lim, err := filter.NewLimitExec(proj, 2) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(7C) got nil batch (possibly EOF)") + return + } + t.Logf("(7C) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +/* +(8) +Operators: ScalarFunction(ABS, ROUND), Filter, Projection + +SQL: +(8.A)SELECT id, ROUND(ABS(average_session_minutes)) AS rounded_session +FROM 
source1 +WHERE ABS(average_session_minutes) > 5; +(8.B)SELECT username, ROUND(account_balance_usd) AS rounded_balance +FROM source1 +WHERE ABS(account_balance_usd) > 5000; +*/ + +// TestScalarAbsRound runs scalar ABS/ROUND with Filter + Projection +// (8.A)SELECT id, ROUND(ABS(average_session_minutes)) AS rounded_session FROM source1 WHERE ABS(average_session_minutes) > 5; +// (8.B)SELECT username, ROUND(account_balance_usd) AS rounded_balance FROM source1 WHERE ABS(account_balance_usd) > 5000; +func TestScalarAbsRound(t *testing.T) { + // (8.A)SELECT id, ROUND(ABS(average_session_minutes)) AS rounded_session FROM source1 WHERE ABS(average_session_minutes) > 5; + t.Run("8A", func(t *testing.T) { + src := source1Project() + + // predicate: ABS(average_session_minutes) > 5 + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("average_session_minutes")), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 5.0), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + // projection: id, ROUND(ABS(average_session_minutes)) as rounded_session + roundExpr := Expr.NewScalarFunction(Expr.Round, Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("average_session_minutes"))) + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias(roundExpr, "rounded_session"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(8A) got nil batch (possibly EOF)") + return + } + t.Logf("(8A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (8.B)SELECT username, ROUND(account_balance_usd) AS rounded_balance FROM source1 WHERE ABS(account_balance_usd) > 5000; + t.Run("8B", func(t *testing.T) { + src := source1Project() + + // 
predicate: ABS(account_balance_usd) > 5000 + pred := Expr.NewBinaryExpr( + Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("account_balance_usd")), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 5000.0), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + roundExpr := Expr.NewScalarFunction(Expr.Round, Expr.NewColumnResolve("account_balance_usd")) + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("username"), + Expr.NewAlias(roundExpr, "rounded_balance"), + ) + proj, err := project.NewProjectExec(filt, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := proj.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(8B) got nil batch (possibly EOF)") + return + } + t.Logf("(8B) batch:\n%v\n", batch.PrettyPrint()) + }) +} + +// TestSelectMultiSort runs multi-column ORDER BY tests +// (9.A)SELECT id, username, age_years FROM source1 ORDER BY age_years DESC, username ASC; +// (9.B)SELECT id, email_address, age_years FROM source1 ORDER BY age_years ASC, email_address DESC; +func TestSelectMultiSort(t *testing.T) { + // (9.A) + t.Run("9A", func(t *testing.T) { + src := source1Project() + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("username"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + sk1 := aggr.NewSortKey(Expr.NewColumnResolve("age_years"), false) // DESC + sk2 := aggr.NewSortKey(Expr.NewColumnResolve("username"), true) // ASC + sortExec, err := aggr.NewSortExec(proj, aggr.CombineSortKeys(sk1, sk2)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch 
== nil { + t.Logf("(9A) got nil batch (possibly EOF)") + return + } + t.Logf("(9A) batch:\n%v\n", batch.PrettyPrint()) + }) + + // (9.B) + t.Run("9B", func(t *testing.T) { + src := source1Project() + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewColumnResolve("email_address"), + Expr.NewColumnResolve("age_years"), + ) + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + sk1 := aggr.NewSortKey(Expr.NewColumnResolve("age_years"), true) // ASC + sk2 := aggr.NewSortKey(Expr.NewColumnResolve("email_address"), false) // DESC + sortExec, err := aggr.NewSortExec(proj, aggr.CombineSortKeys(sk1, sk2)) + if err != nil { + t.Fatalf("sort init failed: %v", err) + } + + batch, err := sortExec.Next(100) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Logf("(9B) got nil batch (possibly EOF)") + return + } + t.Logf("(9B) batch:\n%v\n", batch.PrettyPrint()) + }) +} diff --git a/src/Backend/opti-sql-go/operators/test/t1_test.go b/src/Backend/opti-sql-go/operators/test/t1_test.go new file mode 100644 index 0000000..dd728fb --- /dev/null +++ b/src/Backend/opti-sql-go/operators/test/t1_test.go @@ -0,0 +1,1558 @@ +package test + +import ( + "errors" + "io" + "math" + "opti-sql-go/Expr" + "opti-sql-go/operators" + join "opti-sql-go/operators/Join" + "opti-sql-go/operators/aggr" + "opti-sql-go/operators/filter" + "opti-sql-go/operators/project" + "strings" + "testing" + + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" +) + +/* +indivdial unit test for each operator +serves as documentation as to how to use each operator +*/ + +// test for all operators together +// using in memory format at first +func generateIntegrationDataset1(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{ + "id", "first_name", "last_name", "age", "salary", 
"department", "region", + } + + // id + idB := array.NewInt32Builder(mem) + idB.AppendValues( + []int32{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + }, + []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }, + ) + idArr := idB.NewArray() + + // first_name + fnB := array.NewStringBuilder(mem) + fnB.AppendValues([]string{ + "Alice", "Bob", "Charlie", "Diana", "Eve", + "Frank", "Grace", "Hank", "Ivy", "Jake", + "Karen", "Leo", "Mona", "Nate", "Olivia", + "Paul", "Quinn", "Ruth", "Steve", "Tina", + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + fnArr := fnB.NewArray() + + // last_name + lnB := array.NewStringBuilder(mem) + lnB.AppendValues([]string{ + "Smith", "Jones", "Stone", "Lopez", "King", + "Hall", "Young", "Wright", "Hill", "Green", + "Adams", "Clark", "Allen", "Baker", "Cox", + "Diaz", "Evans", "Ford", "Gray", "Hart", + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + lnArr := lnB.NewArray() + + // age + ageB := array.NewInt32Builder(mem) + ageB.AppendValues([]int32{ + 29, 34, 41, 26, 33, + 45, 38, 28, 52, 31, + 27, 49, 36, 42, 30, + 40, 50, 39, 55, 25, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + ageArr := ageB.NewArray() + + // salary + salB := array.NewFloat64Builder(mem) + salB.AppendValues([]float64{ + 70000, 80000, 65000, 72000, 59000, + 82000, 91000, 54000, 68000, 60000, + 75000, 88000, 56000, 69000, 62000, + 93000, 97000, 58000, 89000, 61000, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + salArr := salB.NewArray() + + // department (some NULLs) + deptB := 
array.NewStringBuilder(mem) + deptB.AppendValues([]string{ + "HR", "Engineering", "Sales", "Finance", "HR", + "Engineering", "Sales", "Finance", "HR", "Engineering", + "Sales", "Finance", "HR", "Engineering", "Sales", + "Finance", "HR", "Engineering", "Sales", "Finance", + }, []bool{ + true, true, true, false, true, + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, true, + }) + deptArr := deptB.NewArray() + + // region (with NULLs) + regB := array.NewStringBuilder(mem) + regB.AppendValues([]string{ + "US", "EU", "US", "APAC", "LATAM", + "US", "EU", "APAC", "LATAM", "US", + "EU", "US", "LATAM", "EU", "APAC", + "US", "EU", "LATAM", "US", "EU", + }, []bool{ + true, true, true, true, true, + true, true, false, true, true, + true, true, true, true, true, + true, true, true, true, false, + }) + regArr := regB.NewArray() + + return names, []arrow.Array{idArr, fnArr, lnArr, ageArr, salArr, deptArr, regArr} +} + +func generateIntegrationDataset2(mem memory.Allocator) ([]string, []arrow.Array) { + names := []string{"dept_id", "department", "region", "budget", "manager"} + + // dept_id + idB := array.NewInt32Builder(mem) + idB.AppendValues([]int32{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + idArr := idB.NewArray() + + // department + deptB := array.NewStringBuilder(mem) + deptB.AppendValues([]string{ + "HR", "Engineering", "Sales", "Finance", "Marketing", + "Support", "Research", "Security", "Legal", "Operations", + "HR", "Engineering", "Sales", "Finance", "Marketing", + "Support", "Research", "Security", "Legal", "Operations", + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + deptArr := deptB.NewArray() + + // region (with NULLs) + regB := array.NewStringBuilder(mem) + 
regB.AppendValues([]string{ + "US", "EU", "LATAM", "APAC", "US", + "EU", "LATAM", "APAC", "US", "EU", + "LATAM", "US", "EU", "APAC", "US", + "LATAM", "US", "EU", "APAC", "US", + }, []bool{ + true, true, true, true, true, + true, true, true, true, true, + true, true, true, true, false, + true, true, true, true, true, + }) + regArr := regB.NewArray() + + // budget + budB := array.NewFloat64Builder(mem) + budB.AppendValues([]float64{ + 1e6, 2e6, 3e6, 1.5e6, 1.2e6, + 900000, 850000, 780000, 950000, 1100000, + 1e6, 2e6, 3e6, 1.5e6, 1.2e6, + 900000, 850000, 780000, 950000, 1100000, + }, []bool{ + true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, + }) + budArr := budB.NewArray() + + // manager (with NULLs) + manB := array.NewStringBuilder(mem) + manB.AppendValues([]string{ + "Anna", "Ben", "Chris", "Dana", "Eli", + "Faye", "George", "Holly", "Ian", "Jane", + "Karl", "Lilly", "Mason", "Nora", "Owen", + "Pam", "Quinn", "Rose", "Sam", "Tara", + }, []bool{ + true, true, true, true, true, + true, true, true, false, true, + true, true, true, true, true, + true, true, true, true, true, + }) + manArr := manB.NewArray() + + return names, []arrow.Array{idArr, deptArr, regArr, budArr, manArr} +} +func NewIntegrationSource1(mem memory.Allocator) (*project.InMemorySource, error) { + names, cols := generateIntegrationDataset1(mem) + return project.NewInMemoryProjectExecFromArrays(names, cols) +} + +func NewIntegrationSource2(mem memory.Allocator) (*project.InMemorySource, error) { + names, cols := generateIntegrationDataset2(mem) + return project.NewInMemoryProjectExecFromArrays(names, cols) +} + +/* +============================================================================ +Project tests +============================================================================ +*/ +func TestProjectExec(t *testing.T) { + t.Run("integration_project_exec", func(t *testing.T) { + mem := memory.NewGoAllocator() + + src, err := 
NewIntegrationSource1(mem) + if err != nil { + t.Fatalf("failed to create integration source: %v", err) + } + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias(Expr.NewColumnResolve("age"), "age"), + Expr.NewColumnResolve("salary"), + Expr.NewColumnResolve("department"), + ) + basicProj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("unexpected error\t%v\n", basicProj) + } + //t.Logf("%v\n", basicProj.Schema()) + rc, err := basicProj.Next(100) + if err != nil { + if !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error %v\n", err) + } + } + if rc.RowCount != 20 { + t.Fatalf("expected 20 rows, got %d", rc.RowCount) + } + }) + t.Run("projection_with_alias", func(t *testing.T) { + mem := memory.NewGoAllocator() + src, _ := NewIntegrationSource1(mem) + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias(Expr.NewColumnResolve("salary"), "emp_salary"), + ) + + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("error: %v", err) + } + + batch, _ := proj.Next(50) + + // verify alias appears in schema + if batch.Schema.Fields()[1].Name != "emp_salary" { + t.Fatalf("expected alias emp_salary, got %s", batch.Schema.Fields()[1].Name) + } + }) + t.Run("projection_expression_math", func(t *testing.T) { + mem := memory.NewGoAllocator() + src, _ := NewIntegrationSource1(mem) + + exprs := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + Expr.NewAlias( + Expr.NewBinaryExpr( + Expr.NewColumnResolve("salary"), + Expr.Multiplication, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 1.10), + ), + "adjusted_salary", + ), + ) + + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("error: %v", err) + } + + batch, _ := proj.Next(50) + + adjCol := batch.Columns[1].(*array.Float64) + _, origin := generateIntegrationDataset1(mem) + sal := origin[4].(*array.Float64) + // check: for a non-null salary (row 0 = 50000) + if adjCol.Len() != sal.Len() { + 
t.Fatalf("expected adjusted salary length %d, got %d", sal.Len(), adjCol.Len()) + } + for i := 0; i < adjCol.Len(); i++ { + if !sal.IsNull(i) { + expected := sal.Value(i) * 1.10 + if adjCol.Value(i) != expected { + t.Fatalf("row %d: expected adjusted salary %f, got %f", i, expected, adjCol.Value(i)) + } + } + } + }) + t.Run("projection_upper_first_name", func(t *testing.T) { + mem := memory.NewGoAllocator() + + src, err := NewIntegrationSource1(mem) + if err != nil { + t.Fatalf("failed to create integration source: %v", err) + } + + exprs := Expr.NewExpressions( + Expr.NewAlias( + Expr.NewScalarFunction(Expr.Upper, Expr.NewColumnResolve("first_name")), + "first_name_upper", + ), + ) + + proj, err := project.NewProjectExec(src, exprs) + if err != nil { + t.Fatalf("unexpected project exec error: %v", err) + } + + batch, err := proj.Next(100) // pull all rows at once + if err != nil { + t.Fatalf("unexpected error on Next: %v", err) + } + if batch == nil { + t.Fatalf("expected a batch but got nil") + } + + // ---- get projected column (index 0) ---- + upperCol := batch.Columns[0].(*array.String) + + // ---- get original dataset to compare ---- + _, originCols := generateIntegrationDataset1(mem) + firstNameCol := originCols[1].(*array.String) // index 1 is first_name + + if upperCol.Len() != firstNameCol.Len() { + t.Fatalf("length mismatch: expected %d got %d", + firstNameCol.Len(), upperCol.Len()) + } + + // ---- validate uppercase projection ---- + for i := 0; i < upperCol.Len(); i++ { + if firstNameCol.IsNull(i) { + if !upperCol.IsNull(i) { + t.Fatalf("row %d: expected NULL but got value", i) + } + continue + } + + expected := strings.ToUpper(firstNameCol.Value(i)) + got := upperCol.Value(i) + + if expected != got { + t.Fatalf("row %d: expected %q, got %q", i, expected, got) + } + } + }) + +} + +/* +============================================================================ +Filter tests +============================================================================ 
+*/ +func TestFilterExec(t *testing.T) { + mem := memory.NewGoAllocator() + + // ---------------------------------------------------------------------- + t.Run("filter_age_gt_30", func(t *testing.T) { + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create in-memory source: %v", err) + } + pred := Expr.NewBinaryExpr( + Expr.NewColumnResolve("age"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Int32, 30), + ) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + batch, err := filt.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + t.Fatalf("expected rows, got nil batch") + } + ageCol, _ := batch.ColumnByName("age") + for i := 0; i < ageCol.Len(); i++ { + ageValue := ageCol.(*array.Int32).Value(i) + if ageValue <= 30 { + t.Fatalf("expected age > 30, got %d", ageValue) + } + } + + }) + + // ---------------------------------------------------------------------- + t.Run("filter_engineering_and_salary_gt_70000", func(t *testing.T) { + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create in-memory source: %v", err) + } + pred := Expr.NewBinaryExpr( + Expr.NewBinaryExpr( + Expr.NewColumnResolve("department"), + Expr.Equal, + Expr.NewLiteralResolve(arrow.BinaryTypes.String, "Engineering"), + ), + Expr.And, + Expr.NewBinaryExpr( + Expr.NewColumnResolve("salary"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 70000.0), + ), + ) + // department = 'Engineering' AND salary > 70000 + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + batch, err := filt.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + 
t.Fatalf("unexpected error: %v", err) + } + if batch == nil { + t.Fatalf("expected non-nil batch") + } + + // validate + deptCol, _ := batch.ColumnByName("department") + salCol, _ := batch.ColumnByName("salary") + depColumn, _ := deptCol.(*array.String) + salColumn, _ := salCol.(*array.Float64) + for i := 0; i < int(batch.RowCount); i++ { + if depColumn.Value(i) != "Engineering" { + t.Fatalf("expected department 'Engineering', got %s", depColumn.Value(i)) + } + if salColumn.Value(i) <= 70000 { + t.Fatalf("expected salary > 70000, got %f", salColumn.Value(i)) + } + } + }) + + // ---------------------------------------------------------------------- + t.Run("filter_region_is_null", func(t *testing.T) { + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create in-memory source: %v", err) + } + // We're filtering region IS NULL + pred := Expr.NewNullCheckExpr(Expr.NewColumnResolve("region")) + + filt, err := filter.NewFilterExec(src, pred) + if err != nil { + t.Fatalf("filter init failed: %v", err) + } + + batch, err := filt.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + if batch == nil { + // possible: no NULLS + t.Fatalf("expected atleast one null") + return + } + t.Logf("batch: \t%v\n", batch.PrettyPrint()) + // validate + regionCol, _ := batch.ColumnByName("region") + regionArr := regionCol.(*array.String) + for i := 0; i < int(batch.RowCount); i++ { + if regionArr.IsNull(i) { + t.Fatalf("expected NULL region but got value=%s", regionArr.Value(i)) + } + } + }) + +} + +/* +============================================================================ +Sort tests +============================================================================ +*/ +func TestSortTest(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("sort_salary_ascending", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + 
sortKeys := []aggr.SortKey{ + {Expr: Expr.NewColumnResolve("salary"), Ascending: true}, + } + + sortExec, err := aggr.NewSortExec(src, sortKeys) + if err != nil { + t.Fatalf("failed to create sort exec: %v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + salaryArr := batch.Columns[4].(*array.Float64) + + for i := 1; i < salaryArr.Len(); i++ { + if salaryArr.IsNull(i-1) || salaryArr.IsNull(i) { + continue + } + if salaryArr.Value(i) < salaryArr.Value(i-1) { + t.Fatalf("salary not sorted ASC at row %d: %f < %f", + i, salaryArr.Value(i), salaryArr.Value(i-1)) + } + } + + }) + + // ───────────────────────────────────────────────────────────── + + t.Run("sort_lastname_descending", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + sortKeys := []aggr.SortKey{ + {Expr: Expr.NewColumnResolve("last_name"), Ascending: false}, + } + + sortExec, err := aggr.NewSortExec(src, sortKeys) + if err != nil { + t.Fatalf("failed to create sort exec: %v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + lastArr := batch.Columns[2].(*array.String) + + for i := 1; i < lastArr.Len(); i++ { + if lastArr.IsNull(i-1) || lastArr.IsNull(i) { + continue + } + + // descending → current <= previous + if lastArr.Value(i) > lastArr.Value(i-1) { + t.Fatalf("last_name not sorted DESC at %d: %s > %s", + i, lastArr.Value(i), lastArr.Value(i-1)) + } + } + }) + + // ───────────────────────────────────────────────────────────── + + t.Run("sort_department_then_salary_desc", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + sortKeys := []aggr.SortKey{ + {Expr: Expr.NewColumnResolve("department"), Ascending: true}, // asc + {Expr: Expr.NewColumnResolve("salary"), Ascending: false}, // desc + } + + sortExec, err := aggr.NewSortExec(src, sortKeys) + if err != nil { + t.Fatalf("failed to create sort exec: 
%v", err) + } + + batch, err := sortExec.Next(1000) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("unexpected error: %v", err) + } + + deptArr := batch.Columns[5].(*array.String) + salaryArr := batch.Columns[4].(*array.Float64) + + for i := 1; i < deptArr.Len(); i++ { + if deptArr.IsNull(i) || deptArr.IsNull(i-1) { + continue + } + + prevDept := deptArr.Value(i - 1) + currDept := deptArr.Value(i) + + // department ascending grouping + if currDept < prevDept { + t.Fatalf("department not sorted ASC at %d: %s < %s", + i, currDept, prevDept) + } + + // if same department → salary must be descending + if currDept == prevDept { + if !salaryArr.IsNull(i) && !salaryArr.IsNull(i-1) { + if salaryArr.Value(i) > salaryArr.Value(i-1) { + t.Fatalf("salary not DESC within department '%s' at row %d", + currDept, i) + } + } + } + } + }) +} + +/* +============================================================================ +Aggregations tests +============================================================================ +*/ +func TestIntegrationAggregations(t *testing.T) { + t.Run("sum_avg_min_max_salary", func(t *testing.T) { + mem := memory.NewGoAllocator() + + // Load integration dataset + _, cols := generateIntegrationDataset1(mem) + salaryArr := cols[4].(*array.Float64) + + // Expected values + var sum float64 + min := math.MaxFloat64 + max := -math.MaxFloat64 + count := 0 + + for i := 0; i < salaryArr.Len(); i++ { + if salaryArr.IsNull(i) { + continue + } + v := salaryArr.Value(i) + sum += v + count++ + if v < min { + min = v + } + if v > max { + max = v + } + } + avg := sum / float64(count) + + // Build aggregation operator + src, _ := NewIntegrationSource1(mem) + + salCol := Expr.NewColumnResolve("salary") + + agg, err := aggr.NewGlobalAggrExec(src, + []aggr.AggregateFunctions{aggr.NewAggregateFunctions(aggr.Sum, salCol), + aggr.NewAggregateFunctions(aggr.Avg, salCol), + aggr.NewAggregateFunctions(aggr.Min, salCol), + aggr.NewAggregateFunctions(aggr.Max, salCol)}) + 
if err != nil { + t.Fatalf("aggregation init failed: %v", err) + } + + batch, err := agg.Next(100) + if err != nil { + t.Fatalf("aggregation next failed: %v", err) + } + + // Extract columns from result + sumArr := batch.Columns[0].(*array.Float64) + avgArr := batch.Columns[1].(*array.Float64) + minArr := batch.Columns[2].(*array.Float64) + maxArr := batch.Columns[3].(*array.Float64) + + if sumArr.Value(0) != sum { + t.Fatalf("SUM mismatch: expected %f, got %f", sum, sumArr.Value(0)) + } + if avgArr.Value(0) != avg { + t.Fatalf("AVG mismatch: expected %f, got %f", avg, avgArr.Value(0)) + } + if minArr.Value(0) != min { + t.Fatalf("MIN mismatch: expected %f, got %f", min, minArr.Value(0)) + } + if maxArr.Value(0) != max { + t.Fatalf("MAX mismatch: expected %f, got %f", max, maxArr.Value(0)) + } + }) + + // ───────────────────────────────────────────────────────────── + + t.Run("sum_age", func(t *testing.T) { + mem := memory.NewGoAllocator() + _, cols := generateIntegrationDataset1(mem) + ageArr := cols[3].(*array.Int32) + + // Expected SUM(age) + var sum int32 + for i := 0; i < ageArr.Len(); i++ { + if !ageArr.IsNull(i) { + sum += ageArr.Value(i) + } + } + + src, _ := NewIntegrationSource1(mem) + + agg, err := aggr.NewGlobalAggrExec( + src, + []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions( + aggr.Sum, Expr.NewColumnResolve("age")), + }, + ) + if err != nil { + t.Fatalf("agg init failed: %v", err) + } + + batch, _ := agg.Next(100) + sumArr := batch.Columns[0].(*array.Float64) // SUM(int32) -> int64 + + if sumArr.Value(0) != float64(sum) { + t.Fatalf("SUM(age) mismatch: expected %v, got %v", sum, sumArr.Value(0)) + } + }) + + // ───────────────────────────────────────────────────────────── + + t.Run("min_max_age", func(t *testing.T) { + mem := memory.NewGoAllocator() + _, cols := generateIntegrationDataset1(mem) + ageArr := cols[3].(*array.Int32) + + min := int32(math.MaxInt32) + max := int32(math.MinInt32) + + for i := 0; i < ageArr.Len(); i++ { + if 
ageArr.IsNull(i) { + continue + } + v := ageArr.Value(i) + if v < min { + min = v + } + if v > max { + max = v + } + } + + src, _ := NewIntegrationSource1(mem) + + agg, err := aggr.NewGlobalAggrExec(src, + []aggr.AggregateFunctions{ + aggr.NewAggregateFunctions(aggr.Min, Expr.NewColumnResolve("age")), + aggr.NewAggregateFunctions(aggr.Max, Expr.NewColumnResolve("age")), + }) + if err != nil { + t.Fatalf("agg init failed: %v", err) + } + + batch, _ := agg.Next(100) + + minArr := batch.Columns[0].(*array.Float64) + maxArr := batch.Columns[1].(*array.Float64) + + if minArr.Value(0) != float64(min) { + t.Fatalf("MIN(age) mismatch: expected %v, got %v", min, minArr.Value(0)) + } + if maxArr.Value(0) != float64(max) { + t.Fatalf("MAX(age) mismatch: expected %v, got %v", max, maxArr.Value(0)) + } + }) +} + +/* +============================================================================ +Group-by tests +============================================================================ +*/ + +func TestGroupByExec(t *testing.T) { + mem := memory.NewGoAllocator() + + // Utility helper to get origin dataset quickly + _, originCols := generateIntegrationDataset1(mem) + + // ------------------------------------------------------------ + t.Run("group_by_department_count", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + dept := Expr.NewColumnResolve("department") + + groupByExpr := []Expr.Expression{dept} + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupByExpr) + if err != nil { + t.Fatalf("gb init failed: %v", err) + } + + batch, err := gb.Next(1024) + if err != nil { + t.Fatalf("group by Next failed: %v", err) + } + + deptCol := batch.Columns[0].(*array.String) + countCol := batch.Columns[1].(*array.Float64) // count returns float64 in your impl + + // Validate counts by manually counting departments + origDept := originCols[5].(*array.String) + expected := 
make(map[string]int) + + for i := 0; i < origDept.Len(); i++ { + if origDept.IsNull(i) { + expected["NULL"]++ + } else { + expected[origDept.Value(i)]++ + } + } + + for i := 0; i < int(batch.RowCount); i++ { + key := "NULL" + if !deptCol.IsNull(i) { + key = deptCol.Value(i) + } + got := int(countCol.Value(i)) + want := expected[key] + + if got != want { + t.Fatalf("group %s: expected %d, got %d", key, want, got) + } + } + }) + + // ------------------------------------------------------------ + t.Run("group_by_department_region_sum_salary", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + dept := Expr.NewColumnResolve("department") + region := Expr.NewColumnResolve("region") + + groupByExpr := []Expr.Expression{dept, region} + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Sum, Child: Expr.NewColumnResolve("salary")}, + } + + gb, err := aggr.NewGroupByExec(src, aggs, groupByExpr) + if err != nil { + t.Fatalf("init failed: %v", err) + } + + batch, err := gb.Next(1024) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + + deptCol := batch.Columns[0].(*array.String) + regionCol := batch.Columns[1].(*array.String) + sumCol := batch.Columns[2].(*array.Float64) + + origDept := originCols[5].(*array.String) + origRegion := originCols[6].(*array.String) + origSalary := originCols[4].(*array.Float64) + + expected := make(map[string]float64) + + for i := 0; i < origSalary.Len(); i++ { + d := "NULL" + if !origDept.IsNull(i) { + d = origDept.Value(i) + } + + r := "NULL" + if !origRegion.IsNull(i) { + r = origRegion.Value(i) + } + + key := d + "|" + r + expected[key] += origSalary.Value(i) + } + + for i := 0; i < int(batch.RowCount); i++ { + d := "NULL" + if !deptCol.IsNull(i) { + d = deptCol.Value(i) + } + + r := "NULL" + if !regionCol.IsNull(i) { + r = regionCol.Value(i) + } + + key := d + "|" + r + got := sumCol.Value(i) + want := expected[key] + + if got != want { + t.Fatalf("(%s,%s): expected sum=%f, got %f", d, r, want, got) + } + } + }) + + // 
------------------------------------------------------------ + t.Run("group_by_with_null_keys", func(t *testing.T) { + mem := memory.NewGoAllocator() + src, _ := NewIntegrationSource1(mem) + + region := Expr.NewColumnResolve("region") + + groupByExpr := []Expr.Expression{region} + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Count, Child: Expr.NewColumnResolve("id")}, + } + + gb, _ := aggr.NewGroupByExec(src, aggs, groupByExpr) + + batch, err := gb.Next(1024) + if err != nil { + t.Fatalf("Next failed: %v", err) + } + + regionCol := batch.Columns[0].(*array.String) + countCol := batch.Columns[1].(*array.Float64) + + origRegion := originCols[6].(*array.String) + expected := make(map[string]int) + + for i := 0; i < origRegion.Len(); i++ { + key := "NULL" + if !origRegion.IsNull(i) { + key = origRegion.Value(i) + } + expected[key]++ + } + + for i := 0; i < int(batch.RowCount); i++ { + k := "NULL" + if !regionCol.IsNull(i) { + k = regionCol.Value(i) + } + + got := int(countCol.Value(i)) + want := expected[k] + + if got != want { + t.Fatalf("region=%s expected %d got %d", k, want, got) + } + } + }) +} + +/* +============================================================================ +Having tests +============================================================================ +*/ +func TestHavingExec(t *testing.T) { + mem := memory.NewGoAllocator() + + // helper — build group by department avg salary + buildDeptAvg := func() operators.Operator { + src, _ := NewIntegrationSource1(mem) + + aggs := []aggr.AggregateFunctions{ + {AggrFunc: aggr.Avg, Child: Expr.NewColumnResolve("salary")}, + } + + gb, _ := aggr.NewGroupByExec(src, aggs, + []Expr.Expression{Expr.NewColumnResolve("department")}, + ) + return gb + } + + // ------------------------------------------------------------ + t.Run("having_avg_salary_gt_75000", func(t *testing.T) { + gb := buildDeptAvg() + + having := Expr.NewBinaryExpr( + Expr.NewColumnResolve("avg_Column(salary)"), + Expr.GreaterThan, + 
Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 75000.0), + ) + + hv, _ := aggr.NewHavingExec(gb, having) + batch, err := hv.Next(500) + if err != nil { + t.Fatalf("having next failed: %v", err) + } + t.Logf("batch:\t%v\n", batch.PrettyPrint()) + + deptCol := batch.Columns[0].(*array.String) + avgCol := batch.Columns[1].(*array.Float64) + + for i := 0; i < int(batch.RowCount); i++ { + if avgCol.Value(i) <= 75000 { + t.Fatalf("expected avg > 75k, got %f for dept %s", + avgCol.Value(i), deptCol.Value(i)) + } + } + }) + + // ------------------------------------------------------------ + t.Run("having_no_group_passes", func(t *testing.T) { + gb := buildDeptAvg() + + having := Expr.NewBinaryExpr( + Expr.NewColumnResolve("avg_Column(salary)"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, 999999.0), + ) + + hv, _ := aggr.NewHavingExec(gb, having) + batch, _ := hv.Next(100) + + if batch.RowCount != 0 { + t.Fatalf("expected empty result") + } + }) + + // ------------------------------------------------------------ + t.Run("having_everything_passes", func(t *testing.T) { + gb := buildDeptAvg() + + having := Expr.NewBinaryExpr( + Expr.NewColumnResolve("avg_Column(salary)"), + Expr.GreaterThan, + Expr.NewLiteralResolve(arrow.PrimitiveTypes.Float64, float64(0.0)), + ) + + hv, _ := aggr.NewHavingExec(gb, having) + batch, _ := hv.Next(1000) + + if batch.RowCount == 0 { + t.Fatalf("expected some rows") + } + }) +} + +/* +============================================================================ +Distinct tests +============================================================================ +*/ +func TestDistinctExec(t *testing.T) { + mem := memory.NewGoAllocator() + + // Utility: load dataset + names, cols := generateIntegrationDataset1(mem) + src, err := project.NewInMemoryProjectExecFromArrays(names, cols) + if err != nil { + t.Fatalf("failed to create source: %v", err) + } + + // ------------------------------- + // 1) DISTINCT on department + 
// ------------------------------- + t.Run("distinct_department", func(t *testing.T) { + expr := Expr.NewExpressions( + Expr.NewColumnResolve("department"), + ) + + de, err := filter.NewDistinctExec(src, expr) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + batch, err := de.Next(100) + if err != nil { + t.Fatalf("distinct next failed: %v", err) + } + + //deptArr := batch.Columns[5].(*array.String) + + // get expected unique departments from original dataset + origDept := cols[5].(*array.String) + expected := make(map[string]struct{}) + for i := 0; i < origDept.Len(); i++ { + if origDept.IsNull(i) { + expected["NULL"] = struct{}{} + } else { + expected[origDept.Value(i)] = struct{}{} + } + } + + if int(batch.RowCount) != len(expected) { + t.Fatalf("expected %d distinct departments, got %d", + len(expected), batch.RowCount) + } + }) + + // ------------------------------- + // 2) DISTINCT on region + // ------------------------------- + t.Run("distinct_region", func(t *testing.T) { + // reload source (distinct consumes input) + src2, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + expr := Expr.NewExpressions( + Expr.NewColumnResolve("region"), + ) + + de, err := filter.NewDistinctExec(src2, expr) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + batch, err := de.Next(100) + if err != nil { + t.Fatalf("distinct next failed: %v", err) + } + + regionArr := batch.Columns[6].(*array.String) + + orig := cols[6].(*array.String) + expected := make(map[string]struct{}) + for i := 0; i < orig.Len(); i++ { + if orig.IsNull(i) { + expected["NULL"] = struct{}{} + } else { + expected[orig.Value(i)] = struct{}{} + } + } + + if int(regionArr.Len()) != len(expected) { + t.Fatalf("expected %d distinct regions, got %d", + len(expected), regionArr.Len()) + } + }) + + // ------------------------------- + // 3) DISTINCT(id) → should return all 20 rows + // ------------------------------- + t.Run("distinct_id_all_unique", func(t 
*testing.T) { + src3, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + expr := Expr.NewExpressions( + Expr.NewColumnResolve("id"), + ) + + de, err := filter.NewDistinctExec(src3, expr) + if err != nil { + t.Fatalf("distinct init failed: %v", err) + } + + batch, err := de.Next(100) + if err != nil { + t.Fatalf("distinct next failed: %v", err) + } + + if batch.RowCount != 20 { + t.Fatalf("expected 20 distinct id rows, got %d", batch.RowCount) + } + }) +} + +/* +============================================================================ +Limit tests +============================================================================ +*/ +func TestLimitExec(t *testing.T) { + mem := memory.NewGoAllocator() + names, cols := generateIntegrationDataset1(mem) + + // ---------------------------------- + // 1) LIMIT 5 + // ---------------------------------- + t.Run("limit_5", func(t *testing.T) { + src, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + lim, err := filter.NewLimitExec(src, 5) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil { + t.Fatalf("limit next error: %v", err) + } + + if batch.RowCount != 5 { + t.Fatalf("expected 5 rows, got %d", batch.RowCount) + } + + // verify first 5 IDs match original dataset + idArr := batch.Columns[0].(*array.Int32) + origID := cols[0].(*array.Int32) + + for i := 0; i < 5; i++ { + if idArr.Value(i) != origID.Value(i) { + t.Fatalf("row %d: expected id=%d, got id=%d", + i, origID.Value(i), idArr.Value(i)) + } + } + }) + + // ---------------------------------- + // 2) LIMIT EXACT = 20 + // ---------------------------------- + t.Run("limit_exact", func(t *testing.T) { + src, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + lim, err := filter.NewLimitExec(src, 20) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil { + t.Fatalf("limit error: %v", err) + } + + if batch.RowCount != 20 
{ + t.Fatalf("expected 20 rows, got %d", batch.RowCount) + } + }) + + // ---------------------------------- + // 3) LIMIT larger than dataset + // ---------------------------------- + t.Run("limit_too_large", func(t *testing.T) { + src, _ := project.NewInMemoryProjectExecFromArrays(names, cols) + + lim, err := filter.NewLimitExec(src, 50) + if err != nil { + t.Fatalf("limit init failed: %v", err) + } + + batch, err := lim.Next(100) + if err != nil { + t.Fatalf("limit next failed: %v", err) + } + + if batch.RowCount != 20 { + t.Fatalf("expected 20 rows when limit > dataset size, got %d", batch.RowCount) + } + }) +} + +/* +============================================================================ +Scalar function tests +============================================================================ +*/ +func TestScalarStringFunctions(t *testing.T) { + mem := memory.NewGoAllocator() + + // We will run: SELECT department, UPPER(department), LOWER(department) + // Using ScalarFunction(Upper, col("department")) + // And ScalarFunction(Lower, col("department")) + + t.Run("UpperFunction", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + colDept := Expr.NewColumnResolve("department") + + upperExpr := Expr.NewScalarFunction(Expr.Upper, colDept) + + // Evaluate: UPPER(department) + batch, err := src.Next(100) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + arr, err := Expr.EvalScalarFunction(upperExpr, batch) + if err != nil { + t.Fatalf("upper eval failed: %v", err) + } + + out := arr.(*array.String) + + // Compare with strings.ToUpper + deptCol, _ := Expr.EvalExpression(colDept, batch) + deptArr := deptCol.(*array.String) + + for i := 0; i < int(out.Len()); i++ { + if deptArr.IsNull(i) { + if !out.IsNull(i) { + t.Fatalf("expected null at %d", i) + } + continue + } + expected := strings.ToUpper(deptArr.Value(i)) + if out.Value(i) != expected { + t.Fatalf("UPPER mismatch at row %d: got %s, expected %s", + i, out.Value(i), expected) + } + } + }) + + 
t.Run("LowerFunction", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + colDept := Expr.NewColumnResolve("department") + + lowerExpr := Expr.NewScalarFunction(Expr.Lower, colDept) + + // Evaluate: LOWER(department) + batch, err := src.Next(100) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + arr, err := Expr.EvalScalarFunction(lowerExpr, batch) + if err != nil { + t.Fatalf("lower eval failed: %v", err) + } + + out := arr.(*array.String) + + deptCol, _ := Expr.EvalExpression(colDept, batch) + deptArr := deptCol.(*array.String) + + for i := 0; i < int(out.Len()); i++ { + if deptArr.IsNull(i) { + if !out.IsNull(i) { + t.Fatalf("expected null at %d", i) + } + continue + } + expected := strings.ToLower(deptArr.Value(i)) + if out.Value(i) != expected { + t.Fatalf("LOWER mismatch at row %d: got %s, expected %s", + i, out.Value(i), expected) + } + } + }) + t.Run("Abs", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + + fn := Expr.NewScalarFunction(Expr.Abs, Expr.NewColumnResolve("salary")) + exec, err := project.NewProjectExec(src, []Expr.Expression{fn}) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := exec.Next(50) + if err != nil { + t.Fatalf("exec failed: %v", err) + } + + out := batch.Columns[0].(*array.Float64) + + for i := 0; i < out.Len(); i++ { + val := out.Value(i) + if val < 0 { + t.Fatalf("abs result should never be negative, got %v", val) + } + } + }) + + // ───────────────────────────────────────────── + // ROUND(salary) + // ───────────────────────────────────────────── + t.Run("Round", func(t *testing.T) { + src, _ := NewIntegrationSource1(mem) + _, col := generateIntegrationDataset1(mem) + + fn := Expr.NewScalarFunction(Expr.Round, Expr.NewColumnResolve("salary")) + exec, err := project.NewProjectExec(src, []Expr.Expression{fn}) + if err != nil { + t.Fatalf("project init failed: %v", err) + } + + batch, err := exec.Next(50) + if err != nil { + t.Fatalf("exec failed: %v", err) + } + + 
out := batch.Columns[0].(*array.Float64) + orig := col[4].(*array.Float64) // salary column + + for i := 0; i < out.Len(); i++ { + expected := math.Round(orig.Value(i)) + got := out.Value(i) + + if expected != got { + t.Fatalf("round mismatch at %d: expected=%v got=%v", i, expected, got) + } + } + }) +} + +/* +============================================================================ +Hash join tests +============================================================================ +*/ +func TestHashJoinExec(t *testing.T) { + mem := memory.NewGoAllocator() + + t.Run("InnerJoin_SimpleDept", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("department")}, + []Expr.Expression{Expr.NewColumnResolve("department")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("inner join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount == 0 { + t.Fatalf("inner join returned zero rows (expected matches)") + } + }) + + t.Run("LeftJoin_AllLeftPreserved", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("region")}, + []Expr.Expression{Expr.NewColumnResolve("region")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.LeftJoin, nil) + if err != nil { + t.Fatalf("left join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount < 20 { + t.Fatalf("left join should preserve all 20 left rows, got %d", batch.RowCount) + } + }) + + t.Run("RightJoin_AllRightPreserved", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + 
[]Expr.Expression{Expr.NewColumnResolve("region")}, + []Expr.Expression{Expr.NewColumnResolve("region")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.RightJoin, nil) + if err != nil { + t.Fatalf("right join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount < 20 { + t.Fatalf("right join should preserve all 20 right rows, got %d", batch.RowCount) + } + }) + + t.Run("InnerJoin_NoMatches", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + // Join on unrelated keys → expect zero matches + clause := join.NewJoinClause( + []Expr.Expression{Expr.NewColumnResolve("age")}, + []Expr.Expression{Expr.NewColumnResolve("dept_id")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("inner join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount != 0 { + t.Fatalf("expected zero matches, got %d", batch.RowCount) + } + }) + + t.Run("MultiColumnJoin", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + []Expr.Expression{ + Expr.NewColumnResolve("department"), + Expr.NewColumnResolve("region"), + }, + []Expr.Expression{ + Expr.NewColumnResolve("department"), + Expr.NewColumnResolve("region"), + }, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("multi-col join init failed: %v", err) + } + + batch, err := j.Next(1000) + if err != nil { + t.Fatalf("unexpected: %v", err) + } + + if batch.RowCount == 0 { + t.Fatalf("multi-column join should match some rows") + } + }) + + t.Run("InnerJoin_CheckSchemaPrefixed", func(t *testing.T) { + src1, _ := NewIntegrationSource1(mem) + src2, _ := NewIntegrationSource2(mem) + + clause := join.NewJoinClause( + 
[]Expr.Expression{Expr.NewColumnResolve("department")}, + []Expr.Expression{Expr.NewColumnResolve("department")}, + ) + + j, err := join.NewHashJoinExec(src1, src2, clause, join.InnerJoin, nil) + if err != nil { + t.Fatalf("join init failed: %v", err) + } + + schema := j.Schema() + + // Check prefixing (department exists on both sides) + foundLeft := false + foundRight := false + + for _, f := range schema.Fields() { + if f.Name == "left_department" { + foundLeft = true + } + if f.Name == "right_department" { + foundRight = true + } + } + + if !foundLeft || !foundRight { + t.Fatalf("schema prefixing failed: left_department=%v right_department=%v", foundLeft, foundRight) + } + }) +} diff --git a/src/Backend/opti-sql-go/substrait/substrait_test.go b/src/Backend/opti-sql-go/substrait/substrait_test.go index 122ad0b..fe23790 100644 --- a/src/Backend/opti-sql-go/substrait/substrait_test.go +++ b/src/Backend/opti-sql-go/substrait/substrait_test.go @@ -31,13 +31,13 @@ func TestDummyInput(t *testing.T) { dummyRequest := &QueryExecutionRequest{ SqlStatement: "SELECT * FROM table", SubstraitLogical: []byte("CgJTUxIMCgpTZWxlY3QgKiBGUk9NIHRhYmxl"), - Id: "GenerateDTODOHaasdavdasvasdvada", + Id: "GenerateDTMoneyOHaasdavdasvasdvada", Source: &SourceType{ S3Source: "s3://my-bucket/data/table.parquet", Mime: "application/vnd.apache.parquet", }, } - resp, err := ss.ExecuteQuery(context.TODO(), dummyRequest) + resp, err := ss.ExecuteQuery(context.Background(), dummyRequest) if err != nil { t.Errorf("Expected no error, got %v", err) } diff --git a/src/Backend/test_data/csv/intergration_test_data_1.csv b/src/Backend/test_data/csv/intergration_test_data_1.csv new file mode 100644 index 0000000..a5e8985 --- /dev/null +++ b/src/Backend/test_data/csv/intergration_test_data_1.csv @@ -0,0 +1,1001 @@ +id,username,email_address,is_active,age_years,account_balance_usd,average_session_minutes,favorite_color +1,kabrahmer0,jbranson0@joomla.org,false,23,78568.75,108.7,Blue 
+2,rackred1,eskinn1@usa.gov,true,66,21933.55,19.7,Violet +3,hdeacon2,cmccomish2@xing.com,true,73,69730.89,9.2,Maroon +4,mdany3,cvreede3@pagesperso-orange.fr,true,45,80371.32,19.0,Maroon +5,mgile4,cdumbrall4@pinterest.com,true,63,82536.72,57.3,Indigo +6,ypedlow5,hmelchior5@domainmarket.com,false,96,2276.7,187.8,Aquamarine +7,bpotier6,ibaseke6@state.tx.us,false,77,32239.63,75.7,Maroon +8,tglenn7,tpawley7@github.io,true,42,33917.35,106.2,Teal +9,naleksankin8,cwarstall8@washington.edu,false,20,61450.41,231.2,Orange +10,ktrevain9,apinchen9@mapquest.com,false,58,29065.08,226.7,Turquoise +11,tfuttya,etwigginsa@uiuc.edu,true,28,76633.92,157.7,Turquoise +12,bodoranb,tmadineb@wix.com,false,46,38205.06,151.9,Puce +13,lmacqueenc,hriccardc@mayoclinic.com,true,28,13588.26,105.1,Teal +14,lkwietakd,emalesd@123-reg.co.uk,false,28,51587.99,67.8,Purple +15,dbethoe,ccricke@mit.edu,false,33,43569.4,236.0,Red +16,gcochranf,bbartoszekf@washington.edu,false,61,55799.32,168.6,Khaki +17,ekeoghaneg,charleg@scientificamerican.com,true,14,4585.2,228.0,Goldenrod +18,rlillieh,gskaseh@tripadvisor.com,true,58,46285.12,101.5,Violet +19,pblondellei,sjosskovitzi@csmonitor.com,true,33,90800.0,16.2,Blue +20,kchappellj,bmetcalfej@hatena.ne.jp,false,74,96359.33,82.1,Goldenrod +21,mgolbyk,plangcasterk@t-online.de,false,80,21171.21,55.5,Indigo +22,aebornl,bwaliszewskil@utexas.edu,false,64,2323.61,129.2,Indigo +23,bkimblym,blellom@nasa.gov,true,24,89531.03,226.3,Green +24,ohupkan,gquesnen@telegraph.co.uk,false,79,84280.86,162.3,Puce +25,tbroginio,tgreasero@opera.com,true,52,83599.32,220.3,Puce +26,mcometsonp,bguinnanep@networkadvertising.org,false,16,11008.19,35.7,Aquamarine +27,mcooksonq,abremenq@nytimes.com,false,46,66430.38,67.8,Mauv +28,gcarmenr,ebladonr@bizjournals.com,false,78,14283.62,67.9,Puce +29,malmonds,bsootss@archive.org,false,97,57417.63,12.0,Turquoise +30,cmacpharlaint,awelbandt@wix.com,true,68,78044.56,219.8,Green +31,bgagenu,jbrickneru@clickbank.net,true,87,75215.47,27.5,Orange 
+32,cgraverv,tvittetv@who.int,false,55,48576.54,154.3,Khaki +33,ckleinhandlerw,gboissierw@forbes.com,false,76,24205.9,80.2,Aquamarine +34,mnewtonx,balenichicovx@nyu.edu,false,56,71490.0,236.2,Green +35,opembley,sfennelly@is.gd,false,98,79604.64,72.6,Khaki +36,lstoodersz,emaudsleyz@joomla.org,true,31,28501.29,30.6,Pink +37,rpickavance10,aoregan10@microsoft.com,true,92,85816.31,111.8,Pink +38,bhapgood11,ctrebilcock11@prweb.com,false,36,3128.05,51.0,Yellow +39,cogden12,emoreman12@globo.com,false,77,43674.85,130.1,Blue +40,dkurt13,ccecere13@china.com.cn,false,79,92882.13,154.4,Fuscia +41,nvangeffen14,tservis14@blogtalkradio.com,true,36,44125.22,68.3,Turquoise +42,jmatzl15,amcalees15@prnewswire.com,false,14,64389.09,210.7,Teal +43,kcovolini16,eskuce16@geocities.jp,false,58,61266.66,53.6,Indigo +44,rdhennin17,zhartil17@biblegateway.com,false,62,20083.83,49.7,Yellow +45,lnetti18,sgergolet18@xing.com,false,90,95479.39,89.5,Violet +46,acamblin19,neverest19@google.co.uk,true,42,69372.65,114.4,Pink +47,bsanbroke1a,ehalbert1a@prlog.org,true,77,39295.87,169.1,Goldenrod +48,lcassius1b,lhuntly1b@ow.ly,true,37,57540.0,33.8,Fuscia +49,golivier1c,barger1c@typepad.com,true,85,36823.38,127.2,Teal +50,tdubock1d,amanske1d@fema.gov,true,60,97834.72,78.0,Violet +51,jhardes1e,egreason1e@storify.com,false,36,72901.15,49.5,Mauv +52,djorge1f,emetterick1f@storify.com,false,40,3074.71,60.9,Yellow +53,sprobin1g,rcolisbe1g@cdbaby.com,true,99,15836.96,213.6,Orange +54,breaper1h,msouthall1h@soup.io,true,79,-2334.0,235.5,Yellow +55,bmcmillam1i,mdeevey1i@wired.com,true,34,53132.39,76.8,Pink +56,bmar1j,dyitzhakof1j@odnoklassniki.ru,false,28,83065.6,229.3,Teal +57,mgirling1k,apoor1k@marketwatch.com,false,87,8214.74,56.5,Orange +58,kdoorbar1l,mcholwell1l@uiuc.edu,false,28,49805.07,125.8,Indigo +59,cgatesman1m,fmacneil1m@ow.ly,true,70,51264.57,233.0,Violet +60,mskerman1n,afrancescuzzi1n@odnoklassniki.ru,true,79,95239.67,104.1,Teal +61,vbinney1o,cshorey1o@amazon.de,false,54,16618.88,191.9,Aquamarine 
+62,hcholerton1p,gpaylor1p@utexas.edu,false,55,43522.18,154.9,Khaki +63,fwyon1q,lbuckby1q@smh.com.au,false,96,61202.83,36.0,Violet +64,sshillabeare1r,rrack1r@pinterest.com,false,38,19007.32,32.4,Goldenrod +65,vkaiser1s,dthorp1s@digg.com,false,30,7656.38,191.8,Yellow +66,mrosenhaupt1t,nburnage1t@marriott.com,true,84,8990.69,162.1,Fuscia +67,klebrun1u,kchaudron1u@mtv.com,false,91,53217.02,81.0,Turquoise +68,vmoncey1v,gdevereux1v@paginegialle.it,true,82,99251.37,24.3,Red +69,ateissier1w,dlanghor1w@redcross.org,true,65,10992.34,132.3,Blue +70,hiwanicki1x,sspinola1x@elpais.com,true,85,78351.87,114.2,Pink +71,mmeckiff1y,wmatteacci1y@wired.com,true,26,57171.24,188.7,Pink +72,cmcbrearty1z,hkingett1z@123-reg.co.uk,false,88,-3016.95,11.8,Pink +73,beynon20,jcran20@chronoengine.com,true,71,21921.98,196.7,Yellow +74,pstannas21,mcornels21@ed.gov,true,99,66552.01,12.6,Goldenrod +75,njosuweit22,btumbelty22@go.com,false,66,23107.05,189.4,Puce +76,kpagelsen23,mtolan23@de.vu,true,40,27701.03,61.6,Pink +77,spetrichat24,rdeldello24@networksolutions.com,false,48,8620.33,180.9,Maroon +78,akerswell25,lvalintine25@japanpost.jp,true,71,-108.19,192.5,Mauv +79,vbraga26,rdumingos26@salon.com,false,41,64557.11,203.6,Maroon +80,lduckit27,nitzhayek27@archive.org,false,76,51903.82,87.7,Green +81,gmidgely28,lkiln28@etsy.com,true,80,97275.62,104.5,Red +82,cgarratt29,rmockes29@google.ru,true,91,5067.63,210.2,Red +83,rglossop2a,rgrouvel2a@arstechnica.com,true,26,62.53,183.6,Purple +84,dluttger2b,eretchless2b@answers.com,true,19,32428.17,231.8,Orange +85,elangridge2c,wgrodden2c@mozilla.org,false,82,33110.19,25.2,Goldenrod +86,oheathcott2d,maxcel2d@cargocollective.com,true,64,83145.34,176.7,Goldenrod +87,lshrive2e,lbloxland2e@free.fr,true,54,36226.59,137.6,Khaki +88,carchley2f,csomerton2f@furl.net,false,61,55457.75,213.9,Red +89,tbrew2g,nvesque2g@bloglines.com,true,59,9234.99,61.3,Puce +90,zcruise2h,jhuxton2h@sohu.com,true,45,22015.28,84.7,Orange 
+91,ejailler2i,rgillitt2i@freewebs.com,false,71,24201.93,157.0,Blue +92,hsquier2j,rbegg2j@shutterfly.com,true,54,21285.15,55.6,Blue +93,kgandy2k,cmcclean2k@unblog.fr,true,87,47796.6,97.6,Mauv +94,cpadberry2l,bpaskerful2l@google.fr,true,25,12861.17,201.1,Khaki +95,mdjurdjevic2m,mtadlow2m@fc2.com,false,71,37159.23,111.3,Blue +96,npeddie2n,hkilfeather2n@disqus.com,true,36,-4415.24,202.0,Blue +97,tpraton2o,rsiburn2o@creativecommons.org,true,81,4969.55,25.1,Mauv +98,hmoulding2p,lsayers2p@4shared.com,true,43,89837.87,113.5,Aquamarine +99,heastmead2q,jdurrell2q@people.com.cn,true,64,36745.97,123.6,Crimson +100,hlambis2r,abastard2r@fda.gov,true,14,58177.16,192.5,Maroon +101,gwitterick2s,kmadders2s@wikia.com,true,73,89899.81,186.4,Indigo +102,cjedraszek2t,abenda2t@utexas.edu,true,37,65280.64,172.8,Purple +103,cwooton2u,kbeaze2u@craigslist.org,true,63,94274.92,193.2,Goldenrod +104,gunthank2v,sferre2v@whitehouse.gov,true,49,40119.54,110.3,Turquoise +105,bfilson2w,bhayles2w@hc360.com,true,14,44503.45,157.1,Indigo +106,ldebruyn2x,lfurneaux2x@desdev.cn,false,24,59364.66,207.5,Pink +107,lshegog2y,jkinnin2y@toplist.cz,true,29,44611.61,197.8,Pink +108,lswoffer2z,mlambert2z@ibm.com,false,17,75611.62,222.2,Mauv +109,tsmitheman30,ryanyushkin30@prnewswire.com,false,18,24549.77,99.8,Khaki +110,subsdale31,lparker31@shop-pro.jp,false,97,99572.38,168.4,Orange +111,lelfe32,fhorsefield32@arstechnica.com,true,58,39755.35,38.2,Violet +112,abulbeck33,tedeson33@huffingtonpost.com,true,40,23526.09,91.3,Goldenrod +113,omaffi34,ccrossley34@nps.gov,true,48,37339.46,35.9,Pink +114,kbecerro35,sfranca35@plala.or.jp,false,76,83744.37,96.0,Crimson +115,cdeelay36,nharring36@kickstarter.com,false,20,62170.7,139.4,Khaki +116,streherne37,rbishell37@indiatimes.com,true,62,54012.77,168.8,Mauv +117,tralling38,mraspison38@instagram.com,true,99,21780.62,230.9,Mauv +118,jpanswick39,aherche39@live.com,true,63,45818.69,17.1,Teal +119,aavramovich3a,mkinkead3a@artisteer.com,false,85,-3534.1,110.4,Red 
+120,bhiom3b,lcheckley3b@foxnews.com,false,89,65567.85,135.4,Turquoise +121,rbayldon3c,kbosworth3c@mashable.com,true,50,95253.56,23.6,Khaki +122,fbagnell3d,bjosephov3d@reference.com,false,57,68390.99,78.4,Teal +123,hshambrooke3e,edegregorio3e@umn.edu,false,52,15771.1,203.8,Crimson +124,ggribbon3f,rpagett3f@prweb.com,false,16,2917.55,56.2,Turquoise +125,edavana3g,cdedney3g@cmu.edu,false,31,55667.94,36.1,Yellow +126,hwaskett3h,wantyshev3h@angelfire.com,true,50,63517.05,60.0,Turquoise +127,tdurtnall3i,ftwittey3i@house.gov,false,49,83952.8,35.1,Green +128,omaddyson3j,rcordie3j@myspace.com,true,97,99567.41,168.1,Crimson +129,gmanntschke3k,mmuzzini3k@ifeng.com,true,91,54316.77,91.4,Violet +130,ndaniele3l,cclaughton3l@pen.io,false,69,65027.94,213.7,Blue +131,npeagram3m,cpottle3m@blogtalkradio.com,true,51,31631.27,136.5,Blue +132,mbore3n,npieterick3n@springer.com,false,48,58073.82,95.5,Maroon +133,skensington3o,mbury3o@pagesperso-orange.fr,false,61,13374.65,52.0,Aquamarine +134,bfarman3p,amclaverty3p@narod.ru,true,61,57229.14,124.1,Mauv +135,shutchins3q,sgrumble3q@accuweather.com,true,33,46414.27,208.5,Khaki +136,cgauthorpp3r,gdowsett3r@jigsy.com,true,30,43875.9,159.3,Indigo +137,gblaw3s,aaicken3s@dion.ne.jp,true,43,154.23,107.3,Orange +138,mcorkett3t,fodea3t@foxnews.com,false,47,94738.45,203.1,Indigo +139,mscogings3u,gdicky3u@imageshack.us,true,54,31648.6,4.9,Teal +140,atabram3v,ebeardow3v@twitter.com,false,34,92957.65,19.4,Puce +141,dlucken3w,awinter3w@sciencedirect.com,true,34,1043.49,20.9,Teal +142,bguiduzzi3x,zcatley3x@is.gd,true,56,49087.72,175.5,Crimson +143,adyos3y,aroney3y@craigslist.org,true,68,60016.89,203.2,Purple +144,jsichardt3z,bchadband3z@uol.com.br,true,70,33414.39,215.6,Violet +145,kballsdon40,learingey40@acquirethisname.com,true,34,61189.33,183.7,Turquoise +146,nrattrie41,zniesing41@mlb.com,false,20,3713.34,106.0,Violet +147,pstarmore42,moda42@jugem.jp,true,16,37980.99,173.5,Pink +148,lsalmon43,emichell43@businessweek.com,false,90,69007.42,21.3,Indigo 
+149,hmcglynn44,mdorward44@independent.co.uk,false,48,62759.77,112.1,Turquoise +150,mlindfors45,jluke45@php.net,false,75,10456.42,26.4,Aquamarine +151,rjeanin46,scolwell46@constantcontact.com,false,69,90824.92,80.8,Yellow +152,titchingham47,jhallt47@who.int,false,50,79100.32,115.9,Aquamarine +153,ppledger48,fcamfield48@apache.org,false,24,28749.64,9.1,Turquoise +154,mbeauchop49,hglasheen49@dailymail.co.uk,false,26,37414.35,45.5,Fuscia +155,rbertlin4a,ulikly4a@comcast.net,false,37,71606.35,225.8,Purple +156,gmillom4b,avenard4b@hud.gov,true,76,94234.72,36.3,Goldenrod +157,asherman4c,fbellenie4c@thetimes.co.uk,true,26,26478.22,198.4,Khaki +158,ubottjer4d,kmcmychem4d@epa.gov,false,81,73783.54,131.1,Purple +159,itimeby4e,aduesbury4e@va.gov,false,40,58556.11,213.9,Green +160,amatson4f,smccool4f@bbc.co.uk,true,49,61713.07,87.0,Orange +161,aaddinall4g,fspellacey4g@live.com,false,33,34527.69,72.2,Turquoise +162,csach4h,ewinning4h@vk.com,true,43,23477.9,229.8,Violet +163,tyeskov4i,fbloxsom4i@forbes.com,false,97,3535.98,113.1,Pink +164,cmcclory4j,mforce4j@technorati.com,false,96,22636.5,98.3,Khaki +165,hilyasov4k,educe4k@netlog.com,true,66,50561.37,66.0,Aquamarine +166,pkollatsch4l,aopdenorth4l@bloglovin.com,false,28,9517.82,27.8,Yellow +167,ndeminico4m,iodee4m@51.la,false,22,60746.86,85.3,Puce +168,tbenedyktowicz4n,jburgoyne4n@blog.com,true,53,32835.66,77.0,Pink +169,mpiner4o,fkepp4o@woothemes.com,true,42,1608.82,23.4,Puce +170,bcastelletto4p,cklugman4p@google.de,true,85,98194.54,94.9,Goldenrod +171,lregitz4q,bwillars4q@google.nl,false,21,83442.35,19.5,Purple +172,egallemore4r,cwatters4r@kickstarter.com,true,16,10599.53,39.3,Orange +173,pmoyne4s,mschulter4s@independent.co.uk,false,23,21132.22,1.3,Crimson +174,edymocke4t,lwindress4t@newsvine.com,false,48,79372.58,216.4,Goldenrod +175,dpietrusiak4u,rcamosso4u@geocities.jp,false,77,64270.28,211.4,Mauv +176,pdeeson4v,goflaherty4v@dion.ne.jp,false,20,3941.61,184.8,Orange 
+177,erabbe4w,bakett4w@bravesites.com,true,60,51406.26,141.8,Teal +178,mferron4x,mbrooke4x@youku.com,false,25,37622.42,210.8,Teal +179,atottle4y,bclinton4y@purevolume.com,true,37,43558.76,145.2,Red +180,bedwardson4z,dscotfurth4z@dion.ne.jp,true,66,27769.27,68.5,Yellow +181,acarlucci50,fcomiam50@economist.com,false,54,6018.01,208.2,Goldenrod +182,stwallin51,cwallice51@tripod.com,true,32,26271.66,228.3,Blue +183,lmarchiso52,cledstone52@networksolutions.com,true,45,65987.77,46.4,Yellow +184,cpelling53,nniccolls53@dyndns.org,true,30,17758.16,53.4,Goldenrod +185,zgricks54,eeacle54@nih.gov,true,93,65139.32,156.9,Fuscia +186,cclemenson55,diashvili55@angelfire.com,true,91,54823.75,77.4,Aquamarine +187,lbladder56,rdrust56@house.gov,false,23,16407.74,65.4,Orange +188,bwieprecht57,bbench57@naver.com,false,58,7563.83,47.6,Purple +189,hdarinton58,gbrader58@dyndns.org,true,58,10792.6,17.0,Mauv +190,dbowller59,mboughtflower59@boston.com,false,19,78897.16,28.2,Purple +191,cdivis5a,dblabber5a@accuweather.com,true,50,95641.31,97.7,Yellow +192,spenhalewick5b,ggott5b@cloudflare.com,true,89,74511.66,124.7,Indigo +193,mredwood5c,lmckeveney5c@facebook.com,true,95,67450.15,11.8,Yellow +194,amacmurray5d,jfibbitts5d@sitemeter.com,true,33,51196.3,70.0,Violet +195,mkimbling5e,lspeake5e@wired.com,true,22,98791.96,90.2,Crimson +196,ctrye5f,djameson5f@canalblog.com,false,70,91481.94,201.4,Orange +197,carkin5g,jorgel5g@marketwatch.com,true,53,42292.02,23.2,Aquamarine +198,jmedland5h,dbonnier5h@yellowpages.com,true,58,42698.85,84.1,Green +199,fmallall5i,lgimblet5i@whitehouse.gov,true,85,22927.08,152.8,Puce +200,bkupker5j,dgjerde5j@newsvine.com,true,89,93930.62,216.3,Green +201,agill5k,zcarlill5k@privacy.gov.au,false,58,41174.32,115.8,Crimson +202,agofford5l,ldilliston5l@washingtonpost.com,true,96,96695.38,14.1,Purple +203,npender5m,eruger5m@jimdo.com,false,36,75419.63,101.3,Crimson +204,awaterman5n,ghessing5n@businesswire.com,true,54,86699.71,59.7,Orange 
+205,oaudenis5o,faucoate5o@sun.com,true,19,46836.54,207.5,Aquamarine +206,mwindress5p,hchesman5p@loc.gov,false,97,53488.69,66.9,Indigo +207,hdeyes5q,zkobu5q@wordpress.com,false,72,77479.93,84.4,Violet +208,vbadder5r,pgomez5r@instagram.com,true,24,90940.2,114.6,Aquamarine +209,mmckeon5s,awalkley5s@posterous.com,false,79,78872.2,201.0,Puce +210,alukes5t,gway5t@ning.com,false,56,80344.01,80.7,Red +211,bstruan5u,abarneville5u@google.ca,false,74,38649.41,216.4,Purple +212,rcopner5v,disaak5v@addtoany.com,true,83,51194.82,59.8,Mauv +213,mcapinetti5w,dpoetz5w@independent.co.uk,true,80,51265.78,139.0,Puce +214,ccobain5x,ajervoise5x@state.tx.us,true,71,8492.27,123.8,Turquoise +215,dtinn5y,tfrape5y@answers.com,true,98,80338.69,213.3,Aquamarine +216,gtackell5z,cveracruysse5z@macromedia.com,true,56,28970.75,39.8,Indigo +217,hjoyce60,dgatley60@github.com,true,58,52875.49,131.9,Pink +218,cantonsson61,mmactimpany61@smugmug.com,false,91,90280.01,205.0,Teal +219,jgynne62,edobrowolski62@shareasale.com,false,31,54928.08,8.9,Turquoise +220,ebooty63,gwickey63@thetimes.co.uk,true,63,25248.03,78.3,Purple +221,jhaly64,egirardengo64@adobe.com,false,79,25530.67,193.5,Violet +222,ciskowicz65,mvorley65@narod.ru,false,27,10025.95,8.5,Khaki +223,oream66,mhaining66@nih.gov,false,55,160.47,193.3,Goldenrod +224,cottey67,vtocher67@psu.edu,true,29,-192.32,202.5,Fuscia +225,cmccurlye68,kbarck68@shop-pro.jp,true,53,87737.09,40.1,Green +226,bdavidowich69,tmallalieu69@ebay.co.uk,false,17,-2836.75,21.3,Blue +227,aneesam6a,bjills6a@yale.edu,false,39,86645.31,104.2,Turquoise +228,nivetts6b,graincin6b@webeden.co.uk,true,92,19425.8,10.9,Blue +229,rmagnus6c,hmenloe6c@mozilla.org,true,36,50849.97,194.1,Fuscia +230,mshovell6d,jmeasures6d@ibm.com,true,28,15777.1,193.4,Red +231,fcourtney6e,cqueen6e@omniture.com,false,23,99505.5,221.2,Maroon +232,tbabin6f,mkennally6f@liveinternet.ru,true,97,55648.38,166.8,Purple +233,dkimber6g,kmapholm6g@live.com,false,69,-4440.54,99.2,Green 
+234,vmerner6h,jfeehily6h@vkontakte.ru,false,99,69185.02,108.8,Khaki +235,bblouet6i,arollings6i@nih.gov,true,23,66210.49,118.3,Puce +236,chowson6j,rgaukroger6j@washingtonpost.com,false,33,80917.55,161.7,Fuscia +237,kburner6k,kpetcher6k@yolasite.com,false,96,65515.39,36.5,Maroon +238,cscandroot6l,lfranchyonok6l@mapquest.com,true,60,50872.94,148.0,Blue +239,ptottie6m,bwheatcroft6m@live.com,false,23,3631.02,18.8,Puce +240,fsheer6n,nbuncher6n@pcworld.com,true,29,44745.12,165.7,Red +241,nhavis6o,jpatrie6o@privacy.gov.au,true,90,89669.26,36.5,Purple +242,hmarcq6p,rvoas6p@example.com,false,55,64169.54,231.0,Fuscia +243,tmccoveney6q,aizhakov6q@theatlantic.com,true,33,31649.37,196.6,Aquamarine +244,htwine6r,egabbotts6r@odnoklassniki.ru,false,48,-1530.03,190.0,Teal +245,gwallsam6s,mabrahamsen6s@cam.ac.uk,false,68,5942.94,112.7,Mauv +246,hdrury6t,hblaisdell6t@hp.com,false,35,64476.46,43.7,Fuscia +247,aosbourn6u,bgaskarth6u@shareasale.com,false,53,77685.37,190.7,Pink +248,wstockell6v,bdrakard6v@uol.com.br,false,29,22633.82,105.6,Blue +249,dstuttman6w,mgathercoal6w@addthis.com,false,86,-4607.94,206.0,Puce +250,nbirkhead6x,isansbury6x@sciencedaily.com,false,76,89007.14,45.3,Maroon +251,jheminsley6y,iraleston6y@foxnews.com,true,72,68734.49,119.4,Maroon +252,mtesauro6z,tgrimme6z@xing.com,false,90,7259.04,218.6,Pink +253,hgrzegorzewicz70,bstotherfield70@cnet.com,true,14,8121.6,223.8,Violet +254,slinay71,cdudney71@uiuc.edu,true,52,74486.69,61.9,Pink +255,mscarre72,aminthorpe72@wikipedia.org,false,30,66347.44,54.2,Indigo +256,kpestricke73,ygilbertson73@bloglovin.com,false,51,66214.09,232.6,Puce +257,cstennes74,hburdas74@theatlantic.com,false,89,56913.91,21.8,Goldenrod +258,mhegarty75,atoquet75@paypal.com,true,29,19756.01,23.6,Purple +259,lvannoort76,daldwich76@tamu.edu,true,73,40920.67,196.4,Green +260,iwindows77,tfarrant77@youtu.be,true,89,19572.3,32.3,Goldenrod +261,ileaburn78,sallsebrook78@mapquest.com,false,37,-1352.52,71.8,Pink 
+262,hmicklewicz79,bwing79@list-manage.com,true,19,91391.64,203.3,Fuscia +263,rharpur7a,gtyre7a@list-manage.com,false,16,40233.5,176.4,Aquamarine +264,gaddyman7b,tsummersby7b@mlb.com,false,38,79050.35,195.7,Puce +265,cpigny7c,nsurcombe7c@simplemachines.org,true,39,89263.61,83.2,Pink +266,phudghton7d,cwippermann7d@sphinn.com,true,39,47281.64,146.2,Violet +267,jcoats7e,thuzzey7e@liveinternet.ru,true,36,75739.29,239.8,Fuscia +268,adaburn7f,mlugden7f@psu.edu,false,22,47520.54,198.3,Pink +269,bklimkovich7g,bleyfield7g@amazon.co.jp,true,47,22986.66,24.2,Mauv +270,rsilberschatz7h,soosthoutdevree7h@toplist.cz,false,72,35317.44,184.0,Maroon +271,mogborne7i,dpresshaugh7i@berkeley.edu,true,31,42618.97,175.9,Blue +272,mmordon7j,rscotchmoor7j@thetimes.co.uk,false,22,19997.18,161.1,Puce +273,nbattey7k,neuston7k@dagondesign.com,true,54,32305.92,176.1,Fuscia +274,eglayzer7l,gkellert7l@godaddy.com,false,16,92262.24,45.5,Turquoise +275,swhyley7m,nbeeching7m@domainmarket.com,true,82,66351.52,75.7,Khaki +276,ssidey7n,scauderlie7n@toplist.cz,true,23,70017.51,2.1,Pink +277,sbour7o,hleake7o@springer.com,false,50,43686.67,95.2,Fuscia +278,bcastillon7p,gkunkler7p@youku.com,true,43,16395.55,186.6,Blue +279,lkreutzer7q,abarrasse7q@xinhuanet.com,false,57,76744.28,125.9,Crimson +280,tfishley7r,hpantone7r@ca.gov,true,21,95976.15,148.6,Red +281,dhardwicke7s,kkytley7s@ocn.ne.jp,true,27,16.36,118.5,Red +282,garnely7t,tcolomb7t@marketwatch.com,false,85,59346.93,56.6,Puce +283,btosdevin7u,cealles7u@cbsnews.com,false,56,82188.87,63.2,Indigo +284,mtysall7v,dwoodstock7v@rambler.ru,false,43,61900.67,131.2,Yellow +285,bwalrond7w,lcartmel7w@twitpic.com,false,96,95177.28,151.1,Orange +286,dbeharrell7x,mlouis7x@adobe.com,true,82,30894.94,147.3,Teal +287,cdanick7y,equidenham7y@multiply.com,true,19,70158.64,237.1,Khaki +288,tbrent7z,ocantillon7z@zimbio.com,true,53,91377.16,6.1,Purple +289,clovel80,amudge80@sitemeter.com,true,15,34484.24,35.2,Khaki 
+290,hlewins81,schallin81@flickr.com,false,33,22534.97,226.4,Pink +291,ebaulcombe82,anicolls82@dot.gov,false,96,91459.42,4.0,Puce +292,apaxforde83,jboston83@house.gov,false,21,51945.52,135.1,Purple +293,esanger84,kdelle84@multiply.com,true,57,12465.82,149.7,Mauv +294,bmcelane85,gparzis85@comsenz.com,false,59,29506.16,54.6,Orange +295,bdamerell86,dblewitt86@npr.org,false,80,5435.29,191.4,Pink +296,mlincoln87,nkolinsky87@theglobeandmail.com,false,77,8355.01,232.1,Violet +297,mdeighan88,truddy88@instagram.com,true,90,44335.95,156.5,Purple +298,eaizikovitch89,ewetheril89@illinois.edu,false,50,11132.05,77.2,Maroon +299,gsamarth8a,tbutchard8a@aboutads.info,true,71,77145.4,224.6,Orange +300,apidgeley8b,pbangle8b@cdbaby.com,true,15,72959.09,49.2,Mauv +301,brobilart8c,rkenlin8c@guardian.co.uk,true,20,85888.93,145.9,Turquoise +302,jghest8d,ksharrocks8d@google.it,false,17,15868.69,70.5,Green +303,aalflatt8e,hlawtey8e@oaic.gov.au,true,39,40233.11,113.8,Puce +304,gharriday8f,rfoote8f@facebook.com,false,31,20352.91,179.8,Yellow +305,civushkin8g,cvanichkov8g@sfgate.com,true,94,48255.7,61.5,Pink +306,cfake8h,hfidelli8h@wunderground.com,true,50,82547.51,73.9,Teal +307,mgillian8i,gstudeart8i@weebly.com,false,59,11179.96,156.2,Khaki +308,fkilmartin8j,gcleeve8j@archive.org,true,68,95577.29,180.2,Khaki +309,tgrabert8k,pcrookston8k@toplist.cz,false,55,10720.78,140.0,Crimson +310,phosburn8l,chebson8l@blog.com,false,68,8318.83,46.6,Mauv +311,mbingle8m,vyitzhok8m@quantcast.com,false,65,37332.78,224.8,Maroon +312,aheardman8n,bluttger8n@goo.gl,false,45,2328.98,229.6,Green +313,tkeld8o,dwinstanley8o@umich.edu,false,22,44863.97,173.0,Aquamarine +314,ekynson8p,mmarney8p@sogou.com,true,61,54342.33,27.8,Maroon +315,cclearley8q,bbyatt8q@twitpic.com,true,82,12524.27,133.9,Puce +316,bspellworth8r,dredwood8r@miitbeian.gov.cn,true,71,92700.62,200.3,Turquoise +317,btremontana8s,dcator8s@ed.gov,false,74,12226.59,174.3,Aquamarine +318,hwindram8t,kmacgowan8t@stumbleupon.com,true,93,21708.68,44.2,Khaki 
+319,tacedo8u,ijull8u@artisteer.com,false,59,91644.72,100.9,Teal +320,lhews8v,sgrieves8v@exblog.jp,true,87,68881.02,85.4,Mauv +321,hcamillo8w,ssherrett8w@yahoo.co.jp,true,64,6920.87,66.5,Khaki +322,bbusek8x,pcarcass8x@nymag.com,false,60,26202.56,43.4,Goldenrod +323,hreddell8y,sivanchenkov8y@nydailynews.com,true,43,44546.17,5.9,Maroon +324,nloughlin8z,llynes8z@wikipedia.org,false,46,68732.94,79.5,Pink +325,ajakel90,adegliantoni90@google.de,false,95,84739.75,119.5,Puce +326,aharrinson91,bitzkowicz91@list-manage.com,true,51,88479.9,75.2,Crimson +327,lmackim92,epaumier92@domainmarket.com,false,50,95020.24,146.7,Mauv +328,hoag93,llegon93@arstechnica.com,false,52,23818.47,234.8,Pink +329,jlarenson94,bwarwicker94@virginia.edu,false,94,88994.02,92.8,Yellow +330,cmatchell95,sspawton95@aboutads.info,true,45,11952.11,198.2,Mauv +331,ncarryer96,kdibson96@jugem.jp,false,44,48955.43,109.3,Yellow +332,sjunkinson97,hricciardello97@hexun.com,false,18,72618.62,89.4,Mauv +333,cdysart98,sdell98@skyrock.com,true,55,41894.68,154.3,Puce +334,tvazquez99,askelly99@vk.com,false,88,-646.24,167.9,Pink +335,tewart9a,vviegas9a@fotki.com,true,37,57049.98,188.2,Maroon +336,ctames9b,bwillgress9b@chron.com,false,51,51317.06,47.3,Green +337,tkoch9c,dadshed9c@facebook.com,false,49,96924.02,115.3,Yellow +338,hbucklan9d,gsalzburger9d@merriam-webster.com,true,13,90897.24,95.4,Fuscia +339,vlaba9e,kmeach9e@geocities.jp,true,91,23248.53,203.0,Indigo +340,llowdeane9f,fscowcroft9f@delicious.com,true,27,20693.19,110.1,Red +341,phenke9g,jberntsson9g@odnoklassniki.ru,true,94,38336.26,177.6,Puce +342,grolston9h,freddlesden9h@sohu.com,false,61,64714.37,28.4,Purple +343,aunderdown9i,lcorben9i@discuz.net,false,36,71082.12,67.9,Fuscia +344,zpepye9j,adrover9j@cisco.com,false,49,10848.48,159.7,Pink +345,mpretley9k,bparr9k@hatena.ne.jp,false,67,57757.87,213.1,Maroon +346,foselton9l,jnewton9l@wikispaces.com,true,30,-975.71,155.0,Teal +347,hashpole9m,mhowship9m@craigslist.org,true,69,21864.13,181.0,Green 
+348,wwelburn9n,palder9n@japanpost.jp,false,96,45250.91,1.9,Fuscia +349,kharkins9o,gmcgonagle9o@noaa.gov,true,16,42088.83,188.1,Indigo +350,nbasden9p,rmetham9p@github.com,true,43,38385.3,73.0,Puce +351,bruzicka9q,akeyser9q@a8.net,false,38,41579.61,217.2,Indigo +352,sguntrip9r,rpineaux9r@ezinearticles.com,true,77,5117.34,199.4,Red +353,fkynnd9s,asoda9s@hp.com,true,54,26313.24,3.0,Red +354,bofogarty9t,emccracken9t@wsj.com,false,42,55115.44,66.0,Pink +355,kvinter9u,wlarkings9u@cbc.ca,false,81,83879.15,15.1,Turquoise +356,ssaphir9v,lwindrum9v@google.it,true,64,30807.45,58.6,Yellow +357,csigart9w,eotson9w@blinklist.com,true,38,64804.79,68.4,Khaki +358,ybusson9x,mwilletts9x@unesco.org,false,52,56660.74,212.4,Mauv +359,hgemmill9y,mvinas9y@telegraph.co.uk,true,19,28393.0,206.3,Khaki +360,rduckitt9z,avaggs9z@youku.com,false,35,66979.19,117.3,Green +361,sjaquesta0,gcrunkhorna0@a8.net,true,60,6175.05,156.3,Pink +362,scuniffea1,tavona1@jugem.jp,true,67,4997.31,224.4,Mauv +363,ekingscotea2,kmelbournea2@sakura.ne.jp,false,15,59573.94,56.8,Violet +364,amolanda3,rkrolika3@elpais.com,true,49,40952.75,203.3,Puce +365,hbeasanta4,kthrippa4@huffingtonpost.com,true,38,3459.39,222.8,Purple +366,jtheurera5,sfeviera5@naver.com,false,74,59931.1,9.6,Violet +367,trickera6,mfinniea6@elpais.com,false,38,87301.7,122.7,Turquoise +368,dleana7,loflahertya7@sakura.ne.jp,false,81,65030.51,30.6,Orange +369,bduraka8,aasletta8@artisteer.com,true,26,60213.51,232.0,Khaki +370,mblodga9,fescota9@geocities.jp,true,29,84100.4,197.9,Pink +371,rgadieaa,mperrataa@reddit.com,true,56,-2741.1,101.6,Indigo +372,hoakenfallab,divanchovab@wisc.edu,true,68,5326.02,50.9,Turquoise +373,vbuggeac,ehaggerwoodac@booking.com,true,44,86193.16,89.0,Maroon +374,rbrumhamad,bfrearsad@technorati.com,true,73,78288.5,186.9,Mauv +375,dsauntae,asiddonsae@wix.com,true,23,78866.35,160.6,Fuscia +376,msalkildaf,gerridgeaf@shareasale.com,false,47,67684.6,143.2,Turquoise +377,cdelaneyag,tmassonag@indiegogo.com,true,96,22564.78,66.7,Red 
+378,mondracekah,drubanenkoah@flickr.com,false,82,88382.87,5.7,Mauv +379,gmumai,tdobellai@usnews.com,true,13,22103.78,41.2,Purple +380,btondeuraj,rhollowaj@seesaa.net,false,57,47580.03,89.6,Orange +381,gdreinanak,bshevlaneak@oakley.com,true,29,49443.9,61.6,Yellow +382,npickardal,sloughlinal@imgur.com,false,14,65970.84,200.2,Violet +383,rjerkeam,hwithnallam@ed.gov,false,85,37582.52,38.3,Maroon +384,mhousdenan,ltunnan@princeton.edu,true,33,-1364.5,87.6,Maroon +385,afiggao,aelkinao@house.gov,true,13,79874.07,30.1,Orange +386,gjolliffap,flavenap@umich.edu,true,73,27510.64,87.9,Pink +387,kbroscombeaq,aslemmondsaq@google.cn,false,20,78309.9,12.1,Green +388,clinceyar,pfraginoar@gravatar.com,false,44,97077.5,60.7,Crimson +389,klorrieas,agrellieras@si.edu,true,74,66507.05,125.6,Violet +390,cblaslat,rmcilwrickat@umn.edu,true,60,62973.12,71.3,Violet +391,jjellemanau,kcadleau@yandex.ru,true,43,44572.72,190.2,Puce +392,tstartav,jwardlawav@jigsy.com,true,86,31839.35,193.1,Goldenrod +393,emcpakeaw,dobreenaw@businesswire.com,false,86,59105.01,96.5,Red +394,mgeogheganax,csurmeyersax@diigo.com,true,94,72436.34,16.3,Teal +395,fmitforday,creignarday@discuz.net,true,76,69619.25,210.9,Blue +396,tcrewsaz,afrowaz@privacy.gov.au,false,44,898.7,133.2,Goldenrod +397,rbicknellb0,sboothmanb0@japanpost.jp,true,78,64146.3,168.4,Goldenrod +398,cturesb1,bstrachanb1@google.pl,true,47,42103.48,19.6,Green +399,stynemouthb2,dmushettb2@princeton.edu,false,38,15531.69,170.7,Violet +400,kbrazerb3,rbethob3@marketwatch.com,false,84,80793.42,100.5,Yellow +401,nmouncherb4,kjuryb4@eepurl.com,true,67,97164.11,111.0,Turquoise +402,gcallanderb5,mmacdonoghb5@diigo.com,true,40,8068.21,37.5,Yellow +403,btorrecillab6,abrislanb6@freewebs.com,false,94,46709.31,18.1,Blue +404,mstancerb7,bgoodhayb7@ca.gov,true,45,24584.88,72.2,Blue +405,laytonb8,tsullerb8@taobao.com,true,22,84505.29,62.9,Violet +406,bmcasgillb9,aedgerb9@wired.com,false,68,90372.28,34.8,Pink +407,rvasicba,edanaherba@imdb.com,true,45,97016.41,115.2,Yellow 
+408,eaxtonnebb,agrigoriscubb@lycos.com,true,80,45154.03,9.3,Teal +409,mstonehambc,jstuttmanbc@clickbank.net,false,14,61716.73,52.0,Green +410,cginnalybd,bflewettbd@wp.com,false,13,44319.39,91.4,Violet +411,ncooleybe,lbisterfeldbe@sbwire.com,false,79,24488.12,18.2,Yellow +412,lganniclifftbf,isaysebf@bloomberg.com,true,69,50918.29,159.5,Crimson +413,nmorlandbg,lhirchebg@biblegateway.com,true,96,71208.1,11.2,Khaki +414,lwiggingtonbh,ksewartbh@domainmarket.com,false,72,9810.99,41.2,Fuscia +415,gcritchlowbi,aboldockbi@scribd.com,false,74,79339.5,49.7,Turquoise +416,ktwiggebj,hhaackbj@friendfeed.com,true,54,67528.23,105.8,Puce +417,bjolleybk,cmiltonbk@cbc.ca,false,14,84296.1,114.4,Yellow +418,pfaulkesbl,sbyassbl@deviantart.com,true,14,48540.93,15.5,Yellow +419,bthomingabm,rfrancillobm@diigo.com,true,31,69763.88,68.8,Crimson +420,sfidelebn,tgelsthorpebn@berkeley.edu,false,92,74216.48,99.8,Fuscia +421,ccopnarbo,jgregolibo@csmonitor.com,true,85,5272.03,195.7,Blue +422,zsimoensbp,smarchantbp@umn.edu,false,61,97579.11,56.9,Crimson +423,gbrackpoolbq,fdorrobq@desdev.cn,false,54,55674.94,226.2,Violet +424,abaishbr,amcquillenbr@foxnews.com,true,51,87600.11,11.6,Aquamarine +425,mrunhambs,eecclesharebs@sitemeter.com,false,75,14877.28,102.2,Crimson +426,gligginsbt,cdrysdallbt@archive.org,false,90,78363.69,33.0,Fuscia +427,cgoligherbu,sjedraszekbu@shutterfly.com,false,22,10308.25,40.0,Puce +428,rpincottbv,tpietzkebv@statcounter.com,false,41,51972.96,192.6,Blue +429,dbehnbw,krunciemanbw@mozilla.com,true,14,58830.53,156.3,Mauv +430,cschnitterbx,fbirchwoodbx@moonfruit.com,false,91,31120.89,188.0,Fuscia +431,arubertby,cbaglinby@unicef.org,true,96,82876.82,163.7,Pink +432,amaccomebz,wphilippardbz@etsy.com,false,46,50503.63,40.2,Orange +433,klowmassc0,rlauxmannc0@thetimes.co.uk,true,56,31769.36,33.6,Fuscia +434,evennersc1,fharnesc1@prnewswire.com,false,46,4024.85,28.4,Khaki +435,clammersc2,sdengelc2@google.com.br,false,67,70758.56,106.9,Goldenrod 
+436,fguytonc3,skleinschmidtc3@wiley.com,false,19,82524.39,139.6,Teal +437,clysaghtc4,nkestellc4@artisteer.com,true,63,48171.04,111.5,Crimson +438,gheretyc5,cpriskc5@rediff.com,true,77,41566.35,70.5,Crimson +439,rmacfaydenc6,jjanoschekc6@seattletimes.com,false,32,72329.47,0.8,Mauv +440,gcaselickc7,dantrimc7@usnews.com,true,36,55473.45,49.8,Khaki +441,lhalfacreec8,adownc8@biblegateway.com,false,55,91352.59,87.0,Purple +442,hcampionec9,cjanjusevicc9@i2i.jp,false,94,5622.52,194.4,Teal +443,acreadyca,prubkeca@biglobe.ne.jp,false,24,98545.91,237.6,Maroon +444,gbluescb,ahuntingdoncb@sciencedirect.com,true,43,48213.68,233.5,Indigo +445,eworleycc,fthainecc@unesco.org,true,27,91966.14,2.1,Yellow +446,mdellentycd,fgirodincd@zdnet.com,true,27,67506.28,119.6,Maroon +447,cmeadmorece,hcoleiroce@spiegel.de,true,64,62494.96,126.9,Blue +448,breincf,mpaulatcf@shop-pro.jp,true,13,3586.78,5.1,Maroon +449,smcgrillcg,mmackinnoncg@google.ru,false,27,84078.07,91.9,Aquamarine +450,etinghillch,vgablerch@opensource.org,true,90,66555.31,45.9,Teal +451,vsnarci,mvaseyci@xing.com,false,55,44496.92,12.3,Teal +452,alesekcj,nrosenthalcj@ox.ac.uk,true,14,84908.63,49.4,Puce +453,dvangiffenck,preekieck@tamu.edu,false,86,-4656.46,133.3,Aquamarine +454,dblowickcl,dhandkecl@cmu.edu,false,53,83013.4,161.3,Violet +455,kavocm,rplottcm@prnewswire.com,true,91,57038.88,233.5,Purple +456,xteaguecn,amcloughlincn@mit.edu,true,41,24449.76,102.1,Goldenrod +457,peldridgeco,jkermonco@examiner.com,false,83,24252.27,43.4,Blue +458,lreamancp,ogoakscp@indiegogo.com,false,85,13434.49,124.0,Mauv +459,lgasquoinecq,aleececq@skype.com,true,22,75570.45,30.3,Red +460,jpepperillcr,ddunhillcr@kickstarter.com,true,92,94767.55,211.6,Purple +461,pnabbscs,balkercs@cmu.edu,true,95,27097.71,60.5,Orange +462,ggraceyct,senglishct@newyorker.com,true,99,54343.78,12.1,Khaki +463,kdastcu,hfraynecu@free.fr,false,86,14267.99,56.9,Turquoise +464,kkrolmancv,kmarcamcv@drupal.org,false,30,20297.42,89.8,Aquamarine 
+465,tfustcw,sburchcw@umn.edu,false,59,84527.6,55.0,Pink +466,dmcclarycx,cjoblincx@liveinternet.ru,false,63,59220.3,185.1,Mauv +467,pgaincy,mdivisekcy@prweb.com,false,85,71012.99,35.4,Goldenrod +468,sdamiatacz,mcresercz@sina.com.cn,false,54,99816.64,15.3,Maroon +469,kbeininckd0,ipaolillod0@pcworld.com,false,45,-4173.66,191.1,Aquamarine +470,edavenalld1,kshoried1@newsvine.com,true,82,1277.98,157.5,Maroon +471,aduchenned2,mhillhoused2@163.com,true,36,26023.31,133.0,Indigo +472,jverillod3,dsawerd3@youtube.com,true,64,3535.4,191.8,Teal +473,hpolkinghorned4,btuffeyd4@elegantthemes.com,false,86,36677.58,32.0,Fuscia +474,bbarfieldd5,mbellissd5@mashable.com,false,15,58698.35,83.9,Turquoise +475,gstaveleyd6,nmcgrirld6@sourceforge.net,false,28,80702.44,179.9,Mauv +476,ccozbyd7,kmckeaneyd7@geocities.jp,true,39,82585.43,211.4,Maroon +477,fswalteridged8,wcoitd8@chron.com,false,99,7611.06,172.1,Yellow +478,ahigbind9,gyarrelld9@weebly.com,false,39,71771.24,90.7,Yellow +479,asesonda,kcarsbergda@meetup.com,true,63,26044.56,191.6,Green +480,rlavistedb,jpearceydb@ycombinator.com,true,13,50080.28,31.0,Puce +481,gpigottdc,ekirkwooddc@japanpost.jp,false,87,69816.31,88.4,Fuscia +482,cbacklerdd,ochilcottdd@bloglines.com,true,53,35472.31,6.8,Fuscia +483,kbalasde,featde@senate.gov,false,92,53611.51,103.3,Violet +484,ffelipdf,bkiesseldf@phoca.cz,false,93,7678.89,12.5,Indigo +485,qgimenezdg,hburghalldg@mashable.com,false,30,3030.1,39.9,Maroon +486,rjobeydh,dvanesdh@foxnews.com,true,82,29836.2,221.7,Crimson +487,khedlingdi,kparlattdi@state.tx.us,true,52,85312.44,163.2,Orange +488,vpietrowskidj,djozefdj@ning.com,true,26,25924.48,218.6,Pink +489,sdarkedk,mhebbesdk@friendfeed.com,false,16,81693.36,90.3,Teal +490,gbanthorpedl,hbuncedl@house.gov,true,61,15400.9,46.5,Crimson +491,rhairesnapedm,kjentondm@live.com,false,37,26461.52,147.4,Orange +492,hmccaguedn,ppelmandn@chicagotribune.com,false,86,67490.87,44.7,Goldenrod +493,csandalldo,jruddindo@sogou.com,true,58,91239.02,158.2,Blue 
+494,dberrdp,mtripetdp@bravesites.com,false,26,61265.42,203.8,Blue +495,jbrockmandq,mstledgerdq@mozilla.com,false,96,63215.72,45.4,Goldenrod +496,dsommerlanddr,dellicedr@theguardian.com,true,76,20800.83,122.6,Mauv +497,abusbyds,wwaterstoneds@dell.com,false,93,68359.11,70.5,Teal +498,lscothorndt,dcluleedt@yale.edu,false,19,79359.34,175.2,Red +499,mlewintondu,tnutleydu@photobucket.com,false,32,-2300.81,73.0,Green +500,lelhamdv,wsobtkadv@squidoo.com,false,89,37893.73,181.8,Crimson +501,kscimonidw,lkyndreddw@behance.net,true,67,85584.98,230.6,Green +502,tavrasindx,paylesburydx@nydailynews.com,false,98,41192.29,74.8,Indigo +503,smyhilldy,kbasondy@bbc.co.uk,true,45,38736.67,143.0,Green +504,dabrahmerdz,cdagnandz@de.vu,true,62,9378.66,20.0,Red +505,kkeemere0,gbeamisse0@nytimes.com,false,98,4162.71,18.7,Fuscia +506,riacovuccie1,calejandroe1@shareasale.com,false,91,91952.04,52.6,Pink +507,sruppertze2,icolericke2@mit.edu,true,86,34428.37,62.8,Purple +508,xbeaushawe3,ssirettee3@woothemes.com,true,66,11770.33,235.1,Green +509,tpicoppe4,pderycote4@gnu.org,false,23,2189.34,103.3,Pink +510,learingeye5,mmckillope5@ezinearticles.com,false,82,88726.34,143.4,Blue +511,tletsone6,jdottrelle6@icio.us,true,37,49164.09,146.9,Red +512,dcrannye7,xcarlane7@abc.net.au,false,71,93356.56,117.1,Aquamarine +513,lmaxwalee8,pwhitakere8@tripadvisor.com,false,76,1833.27,58.5,Orange +514,csalvadore9,rbalfe9@quantcast.com,false,42,52992.26,75.4,Orange +515,llethburyea,ysoreyea@utexas.edu,false,50,11336.56,172.7,Green +516,lcalverteb,ahawkeridgeeb@nymag.com,true,83,98451.86,91.3,Green +517,atimsonec,ktozerec@examiner.com,true,88,8666.01,0.7,Pink +518,tloffilled,gkenforded@unblog.fr,false,33,32990.38,52.0,Blue +519,smelchioree,atunnacliffeee@topsy.com,true,94,91006.84,181.3,Green +520,hbeaversef,swildsmithef@123-reg.co.uk,true,13,66133.92,26.0,Mauv +521,nmatteinieg,sbatecokeg@economist.com,false,98,77972.77,37.1,Turquoise +522,fwakeeh,dadanezeh@dot.gov,false,15,64248.62,9.4,Yellow 
+523,creggianiei,bstrugnellei@ocn.ne.jp,false,78,34758.36,126.8,Red +524,gkiggelej,nfarloweej@disqus.com,true,96,42426.05,132.9,Violet +525,jcasswellek,agerwoodek@diigo.com,false,40,19057.94,82.9,Turquoise +526,ahallingel,hamerighiel@wunderground.com,true,13,54084.8,22.7,Mauv +527,mlagaduem,amacknockiterem@elpais.com,false,68,863.69,179.8,Aquamarine +528,bboddingtonen,aizakofen@huffingtonpost.com,false,73,68249.86,64.3,Mauv +529,ceyameo,wtibbettseo@aol.com,false,88,13860.6,62.0,Crimson +530,bprosekep,hibbisonep@hao123.com,false,50,10094.89,132.8,Crimson +531,gmckeveneyeq,bbyreseq@cyberchimps.com,false,94,4845.16,237.6,Purple +532,ctwidaleer,dkurtener@so-net.ne.jp,true,57,93976.79,10.6,Red +533,dnotoes,blembrickes@huffingtonpost.com,false,79,14319.39,217.5,Aquamarine +534,jsoaperet,hredleyet@canalblog.com,false,24,19354.76,169.8,Khaki +535,kgleweu,bdenyukineu@51.la,false,22,35109.3,233.3,Mauv +536,kyareev,ldearanev@archive.org,false,76,43341.02,140.4,Pink +537,nmannersew,rlongworthyew@sourceforge.net,true,61,69056.45,150.3,Fuscia +538,bpendrichex,jdamarellex@godaddy.com,true,59,18328.73,77.8,Purple +539,llinfordey,jfenderey@slate.com,true,80,27543.27,173.0,Crimson +540,bcorderyez,wgrundez@hugedomains.com,true,89,28790.81,4.5,Crimson +541,bdabneyf0,adartnallf0@simplemachines.org,false,44,2838.02,188.7,Red +542,ahuguetf1,marstingallf1@spiegel.de,true,79,2149.74,37.0,Teal +543,reskrietf2,jmurleyf2@nyu.edu,false,61,17501.06,52.7,Mauv +544,hfaleyf3,btwinnf3@istockphoto.com,true,32,20482.91,77.1,Mauv +545,kconveryf4,btolmanf4@biglobe.ne.jp,true,22,36218.53,103.2,Turquoise +546,ggoffef5,jstpierref5@paginegialle.it,false,70,17740.9,171.1,Green +547,dmedwayf6,mpaddlef6@hubpages.com,true,42,69034.54,225.7,Orange +548,dstandingf7,wfarlowef7@ebay.com,false,74,98539.49,12.1,Turquoise +549,abagleyf8,scasseyf8@bravesites.com,true,90,68274.04,33.1,Mauv +550,bmenlowf9,cdelgardillof9@wikipedia.org,false,85,67469.33,128.1,Maroon 
+551,ssambrookfa,sgeckefa@phpbb.com,false,37,56617.84,132.8,Orange +552,hwalworthfb,kkleintfb@fastcompany.com,true,68,37200.79,2.9,Violet +553,ghamperfc,lmaccathayfc@shop-pro.jp,false,72,58659.9,67.7,Puce +554,pkoresfd,bwippermannfd@chronoengine.com,true,35,55984.59,216.8,Turquoise +555,ldyetfe,aclemofe@example.com,true,25,12718.57,227.6,Aquamarine +556,ebogueff,kbenfieldff@blogs.com,true,20,56766.15,235.9,Pink +557,cmaynefg,ssomertonfg@va.gov,false,27,3308.83,131.1,Fuscia +558,ftreagusfh,mgrishukovfh@furl.net,true,75,-4161.96,113.6,Violet +559,wlacostefi,svolantefi@sourceforge.net,true,70,33537.64,142.5,Red +560,jogeneayfj,jgoodafj@squidoo.com,false,85,33917.02,35.1,Fuscia +561,npashenfk,meveringhamfk@zimbio.com,true,65,69465.96,76.6,Indigo +562,fshawlfl,bgarveyfl@vinaora.com,false,67,95207.61,153.8,Crimson +563,rsultanfm,atulleyfm@github.io,true,57,58044.47,14.4,Yellow +564,slukeschfn,iscogganfn@who.int,false,22,73806.5,156.1,Red +565,cpotteridgefo,narentsfo@infoseek.co.jp,false,77,63771.41,236.2,Mauv +566,vjanotafp,runwinfp@ehow.com,true,81,61250.82,198.1,Orange +567,vbrodeaufq,acouroyfq@un.org,true,63,57957.38,186.3,Goldenrod +568,dmaciejafr,jbandierafr@printfriendly.com,false,57,30884.44,101.0,Pink +569,togrowganefs,wrallinshawfs@hubpages.com,true,19,49263.3,17.1,Yellow +570,acocklandft,mmakeyft@va.gov,false,94,9313.52,195.9,Turquoise +571,tgeraldfu,rcuttellfu@bigcartel.com,true,50,87898.58,158.2,Indigo +572,fjosupeitfv,jpelerinfv@tripod.com,false,41,27115.87,126.8,Green +573,bbuxceyfw,ailesfw@topsy.com,true,63,41515.85,117.1,Puce +574,msinkinsfx,ttrusslerfx@seattletimes.com,true,65,72000.4,191.5,Purple +575,adraudefy,ytusonfy@zdnet.com,true,60,88127.41,230.1,Blue +576,amannevillefz,nfeedhamfz@macromedia.com,false,50,65633.21,95.7,Pink +577,mvogelg0,bchalcroftg0@qq.com,false,34,50920.56,18.5,Yellow +578,lorring1,gfaccinig1@photobucket.com,true,21,64347.95,132.6,Pink +579,arudyardg2,akomorowskig2@netvibes.com,false,13,4256.31,170.7,Red 
+580,mbrundleg3,ebartolettig3@twitter.com,false,63,60552.96,131.6,Teal +581,fglasscoog4,ndaughtryg4@dagondesign.com,true,67,11550.21,81.9,Violet +582,gpampling5,mjenseng5@thetimes.co.uk,true,89,86453.75,149.0,Pink +583,sespinag6,abrundallg6@symantec.com,false,94,65972.88,120.0,Green +584,emckerrong7,smillwallg7@mit.edu,true,88,45074.19,124.3,Violet +585,hpetzg8,ebrendelg8@gmpg.org,false,37,92724.01,133.8,Maroon +586,lsiddaleyg9,caxupg9@cisco.com,false,51,67792.0,108.5,Mauv +587,fministerga,ccroosega@quantcast.com,true,84,65051.04,133.2,Teal +588,creekiegb,ccarpmilegb@blogtalkradio.com,true,51,31816.56,54.8,Indigo +589,rleahygc,ejosselgc@parallels.com,false,51,81587.34,66.5,Maroon +590,mantczakgd,drameletgd@patch.com,false,41,-3646.83,81.6,Purple +591,sdorricottge,ceveringhamge@yandex.ru,true,53,41317.45,143.2,Mauv +592,wvedeneevgf,cevendengf@engadget.com,false,49,49402.97,232.4,Khaki +593,fgorsegg,cplattgg@blogspot.com,true,35,47432.17,20.0,Orange +594,alapidusgh,awridegh@indiatimes.com,false,20,78788.0,179.6,Pink +595,tcosgrovegi,gnorthcotegi@cloudflare.com,false,31,28108.13,212.9,Red +596,gelacoategj,adebiasigj@washingtonpost.com,false,58,62853.69,66.1,Indigo +597,gmarplesgk,mwyldgk@yahoo.co.jp,true,54,49065.95,158.6,Puce +598,dbussellgl,imatysgl@google.de,true,90,7724.75,8.5,Mauv +599,dbuessengm,bfolkardgm@surveymonkey.com,false,49,48455.57,233.9,Puce +600,gchavegn,vmilesopgn@jimdo.com,true,99,13469.13,184.1,Yellow +601,jinglesfieldgo,dcolcombgo@linkedin.com,true,45,1004.92,6.2,Teal +602,mmumfordgp,kmountergp@archive.org,false,28,-4416.49,121.5,Indigo +603,bmoricangq,wmcgeochgq@wordpress.org,false,17,12077.01,14.4,Violet +604,bfinlaisongr,acliffordgr@unicef.org,false,89,89849.04,107.0,Maroon +605,mpettettgs,tvadergs@issuu.com,true,96,42486.98,148.5,Green +606,ldoggettgt,ceallisgt@illinois.edu,false,61,43463.64,99.6,Green +607,ilarradgu,bstaplesgu@unc.edu,true,60,49913.31,157.4,Teal +608,hlivesaygv,pdowleygv@studiopress.com,true,13,7664.88,93.2,Goldenrod 
+609,svoasgw,rpetraccigw@shareasale.com,false,29,40766.57,75.7,Mauv +610,ncouthgx,bglendinninggx@hexun.com,false,99,79519.59,21.9,Goldenrod +611,ogallandersgy,ldruettgy@chron.com,false,96,13354.06,131.9,Goldenrod +612,vescreetgz,slowdyanegz@yahoo.com,false,88,40116.08,239.8,Goldenrod +613,rforesighth0,jdanneh0@devhub.com,false,97,79150.18,78.3,Orange +614,gbradburneh1,wsebireh1@google.pl,false,66,37487.31,193.7,Aquamarine +615,tbessh2,jfoulkesh2@nasa.gov,true,68,65805.53,136.1,Aquamarine +616,hbraveryh3,btalmanh3@apple.com,true,94,48039.97,65.4,Crimson +617,bharomeh4,jbarchrameevh4@dedecms.com,true,67,5737.12,132.0,Crimson +618,rbarrassh5,aholyardh5@youtu.be,false,91,63675.2,87.3,Goldenrod +619,rpurkinsh6,acleevesh6@4shared.com,true,23,10570.63,30.3,Aquamarine +620,hshapirah7,mlengthornh7@bluehost.com,true,15,5091.39,126.0,Khaki +621,mgallemoreh8,mdaveridgeh8@bigcartel.com,false,51,39541.9,53.5,Red +622,jtremeerh9,rmoulsdaleh9@microsoft.com,false,41,3757.36,209.6,Goldenrod +623,mmccarlha,iwehnerha@posterous.com,false,49,90839.15,168.9,Crimson +624,sbarkshb,dduxbarryhb@nature.com,false,88,89436.71,47.7,Teal +625,scrusehc,ddykashc@wikimedia.org,false,94,67142.77,12.4,Khaki +626,apharrohd,scantihd@drupal.org,true,18,39180.88,108.4,Mauv +627,hdurwardhe,dhammerbergerhe@seattletimes.com,false,86,71736.66,126.7,Pink +628,erousthf,lelixhf@who.int,false,56,58459.94,34.9,Fuscia +629,mdavidofhg,rspellarhg@usgs.gov,false,38,61614.1,89.9,Turquoise +630,ttuxellhh,mspatarihh@feedburner.com,false,78,51309.61,131.5,Violet +631,dbalehi,rroomehi@cpanel.net,true,86,62390.84,199.1,Aquamarine +632,fbugdallhj,gmartyhj@about.me,false,33,2007.57,53.7,Yellow +633,enatthk,nstebbinshk@hibu.com,true,37,50801.2,60.0,Crimson +634,sdrinkallhl,cmosebyhl@youtube.com,false,74,51150.22,144.6,Maroon +635,dsmallshawhm,randrieuhm@gravatar.com,false,56,85903.73,196.3,Yellow +636,nwinnetthn,modwyerhn@exblog.jp,true,63,32291.14,122.5,Turquoise 
+637,shelstromho,cduckhamho@google.co.uk,false,33,97661.21,85.5,Blue +638,ckeelyhp,kpinnockhp@statcounter.com,false,81,15296.61,128.7,Teal +639,dgennerichhq,tpietrowiczhq@nasa.gov,true,85,77357.36,50.8,Orange +640,jsimionihr,rpetriehr@examiner.com,true,35,19017.44,128.5,Aquamarine +641,kespinahs,icosgriffhs@webnode.com,true,14,6159.76,207.7,Turquoise +642,pdurradht,agoodwellht@canalblog.com,false,62,59773.15,123.4,Indigo +643,sjapphu,hcrichhu@godaddy.com,false,86,11573.15,44.1,Indigo +644,narringtonhv,jcoslitthv@stanford.edu,false,37,30502.54,96.5,Purple +645,kpoolmanhw,igamilhw@ning.com,false,56,56780.97,162.7,Crimson +646,skobierieckihx,jbyhx@usatoday.com,false,85,41612.15,79.1,Pink +647,icouthhy,csarllhy@go.com,true,54,31181.74,89.7,Maroon +648,jjoselandhz,dboarderhz@house.gov,false,36,80954.36,86.9,Yellow +649,irichmondi0,fklimushevi0@msu.edu,false,42,37682.61,234.9,Maroon +650,ssheffieldi1,lschlagtmansi1@dyndns.org,true,52,-3665.29,166.6,Orange +651,fmulcahyi2,ehavardi2@ihg.com,true,49,41254.87,44.2,Crimson +652,djouannyi3,sbrommagei3@google.cn,false,60,71442.22,132.4,Khaki +653,sroughi4,dorrilli4@virginia.edu,true,55,97207.32,18.4,Fuscia +654,emalamorei5,hharnelli5@harvard.edu,false,19,1795.59,43.7,Turquoise +655,zbitteni6,gbeedomi6@flickr.com,false,59,-364.24,157.1,Orange +656,idittsoni7,gdebruini7@reuters.com,false,43,41980.16,179.8,Maroon +657,mshirili8,gdamanti8@hud.gov,false,35,84967.11,28.4,Aquamarine +658,jsawnwyi9,hskipperi9@columbia.edu,false,89,46970.78,18.4,Goldenrod +659,hsulteria,odunnania@privacy.gov.au,false,86,14882.28,73.6,Goldenrod +660,mfoatib,gclouterib@archive.org,false,45,582.32,171.5,Blue +661,pogleasaneic,vfavellic@blogtalkradio.com,true,86,39702.75,14.2,Blue +662,vjuraid,oauselloid@csmonitor.com,false,92,16612.18,106.2,Mauv +663,ntoweie,jchrstineie@tinypic.com,false,67,36379.08,68.2,Khaki +664,mgossingtonif,sbeiningif@last.fm,true,52,79030.26,14.3,Fuscia +665,cdhoogheig,aflattmanig@goo.ne.jp,true,58,77394.03,97.2,Fuscia 
+666,ccochraneih,cfeaveryearih@dell.com,true,83,70191.19,6.9,Violet +667,eabrahamowitczii,odigiacomettinoii@google.pl,false,33,4397.91,198.4,Orange +668,cmaginnij,waberkirderij@plala.or.jp,false,24,1057.09,88.1,Purple +669,jpetranik,cdawltreyik@census.gov,true,44,63915.45,8.6,Pink +670,eswadlingil,wwhytemanil@europa.eu,true,56,71911.47,190.2,Maroon +671,eshemeltim,kwanderschekim@networkadvertising.org,false,45,44066.59,212.0,Red +672,fmolandin,chassallin@dailymail.co.uk,true,22,14561.19,238.5,Blue +673,nwillicottio,despinolaio@edublogs.org,false,86,32655.61,50.6,Goldenrod +674,lcarlisleip,pyesichevip@wisc.edu,false,66,75660.77,30.3,Pink +675,lalessandriniiq,cransoniq@youtu.be,false,74,47901.36,39.4,Fuscia +676,rholtawayir,nnetleyir@howstuffworks.com,true,77,32315.92,144.8,Blue +677,aprandiis,esibbsonis@upenn.edu,false,87,99274.22,102.8,Puce +678,tleetit,ftawtonit@flavors.me,true,20,77658.35,185.3,Orange +679,nstollmeieriu,bdearaniu@yahoo.com,false,19,63735.29,3.8,Mauv +680,mmcfieiv,hdykaiv@cbc.ca,true,84,85061.62,173.9,Fuscia +681,corwiniw,fbristeriw@biglobe.ne.jp,false,45,58073.98,218.6,Blue +682,vaddionizioix,abaudouxix@wp.com,true,67,86234.95,207.4,Yellow +683,hurlichiy,jkenneaiy@angelfire.com,false,85,51750.87,136.6,Purple +684,kkidstoniz,rmanlowiz@sitemeter.com,false,19,9916.06,28.2,Yellow +685,bginityj0,rhillyattj0@reuters.com,true,15,41133.12,216.9,Pink +686,jlauxj1,bpohlsj1@about.me,false,48,70224.36,172.8,Mauv +687,ttillardj2,shandkej2@netscape.com,false,81,22207.06,127.5,Purple +688,dvogellerj3,ewohlerj3@latimes.com,true,42,2866.52,52.5,Yellow +689,cbullierj4,mbrafieldj4@123-reg.co.uk,true,53,97194.09,61.9,Fuscia +690,nmalafej5,mkristoffersenj5@booking.com,true,42,93893.88,14.1,Red +691,nmeritonj6,myuj6@geocities.com,false,20,73740.27,166.9,Yellow +692,cfilkinj7,jcaplanj7@slideshare.net,true,90,27306.55,181.3,Goldenrod +693,thullyj8,wdanilowiczj8@zdnet.com,true,83,60098.64,117.3,Puce +694,mharnettyj9,zgooddiej9@nasa.gov,true,22,61067.9,237.6,Aquamarine 
+695,csielyja,kezzleja@cnn.com,true,43,77517.54,89.9,Orange +696,lkellochjb,lpatshulljb@google.com.br,false,21,37309.43,49.7,Violet +697,crussonjc,jruddomjc@e-recht24.de,true,96,27214.97,95.7,Turquoise +698,lbernardeaujd,mbrestonjd@studiopress.com,true,62,62163.5,171.4,Violet +699,lhousecroftje,kkeepeje@vinaora.com,true,73,64899.23,100.1,Turquoise +700,mflemmingjf,jferaghjf@bluehost.com,true,92,86381.37,151.1,Crimson +701,cbeaufoyjg,ageldertjg@washington.edu,false,86,10538.88,69.8,Blue +702,bstuckjh,hridewoodjh@ed.gov,false,13,41191.55,212.3,Pink +703,jmcgoochji,ecrebbinji@ibm.com,true,47,78938.47,210.5,Teal +704,pkernleyjj,clysterjj@vistaprint.com,false,45,58732.87,140.8,Blue +705,jweedonjk,awalkerjk@foxnews.com,false,26,78719.54,148.6,Maroon +706,mfluinjl,lbrockingtonjl@sun.com,true,92,50186.14,116.6,Teal +707,atwelljm,hbroxisjm@barnesandnoble.com,false,86,41574.6,147.7,Goldenrod +708,lschenkeljn,chadawayjn@mediafire.com,true,82,16726.39,164.3,Puce +709,swarbysjo,fgratlandjo@msu.edu,true,54,88648.86,44.8,Crimson +710,lsnodinjp,gbengtssonjp@networksolutions.com,true,84,71325.02,66.1,Aquamarine +711,rbiddlestonejq,bdosdelljq@webmd.com,false,61,54819.81,124.6,Crimson +712,goconcannonjr,ceddyjr@sohu.com,false,79,32417.97,20.0,Mauv +713,kcorderojs,achingedehalsjs@odnoklassniki.ru,true,26,76691.29,138.6,Fuscia +714,kmarkwelljt,lcheesleyjt@nymag.com,false,99,21081.63,67.2,Teal +715,jrimingtonju,bswalteridgeju@spotify.com,true,23,63174.74,156.8,Red +716,kyeliashevjv,stalmadgejv@multiply.com,false,83,-3340.18,213.5,Indigo +717,rboughtonjw,dhousdenjw@dedecms.com,true,64,40979.68,104.8,Fuscia +718,wbegwelljx,uposthilljx@xing.com,true,86,89664.13,193.8,Teal +719,ahalltjy,earonowiczjy@ameblo.jp,false,44,31149.9,158.3,Indigo +720,cmullisjz,igasnelljz@bizjournals.com,true,42,33540.98,227.0,Fuscia +721,beastmank0,mtoothillk0@issuu.com,false,75,74087.41,78.9,Purple +722,srangeleyk1,bmatthewmank1@geocities.com,true,60,45820.23,74.8,Indigo 
+723,lowensk2,mdegnenk2@apache.org,true,54,98564.02,125.0,Khaki +724,gmockfordk3,ydaviotk3@domainmarket.com,false,55,26509.13,141.5,Aquamarine +725,hcatherinek4,vbickfordk4@artisteer.com,true,83,59087.25,2.1,Goldenrod +726,eparcallk5,gstoresk5@comcast.net,false,68,53765.82,117.4,Maroon +727,kthringk6,sbainbridgek6@adobe.com,true,93,8724.04,189.9,Teal +728,jmeggisonk7,mfebvrek7@webs.com,true,33,56904.29,201.7,Yellow +729,tdeeveyk8,fweirk8@merriam-webster.com,false,58,30724.23,44.8,Crimson +730,amorgenk9,imcallasterk9@umn.edu,false,58,65230.09,193.4,Blue +731,sbengtssonka,mkhristoforovka@reverbnation.com,true,17,36651.28,178.2,Green +732,glilleykb,cdifranceschikb@cloudflare.com,true,53,17126.99,170.8,Yellow +733,dwellbelovedkc,ahatchellkc@dailymotion.com,false,83,70630.56,71.0,Crimson +734,handraudkd,mstiantkd@mashable.com,true,67,5945.12,119.6,Red +735,gdouseke,grallingke@netscape.com,true,36,37991.89,162.3,Indigo +736,sgerreykf,gdriffekf@blinklist.com,true,19,73181.22,173.5,Indigo +737,zlitzmannkg,zatcherleykg@qq.com,true,54,4223.91,5.0,Violet +738,mferrarokh,bbennekh@earthlink.net,true,40,23135.73,207.3,Goldenrod +739,abarclayki,kglantzki@house.gov,true,67,99852.81,59.3,Mauv +740,dtwinningkj,nfreiburgerkj@irs.gov,true,72,92527.26,226.3,Mauv +741,svanderkruijskk,djoskovitchkk@fema.gov,true,63,65954.96,34.4,Crimson +742,sseinekl,blissandrinikl@blinklist.com,true,24,15488.9,48.7,Puce +743,mroddamkm,nwaddellkm@yale.edu,false,70,87047.79,65.2,Khaki +744,rboorkn,zveschambeskn@sogou.com,false,61,82878.07,75.2,Orange +745,kruddomko,sbandeyko@ow.ly,true,48,50560.12,165.6,Red +746,cgiberdkp,ksarsonkp@icio.us,false,22,67979.25,76.7,Blue +747,swhitehallkq,ojustunkq@prweb.com,true,94,16531.31,138.0,Goldenrod +748,ylaylandkr,cannakinkr@google.cn,false,52,4113.22,224.0,Green +749,rpietaschks,bmccuffieks@msn.com,false,26,91627.92,109.0,Turquoise +750,mmiddlemasskt,jscadingkt@yale.edu,true,43,50070.06,16.8,Mauv +751,ghuggardku,rosgarbyku@free.fr,false,97,84868.77,136.5,Green 
+752,fsealeafkv,egutridgekv@google.ca,false,78,80357.62,29.8,Teal +753,mstandidgekw,tclowtonkw@craigslist.org,true,60,52510.57,79.6,Khaki +754,fsavillekx,cskeatskx@cafepress.com,false,98,54177.86,55.4,Purple +755,fdreakinky,ycarloneky@jalbum.net,true,37,54960.14,7.0,Mauv +756,fcatterickkz,gkwietakkz@simplemachines.org,false,21,5613.0,66.4,Green +757,dvanhalenl0,ohayerl0@fotki.com,false,22,58864.42,13.5,Turquoise +758,bhubbuckel1,sdavidofskil1@archive.org,false,22,37263.07,216.5,Pink +759,jroparsl2,crodliffl2@npr.org,true,51,53206.93,212.7,Green +760,mfennellyl3,ebogacel3@i2i.jp,false,78,4464.91,5.7,Maroon +761,mglantonl4,ejestl4@reverbnation.com,true,26,1767.14,206.0,Indigo +762,qmontfortl5,aalbonel5@4shared.com,false,42,59088.77,140.3,Violet +763,lgyerl6,jmaccurleyl6@eventbrite.com,false,22,19183.33,125.9,Indigo +764,mhadgkissl7,lcansdalel7@wisc.edu,false,88,97933.34,171.6,Turquoise +765,cdodsl8,sdydel8@globo.com,true,48,8972.11,78.2,Orange +766,khadcroftl9,lfilchaginl9@tinypic.com,false,39,87113.59,122.8,Indigo +767,agiblinla,cjudgkinsla@dell.com,true,77,94061.18,141.5,Crimson +768,dhurllb,damylb@epa.gov,false,43,4687.76,38.7,Mauv +769,shanscombelc,cpidgeleylc@cmu.edu,false,66,-4321.45,194.5,Turquoise +770,aashdownld,mhuntingld@jugem.jp,true,23,61671.86,225.4,Aquamarine +771,rportinarile,mlerwillle@studiopress.com,true,63,63822.19,147.6,Turquoise +772,msaylorlf,lcurnocklf@who.int,true,86,60550.42,73.2,Green +773,jsparwaylg,kjacqueminotlg@canalblog.com,true,41,50514.33,90.0,Yellow +774,mlegerwoodlh,rporsonlh@toplist.cz,true,33,97239.82,159.6,Teal +775,mmecchili,nparrishli@amazon.de,false,68,91486.93,79.0,Mauv +776,dkilleenlj,thamblinglj@washington.edu,true,64,69715.69,22.5,Violet +777,omosconelk,jgreensideslk@constantcontact.com,true,44,5241.46,110.0,Teal +778,tbratcherll,tandrivelll@simplemachines.org,true,98,20618.77,142.9,Teal +779,twanelm,hhentzelm@ovh.net,false,27,22452.95,21.6,Aquamarine +780,ngegayln,abassomln@vimeo.com,false,63,70329.93,201.4,Khaki 
+781,sottosenlo,aerleylo@techcrunch.com,false,40,20516.77,162.3,Violet +782,kmeadleylp,sgiottoilp@i2i.jp,true,69,15457.58,83.8,Purple +783,jgatfieldlq,emohanlq@tinypic.com,false,24,69864.91,174.8,Turquoise +784,bgabbotlr,adenhamlr@whitehouse.gov,true,87,97852.75,108.5,Fuscia +785,chileyls,idurantels@kickstarter.com,false,63,26897.56,153.0,Violet +786,bfawkeslt,hlonghirstlt@nyu.edu,true,83,42854.45,7.2,Puce +787,abygrovelu,jlainlu@goo.ne.jp,false,93,1715.17,222.8,Yellow +788,cscranneylv,mswaytonlv@imageshack.us,false,37,39586.8,173.6,Goldenrod +789,bswetmanlw,mlittledykelw@shareasale.com,true,13,93620.79,25.6,Pink +790,rcastellx,mjenynslx@cbsnews.com,false,64,97502.03,151.7,Indigo +791,awhildely,zfidgely@si.edu,true,86,10900.19,96.0,Purple +792,pspinagelz,isommertonlz@umn.edu,false,89,34495.95,167.2,Aquamarine +793,daylmorem0,rasburym0@cmu.edu,true,85,51662.45,119.8,Puce +794,rrabsonm1,nvandenveldenm1@ucsd.edu,true,49,99877.75,231.4,Mauv +795,jcorneliusm2,louldcottm2@latimes.com,true,18,34783.14,72.5,Indigo +796,lmcsporonm3,ajentetm3@instagram.com,false,45,10515.56,142.7,Green +797,hchappelm4,jwinsparem4@reference.com,false,63,79383.73,41.9,Khaki +798,mroanm5,hlafayem5@illinois.edu,true,16,85865.88,185.5,Aquamarine +799,cscollardm6,etejerom6@msu.edu,true,77,75096.41,233.8,Green +800,vbelonem7,cmenurem7@nature.com,false,93,86735.4,57.5,Violet +801,rgiacobbom8,gsorsbym8@nifty.com,true,42,21567.63,118.9,Indigo +802,hcuffm9,mgrafhomm9@clickbank.net,true,59,47064.47,194.6,Red +803,sdeverilma,asibsonma@freewebs.com,true,99,70659.88,59.4,Mauv +804,nrowberrymb,cmercermb@yellowpages.com,true,20,40618.69,110.6,Indigo +805,jshavelmc,tcowinmc@wikispaces.com,true,91,53367.65,11.5,Maroon +806,bswalemd,tobraymd@foxnews.com,false,88,76575.62,23.5,Teal +807,tpolendineme,rrumgayme@bravesites.com,false,77,80167.6,222.7,Khaki +808,tredfernmf,jtrustriemf@phoca.cz,false,29,94929.88,158.4,Turquoise +809,oegintonmg,abolinomg@admin.ch,false,95,38729.62,210.9,Green 
+810,mcondliffemh,celsburymh@github.io,true,27,12947.55,90.5,Orange +811,emenarymi,rcurnokkmi@nps.gov,true,99,43420.32,191.0,Maroon +812,cbamfieldmj,ccrawforthmj@exblog.jp,false,64,-3111.18,199.8,Green +813,ndudlestonmk,creveleymk@ehow.com,true,88,91535.3,69.3,Red +814,nmcenteeml,amitkcovml@dmoz.org,false,58,22965.14,120.4,Maroon +815,ggarrymm,wvanyukovmm@tmall.com,true,25,20543.95,131.9,Puce +816,chandasidemn,amickleburghmn@amazon.co.uk,true,58,96068.68,193.5,Green +817,tfenbymo,cbowerbankmo@state.gov,true,57,86420.45,70.3,Crimson +818,hfeeneymp,mkilkennymp@reuters.com,true,26,13832.73,220.7,Yellow +819,cpeddelmq,wwedgemq@webeden.co.uk,false,15,-1753.01,141.1,Green +820,tadamecmr,tcosgreavemr@dmoz.org,true,62,54733.3,46.8,Purple +821,fbegginims,cpedicanms@dell.com,false,92,26331.35,152.3,Yellow +822,bzannotellimt,drillattmt@exblog.jp,true,98,53088.31,91.4,Mauv +823,bwildmanmu,sinsealmu@shareasale.com,true,27,33382.88,16.4,Yellow +824,tdunsmoremv,ztrayesmv@posterous.com,true,29,15765.88,70.0,Puce +825,cmoxonmw,hbenionmw@yellowbook.com,false,26,74238.43,92.0,Aquamarine +826,tkoppeckmx,ebemlottmx@noaa.gov,false,84,29802.32,91.6,Mauv +827,achampneysmy,kmullendermy@acquirethisname.com,true,82,68007.28,151.7,Blue +828,msainsburymz,auzzellmz@spiegel.de,true,13,44445.82,234.1,Yellow +829,tpiwelln0,yburchalln0@edublogs.org,true,87,72267.84,110.2,Khaki +830,kbainesn1,bchessumn1@paypal.com,false,55,66022.98,74.8,Orange +831,avallendern2,adesaveryn2@hubpages.com,true,33,-202.52,51.3,Goldenrod +832,zhanneyn3,dbougourdn3@posterous.com,false,90,94985.52,77.9,Red +833,mgahann4,lsilversmidn4@trellian.com,false,95,38053.25,239.0,Puce +834,gleverettn5,acharlickn5@about.me,true,23,68019.86,91.9,Fuscia +835,sburberyen6,kmickann6@google.it,false,43,23025.1,137.7,Fuscia +836,dlindupn7,jbullersn7@uol.com.br,false,79,13973.81,217.5,Violet +837,aklousnern8,furreyn8@infoseek.co.jp,true,88,92713.68,39.6,Red +838,htothn9,edrablen9@cbsnews.com,true,50,57212.07,148.3,Turquoise 
+839,bblaymiresna,reberzna@theatlantic.com,true,62,72365.05,184.9,Puce +840,dmcelweenb,wnelesnb@hexun.com,false,16,5898.99,48.6,Crimson +841,tbatissenc,hkowalskinc@timesonline.co.uk,true,25,59091.38,183.7,Teal +842,mgoviernd,rbaxendalend@timesonline.co.uk,false,45,25233.05,215.1,Yellow +843,ctarpeyne,lmcrobbne@dot.gov,false,96,69785.72,49.6,Yellow +844,ksquibbsnf,tkluliceknf@smugmug.com,true,92,17162.29,50.9,Red +845,fmaynellng,hhabertng@walmart.com,false,79,30896.94,0.8,Teal +846,cdunabienh,jbraznh@harvard.edu,false,98,25174.93,16.8,Pink +847,jvanhalenni,dnoddleni@google.pl,true,54,36548.43,161.4,Goldenrod +848,ohingenj,caldricknj@google.com.hk,true,89,17907.01,76.4,Crimson +849,jdruittnk,tkyngdonnk@elpais.com,false,70,10303.06,43.9,Goldenrod +850,agauntleynl,fchaikovskinl@amazon.com,true,30,36041.95,65.4,Pink +851,teatesnm,mdowyernm@indiegogo.com,false,66,77516.5,152.3,Turquoise +852,lowernn,evizornn@goo.ne.jp,false,13,80379.36,229.8,Crimson +853,schristallno,lcowderoyno@imageshack.us,true,90,-3916.31,220.0,Mauv +854,khofernp,hmerielnp@unesco.org,false,82,72738.09,116.2,Green +855,ibrolannq,rnewnq@mashable.com,true,99,64580.15,8.6,Maroon +856,rfilernr,ccliburnnr@bloglovin.com,true,97,75886.58,127.3,Violet +857,pocahsedyns,cgullyns@google.nl,false,47,66025.54,48.9,Teal +858,kbahlnt,ldwelleynt@census.gov,false,30,17006.19,59.6,Purple +859,tecknu,jrichtnu@trellian.com,false,65,84133.56,141.0,Yellow +860,akellehernv,ffeldmusnv@skyrock.com,true,68,89448.65,0.2,Indigo +861,kkaradzasnw,bdonnellannw@ucla.edu,false,64,19725.5,88.4,Fuscia +862,pjordannx,esoggeenx@harvard.edu,true,61,10697.78,9.2,Green +863,dthorringtonny,astallanny@hao123.com,false,87,92520.21,131.7,Pink +864,ceilertnz,hstockeynz@yellowbook.com,true,24,-4166.65,102.9,Pink +865,gkippaxo0,opatemano0@godaddy.com,true,58,92532.69,223.6,Crimson +866,bhuckabeo1,wgladtbacho1@fema.gov,false,48,15862.24,170.7,Violet +867,twillmetto2,welgaro2@flickr.com,false,77,54246.67,215.2,Blue 
+868,aaspino3,dedmetto3@examiner.com,false,52,75131.93,229.1,Yellow +869,jblissetto4,pharcombeo4@wisc.edu,false,40,77283.98,77.5,Blue +870,cskeeleso5,edelhayo5@php.net,true,31,31368.98,10.6,Aquamarine +871,dlillimano6,nkerwicko6@over-blog.com,false,59,-4976.95,146.5,Violet +872,oboutflouro7,aarthargo7@deliciousdays.com,false,72,86576.81,38.6,Turquoise +873,agrinleyo8,bstuchburieo8@topsy.com,true,70,41443.1,40.0,Maroon +874,mbrehaulto9,ofritzo9@elpais.com,false,96,96134.88,108.1,Orange +875,lderhamoa,salabastaroa@intel.com,true,24,65219.84,94.5,Violet +876,olibbeob,dbeaufoyob@auda.org.au,true,97,64431.0,118.4,Pink +877,dvereganoc,dadcocksoc@disqus.com,false,16,3380.11,13.5,Green +878,erylettod,hnesbyod@purevolume.com,true,77,93942.71,235.2,Violet +879,mbiddissoe,bprysoe@twitter.com,false,44,42211.05,68.3,Crimson +880,dclaypoleof,phowlesof@ed.gov,true,14,75530.96,123.1,Orange +881,dfreemantleog,amanisog@twitter.com,false,93,17909.85,85.1,Violet +882,botuohyoh,sgouldstrawoh@google.de,true,29,64901.12,196.9,Green +883,akimberlyoi,rmaclachlanoi@moonfruit.com,false,45,24230.08,25.5,Khaki +884,kpemberoj,rclaasenoj@senate.gov,true,53,3772.61,126.7,Purple +885,kblackbroughok,fgurnerok@japanpost.jp,false,31,66665.42,107.6,Aquamarine +886,ericciol,oyakebovitchol@mashable.com,false,91,12.72,213.1,Khaki +887,acannavanom,chalegarthom@quantcast.com,false,80,74701.54,36.5,Indigo +888,mjanssenson,thaggerstoneon@ask.com,true,89,60108.71,85.3,Red +889,etuttiettoo,bfeeheryoo@toplist.cz,true,16,-4603.13,218.9,Puce +890,mkitchenmanop,rcoyettop@sbwire.com,false,95,15250.3,151.4,Pink +891,vcarnelloq,rwallegeoq@earthlink.net,false,43,54363.86,80.5,Maroon +892,csummerscalesor,jsantorior@weibo.com,true,53,10008.12,125.8,Blue +893,esirmonos,tsurmonos@npr.org,true,67,11020.49,92.3,Teal +894,vdunfordot,jmernerot@slashdot.org,true,77,29219.85,105.4,Green +895,nbolusou,fscrannageou@samsung.com,false,63,93759.14,105.8,Blue +896,amoylerov,bharnettov@phoca.cz,true,46,6921.41,53.3,Orange 
+897,gferrelliow,rgallifordow@businessinsider.com,true,22,54580.64,129.2,Puce +898,lwinkleox,mwisdenox@google.nl,true,28,68138.0,81.5,Turquoise +899,tgrisardoy,kgilbeartoy@51.la,false,95,7846.12,145.1,Purple +900,kjayeoz,skyneoz@guardian.co.uk,true,78,70129.38,0.2,Goldenrod +901,smecop0,fgoodrickep0@t-online.de,false,29,-796.8,137.1,Khaki +902,ggoutp1,gdownesp1@mlb.com,true,62,92345.61,116.0,Pink +903,mwalthop2,croizp2@bbb.org,false,79,47447.26,224.8,Aquamarine +904,smerrganp3,byarmouthp3@google.it,true,77,37050.71,190.6,Aquamarine +905,rgrindlep4,llowfillp4@yandex.ru,true,66,99440.67,137.2,Pink +906,adabornp5,nconnerryp5@dot.gov,false,92,70552.57,37.6,Green +907,ggrimsdellp6,kbehrendp6@jiathis.com,true,81,89295.97,170.6,Aquamarine +908,osmoutenp7,ialdwichp7@nih.gov,false,85,94090.23,151.9,Green +909,mocannanp8,eleathwoodp8@comcast.net,true,24,65800.55,172.4,Maroon +910,dpourveerp9,mmathevetp9@dropbox.com,true,54,35738.96,30.7,Orange +911,jragglespa,hmargrempa@wunderground.com,true,58,15246.16,235.8,Maroon +912,gmaestropb,ghackingepb@google.cn,false,46,13202.97,48.9,Green +913,pnattepc,mcritchardpc@dell.com,false,56,77065.89,7.3,Aquamarine +914,gbattiepd,ehenighanpd@usda.gov,false,33,96375.25,185.9,Teal +915,chowickpe,atraharpe@artisteer.com,false,90,73316.39,215.2,Orange +916,cduckinfieldpf,kferrarellopf@globo.com,false,42,19893.76,40.1,Indigo +917,lbradanepg,dgapperpg@sciencedaily.com,true,82,84588.63,2.4,Mauv +918,eweondph,rbaterph@freewebs.com,false,86,32398.49,234.4,Teal +919,dpavlikpi,mbrissetpi@nba.com,false,69,37981.61,66.0,Yellow +920,difflandpj,sblabeypj@pbs.org,false,40,205.48,99.3,Blue +921,oocarrollpk,hrookespk@t.co,false,39,8027.29,179.1,Green +922,mbailpl,hdorceypl@berkeley.edu,true,90,31989.58,137.0,Green +923,lhissettpm,tloudianpm@craigslist.org,true,99,76601.73,136.3,Violet +924,csindenpn,leubankpn@canalblog.com,false,83,18256.89,66.4,Orange +925,wrowlinpo,ldoolerpo@blogger.com,false,82,28114.27,161.7,Goldenrod 
+926,lcathiepp,hpetrollipp@issuu.com,true,82,73603.2,223.7,Turquoise +927,cchippindalepq,gleestutpq@google.com.au,false,97,71971.21,166.9,Khaki +928,cmargrettpr,hpopescupr@slate.com,true,82,35026.36,36.1,Pink +929,mhalshawps,agilhoolps@webeden.co.uk,false,80,25313.15,107.7,Blue +930,ddavydkovpt,kstearndalept@exblog.jp,true,64,39551.81,41.7,Blue +931,kgyfordpu,cheathcottpu@dot.gov,false,48,50205.55,34.7,Goldenrod +932,ssempillpv,rdigginspv@elegantthemes.com,true,36,86762.77,161.8,Goldenrod +933,nbutcherspw,gdarlassonpw@biglobe.ne.jp,false,55,42640.28,129.8,Turquoise +934,pyarntonpx,galanbrookepx@cnbc.com,true,32,28483.99,43.3,Goldenrod +935,godbypy,lruckledgepy@hubpages.com,true,36,370.68,31.2,Violet +936,fbriggspz,olinehampz@infoseek.co.jp,true,28,28368.04,131.7,Fuscia +937,cidaleq0,rasserq0@people.com.cn,false,32,59676.47,46.1,Khaki +938,fmcclintonq1,mgethinsq1@last.fm,true,20,21805.28,188.6,Puce +939,blarwoodq2,lphilippsonq2@youtu.be,true,42,87558.38,51.7,Pink +940,omattysq3,zlapennaq3@reuters.com,true,79,75167.27,39.8,Green +941,lissacofq4,mpenkethq4@a8.net,false,13,43719.34,115.9,Purple +942,rrollingq5,dohanessianq5@soundcloud.com,false,26,75355.53,26.2,Pink +943,jsennq6,bworsnipq6@eepurl.com,false,35,50325.24,185.5,Khaki +944,mduerq7,edivisq7@bizjournals.com,false,58,78500.9,152.1,Green +945,aeckq8,ctitmusq8@usa.gov,false,84,893.61,153.7,Blue +946,ofulleq9,apietrzakq9@edublogs.org,true,53,37805.24,208.3,Fuscia +947,pruminqa,dlansdaleqa@theguardian.com,false,86,30109.71,216.0,Goldenrod +948,hmacmurrayqb,scominoliqb@bluehost.com,false,74,72500.94,71.6,Orange +949,kvermanqc,sribeiroqc@mayoclinic.com,true,69,26416.2,141.1,Red +950,wmichalqd,tcorballisqd@topsy.com,true,24,61432.79,38.5,Purple +951,kklampkqe,amacvanamyqe@uol.com.br,false,39,72249.74,26.7,Khaki +952,bhusbandsqf,ejardeinqf@google.it,false,52,23211.25,52.8,Fuscia +953,dlongcasterqg,dbugbeeqg@homestead.com,false,74,16366.46,29.2,Khaki 
+954,rchesneyqh,moverstoneqh@newsvine.com,false,99,-2006.09,120.9,Crimson +955,iheadqi,bpoolqi@yellowpages.com,false,33,-4317.33,14.9,Goldenrod +956,cflatmanqj,eturfordqj@kickstarter.com,false,38,40558.1,71.3,Goldenrod +957,lcolefordqk,raubryqk@hugedomains.com,false,26,78048.02,234.3,Turquoise +958,aschwerinql,hholburyql@people.com.cn,false,88,79154.48,167.6,Green +959,fheffernonqm,kzamboniniqm@ameblo.jp,false,16,83008.0,76.1,Teal +960,amartiniqn,smoxsteadqn@sciencedaily.com,false,35,48144.86,130.9,Orange +961,ojaramqo,tlorrowayqo@europa.eu,false,81,10215.38,177.4,Indigo +962,ddonanqp,amceloryqp@reuters.com,true,43,12796.7,188.6,Green +963,pburghqq,dhasteqq@sciencedaily.com,true,34,80065.42,59.1,Orange +964,hforceqr,cjedrzaszkiewiczqr@time.com,true,47,87532.27,221.7,Violet +965,gculliganqs,wpiddickqs@engadget.com,true,73,39438.9,17.3,Pink +966,eredmireqt,icoxenqt@imdb.com,false,71,98327.86,57.8,Green +967,sdoersqu,zenosqu@dmoz.org,false,17,7913.05,143.9,Red +968,maynoldqv,gverlingqv@umich.edu,false,24,87024.88,180.6,Turquoise +969,kpygottqw,gchaudretqw@stanford.edu,false,56,94903.86,174.3,Goldenrod +970,ssaunperqx,bpryceqx@plala.or.jp,true,87,37998.4,117.2,Indigo +971,aayceqy,hratieqy@usa.gov,false,65,29630.67,173.2,Turquoise +972,jbakhrushinqz,oneashamqz@weibo.com,false,64,69392.77,182.5,Blue +973,mfrancesr0,mblairr0@free.fr,true,15,39618.91,140.3,Orange +974,jkaygillr1,cbellier1@microsoft.com,false,96,95563.93,81.0,Orange +975,bparamorer2,gtrengrover2@addthis.com,true,55,56345.69,154.0,Khaki +976,bpodmorer3,akhilkovr3@apple.com,false,28,14922.0,165.1,Blue +977,emackinr4,lvasilchenkor4@deviantart.com,false,29,49697.14,66.5,Indigo +978,snielsonr5,tologanr5@webnode.com,false,24,30891.03,71.5,Orange +979,ckryskar6,bstannersr6@hc360.com,true,43,86494.57,131.5,Puce +980,rbestwallr7,hlindgrenr7@gnu.org,false,60,53289.56,207.7,Khaki +981,kporter8,mhumbertr8@sphinn.com,true,72,40844.95,120.1,Pink +982,syeskovr9,amcvanamyr9@newyorker.com,false,70,50277.84,183.1,Indigo 
+983,atilzeyra,bcaselickra@oracle.com,true,90,67053.3,8.3,Green +984,nstredwickrb,pgiacobiliorb@biglobe.ne.jp,false,72,12131.57,234.1,Teal +985,npursegloverc,efouldesrc@wp.com,true,57,4805.7,81.3,Mauv +986,lsowersbyrd,opotticaryrd@sphinn.com,true,57,60797.72,151.9,Turquoise +987,bderbyre,hvanleeuwenre@oakley.com,false,78,46044.34,229.7,Crimson +988,rogusrf,gdunkleerf@reference.com,true,95,23477.77,132.0,Orange +989,bdomicorg,aeagleshamrg@prweb.com,true,70,81559.91,117.3,Turquoise +990,cspittalrh,lruddlesdenrh@tiny.cc,true,66,78185.35,199.1,Khaki +991,tlamasri,tsachnori@ucoz.ru,false,99,14428.88,229.0,Goldenrod +992,cfranzewitchrj,lblackdenrj@admin.ch,false,49,45862.1,145.6,Orange +993,jraitrk,rdigginsonrk@freewebs.com,false,94,95204.78,64.4,Mauv +994,jpouldenrl,ldowzellrl@ox.ac.uk,true,79,73495.97,155.7,Orange +995,dmaffeorm,lhimsworthrm@ucla.edu,true,98,99913.35,141.9,Khaki +996,sbroadburyrn,jbrenardrn@reddit.com,true,25,50938.22,218.9,Violet +997,dbirkinshawro,lferraoro@wufoo.com,false,80,87717.29,230.0,Indigo +998,sallmondrp,bdurrantrp@miitbeian.gov.cn,true,32,2571.84,113.7,Maroon +999,gbrollyrq,lpoulneyrq@wikipedia.org,true,58,5790.69,226.9,Orange +1000,ljevonsrr,jjoinsonrr@hatena.ne.jp,true,19,7591.89,217.6,Puce diff --git a/src/Backend/test_data/csv/intergration_test_data_2.csv b/src/Backend/test_data/csv/intergration_test_data_2.csv new file mode 100644 index 0000000..c332c72 --- /dev/null +++ b/src/Backend/test_data/csv/intergration_test_data_2.csv @@ -0,0 +1,1001 @@ +id,department_name,manager_name,manager_email +1,Marketing,Gardy Venton,gventon0@biglobe.ne.jp +2,Product Management,Gypsy Robb,grobb1@google.pl +3,Product Management,Hastings Girardot,hgirardot2@zdnet.com +4,Legal,Dyna Rallin,drallin3@hubpages.com +5,Training,Karolina Hayhurst,khayhurst4@360.cn +6,Engineering,Anatol Goutcher,agoutcher5@nsw.gov.au +7,Support,Ashleigh Callam,acallam6@imdb.com +8,Accounting,Yorker Dowle,ydowle7@elpais.com +9,Accounting,Vale Lilbourne,vlilbourne8@jalbum.net 
+10,Product Management,Reeba Haggerstone,rhaggerstone9@tripod.com +11,Training,Cari Lyle,clylea@comcast.net +12,Services,Alina Soro,asorob@census.gov +13,Accounting,Henrik Count,hcountc@tumblr.com +14,Business Development,Ralph Labbez,rlabbezd@unblog.fr +15,Sales,Pincas Newvill,pnewville@infoseek.co.jp +16,Support,Axel McFater,amcfaterf@seattletimes.com +17,Support,Lorita Wylder,lwylderg@ibm.com +18,Support,Celine Conelly,cconellyh@google.co.uk +19,Training,Bertine Leatherborrow,bleatherborrowi@printfriendly.com +20,Sales,Billie Stalman,bstalmanj@google.es +21,Marketing,Darcee Orwin,dorwink@yahoo.com +22,Support,Normie Adie,nadiel@tripod.com +23,Research and Development,Antonietta Steinhammer,asteinhammerm@foxnews.com +24,Research and Development,Delainey Kennsley,dkennsleyn@ehow.com +25,Accounting,Kingsly Civitillo,kcivitilloo@tuttocitta.it +26,Legal,Denise Blucher,dblucherp@cam.ac.uk +27,Human Resources,Darrell Andrieu,dandrieuq@jalbum.net +28,Engineering,Thurstan Bendle,tbendler@bluehost.com +29,Legal,Eleni Collcott,ecollcotts@parallels.com +30,Services,Rhona Ousby,rousbyt@dmoz.org +31,Engineering,Raphaela Peirpoint,rpeirpointu@jalbum.net +32,Business Development,Jillayne Tunnock,jtunnockv@symantec.com +33,Marketing,Remington Oldaker,roldakerw@wisc.edu +34,Support,Sherm Knapp,sknappx@forbes.com +35,Business Development,Roman Nares,rnaresy@amazon.com +36,Human Resources,Merlina Medendorp,mmedendorpz@shop-pro.jp +37,Sales,Hermione Jopke,hjopke10@ucsd.edu +38,Human Resources,Agna Scroggie,ascroggie11@hostgator.com +39,Legal,Rosamund Follett,rfollett12@guardian.co.uk +40,Human Resources,Jamie Beteriss,jbeteriss13@bigcartel.com +41,Training,Bernardine Beardshall,bbeardshall14@discovery.com +42,Marketing,Avrom Toone,atoone15@accuweather.com +43,Human Resources,Lucias O'Hagan,lohagan16@princeton.edu +44,Research and Development,Everett Shambrook,eshambrook17@usatoday.com +45,Research and Development,Randolf Arpur,rarpur18@cdc.gov +46,Services,Ferguson 
Gorrick,fgorrick19@sphinn.com +47,Services,Natalina Forber,nforber1a@google.com +48,Human Resources,Emmalynn Bim,ebim1b@chicagotribune.com +49,Accounting,Minor Spellacey,mspellacey1c@youtube.com +50,Research and Development,Tony Gilbane,tgilbane1d@youtube.com +51,Marketing,Dorotea Bryning,dbryning1e@booking.com +52,Support,Sansone D'Elias,sdelias1f@indiegogo.com +53,Marketing,Terrence Cromblehome,tcromblehome1g@bizjournals.com +54,Training,Lemmy Budgen,lbudgen1h@miibeian.gov.cn +55,Product Management,Sidney Vanichkin,svanichkin1i@gnu.org +56,Services,Bette Antunes,bantunes1j@reuters.com +57,Legal,Benjy Leafe,bleafe1k@ucsd.edu +58,Engineering,Ainslie Drewry,adrewry1l@163.com +59,Marketing,Earl Kaesmakers,ekaesmakers1m@oakley.com +60,Research and Development,Sela Dearden,sdearden1n@delicious.com +61,Support,Kati Macknish,kmacknish1o@discovery.com +62,Marketing,Edan Jockle,ejockle1p@yale.edu +63,Business Development,Xaviera Strettle,xstrettle1q@independent.co.uk +64,Services,Maxy Hoofe,mhoofe1r@wikimedia.org +65,Training,Stephine Fransemai,sfransemai1s@slate.com +66,Legal,Tessy Rowell,trowell1t@reddit.com +67,Legal,Rik Kilbourne,rkilbourne1u@altervista.org +68,Sales,Ariana Eich,aeich1v@mozilla.org +69,Legal,Melvyn Semper,msemper1w@quantcast.com +70,Services,Wallis Caldwell,wcaldwell1x@senate.gov +71,Legal,Alejandrina Siaskowski,asiaskowski1y@studiopress.com +72,Research and Development,Caryl Balffye,cbalffye1z@ft.com +73,Research and Development,Trudie Bamlet,tbamlet20@netlog.com +74,Support,Alard Dykes,adykes21@businessweek.com +75,Support,Perkin Mackieson,pmackieson22@abc.net.au +76,Sales,Mikaela Padillo,mpadillo23@jimdo.com +77,Legal,Irma Maylour,imaylour24@ucoz.com +78,Product Management,Arnoldo Cockburn,acockburn25@imageshack.us +79,Research and Development,Arv Tanfield,atanfield26@slate.com +80,Human Resources,Leanor McLevie,lmclevie27@edublogs.org +81,Services,Imojean Silbermann,isilbermann28@parallels.com +82,Accounting,Mavis 
Wiskar,mwiskar29@acquirethisname.com +83,Business Development,Pearline Sillett,psillett2a@friendfeed.com +84,Services,Sophronia Shawcroft,sshawcroft2b@adobe.com +85,Support,Zacharias Divver,zdivver2c@upenn.edu +86,Services,Hazel Casterton,hcasterton2d@simplemachines.org +87,Sales,Romain Wells,rwells2e@gravatar.com +88,Support,Emmalynne Tapscott,etapscott2f@weather.com +89,Product Management,Kyle Gwynn,kgwynn2g@ox.ac.uk +90,Engineering,Spence Isherwood,sisherwood2h@dedecms.com +91,Support,Jessie Beecraft,jbeecraft2i@vkontakte.ru +92,Business Development,Gallagher Perkis,gperkis2j@merriam-webster.com +93,Research and Development,Ferguson Farquarson,ffarquarson2k@clickbank.net +94,Training,Tallie Nares,tnares2l@ezinearticles.com +95,Accounting,Kent Gaskill,kgaskill2m@nbcnews.com +96,Human Resources,Nickola Plomer,nplomer2n@wikispaces.com +97,Legal,Keefer Brownfield,kbrownfield2o@nytimes.com +98,Legal,Genna Cardenas,gcardenas2p@indiatimes.com +99,Marketing,Dori Hagwood,dhagwood2q@google.fr +100,Legal,Bertie Wilber,bwilber2r@disqus.com +101,Services,Brandon MacGillespie,bmacgillespie2s@blogspot.com +102,Sales,Yvette Hankins,yhankins2t@themeforest.net +103,Legal,Flossie Yukhnevich,fyukhnevich2u@reference.com +104,Services,Kendall Gocke,kgocke2v@psu.edu +105,Research and Development,Amos Davydzenko,adavydzenko2w@upenn.edu +106,Engineering,Shelden Frampton,sframpton2x@irs.gov +107,Business Development,Griswold Rain,grain2y@wunderground.com +108,Sales,Dorian Rogeon,drogeon2z@about.me +109,Support,Vilhelmina Whitlaw,vwhitlaw30@answers.com +110,Accounting,Phillie Janusz,pjanusz31@istockphoto.com +111,Training,Sharon Horbart,shorbart32@archive.org +112,Support,Abran Eidler,aeidler33@irs.gov +113,Marketing,Zedekiah Densham,zdensham34@nps.gov +114,Research and Development,Matelda Buzek,mbuzek35@etsy.com +115,Marketing,Carita Wyllcock,cwyllcock36@gmpg.org +116,Marketing,Phaedra Yellowlees,pyellowlees37@cnet.com +117,Training,Rik Sapena,rsapena38@opensource.org +118,Human 
Resources,Duke Ranscomb,dranscomb39@github.com +119,Support,Ambrosio Hailston,ahailston3a@aboutads.info +120,Support,Kania Pumfrey,kpumfrey3b@nasa.gov +121,Human Resources,Godfrey Clavey,gclavey3c@wisc.edu +122,Human Resources,Leila Barkhouse,lbarkhouse3d@jimdo.com +123,Training,Camel Usborn,cusborn3e@foxnews.com +124,Training,Marvin Everett,meverett3f@networksolutions.com +125,Product Management,Nappie Quainton,nquainton3g@networksolutions.com +126,Legal,Berne Cleminshaw,bcleminshaw3h@google.com.au +127,Accounting,Fionnula Hoodless,fhoodless3i@umich.edu +128,Training,Amabelle Ede,aede3j@wikipedia.org +129,Accounting,Araldo MacKomb,amackomb3k@squarespace.com +130,Accounting,Jaime Rosenqvist,jrosenqvist3l@moonfruit.com +131,Accounting,Drusie Phillp,dphillp3m@google.pl +132,Sales,Worden Branscomb,wbranscomb3n@meetup.com +133,Training,Marshal Keward,mkeward3o@spiegel.de +134,Accounting,Jammie Palluschek,jpalluschek3p@chronoengine.com +135,Engineering,Homere Plumbe,hplumbe3q@msu.edu +136,Research and Development,Gearard Linstead,glinstead3r@dagondesign.com +137,Research and Development,Sybilla Schrader,sschrader3s@seesaa.net +138,Human Resources,Car Sandiford,csandiford3t@dot.gov +139,Accounting,Marget Portwaine,mportwaine3u@ucoz.com +140,Research and Development,Gardie Bonefant,gbonefant3v@accuweather.com +141,Legal,Pooh Gilburt,pgilburt3w@google.es +142,Sales,Free Lohering,flohering3x@slate.com +143,Product Management,Cherin Demare,cdemare3y@webmd.com +144,Engineering,Donnamarie Ivanchenkov,divanchenkov3z@nationalgeographic.com +145,Support,Wyatt Terzi,wterzi40@toplist.cz +146,Human Resources,Flemming Mallord,fmallord41@de.vu +147,Research and Development,Janette Pendle,jpendle42@yellowbook.com +148,Services,Kacey Robuchon,krobuchon43@addthis.com +149,Business Development,Lorri Michallat,lmichallat44@npr.org +150,Human Resources,Grete Leggat,gleggat45@cdc.gov +151,Training,Iolanthe Francisco,ifrancisco46@ovh.net +152,Marketing,Charmion Takkos,ctakkos47@infoseek.co.jp 
+153,Human Resources,Marybelle Plampin,mplampin48@princeton.edu +154,Services,Gwenora Cawthra,gcawthra49@apache.org +155,Support,Barny Woodlands,bwoodlands4a@freewebs.com +156,Accounting,Britt Brayshaw,bbrayshaw4b@cornell.edu +157,Marketing,Alexander Maymand,amaymand4c@xrea.com +158,Engineering,Jenn Stirley,jstirley4d@blogs.com +159,Research and Development,Elysia Halso,ehalso4e@umich.edu +160,Accounting,Felicdad Shyres,fshyres4f@bloomberg.com +161,Services,Freda Shetliff,fshetliff4g@prlog.org +162,Human Resources,Rafi Ewings,rewings4h@example.com +163,Engineering,Martin Valenti,mvalenti4i@stumbleupon.com +164,Marketing,Valera Capel,vcapel4j@ocn.ne.jp +165,Research and Development,Padraic Morson,pmorson4k@disqus.com +166,Business Development,Tito O' Mahony,to4l@statcounter.com +167,Sales,Yancy Baudinot,ybaudinot4m@harvard.edu +168,Marketing,Ethel MacCaull,emaccaull4n@list-manage.com +169,Sales,Garrot Hulance,ghulance4o@sun.com +170,Services,Wilona Huddlestone,whuddlestone4p@google.nl +171,Services,Daryl Hubbins,dhubbins4q@arizona.edu +172,Business Development,Audi Wyles,awyles4r@unblog.fr +173,Accounting,Genevieve Gauvin,ggauvin4s@usgs.gov +174,Support,Jocelyne Tremblet,jtremblet4t@wp.com +175,Research and Development,Clarinda Orgee,corgee4u@sakura.ne.jp +176,Human Resources,Kip Paulon,kpaulon4v@hud.gov +177,Legal,Shaw Langeren,slangeren4w@java.com +178,Services,Rolando Grimley,rgrimley4x@vkontakte.ru +179,Sales,Adora Crossfield,acrossfield4y@google.es +180,Training,Tadeo Tierney,ttierney4z@bizjournals.com +181,Training,Sydney Huskisson,shuskisson50@oracle.com +182,Sales,Inge Fellenor,ifellenor51@craigslist.org +183,Human Resources,Vergil Aspinwall,vaspinwall52@economist.com +184,Services,Jamison MacSweeney,jmacsweeney53@chron.com +185,Engineering,Bessie Othen,bothen54@illinois.edu +186,Research and Development,Beau Furzer,bfurzer55@smugmug.com +187,Product Management,Chip Brake,cbrake56@xrea.com +188,Support,Heinrik Van Castele,hvan57@mit.edu +189,Business 
Development,Tess Vogelein,tvogelein58@eepurl.com +190,Research and Development,Jillayne Anscott,janscott59@phoca.cz +191,Research and Development,Karlen Ruggiero,kruggiero5a@surveymonkey.com +192,Legal,Patrice Readett,preadett5b@youtu.be +193,Legal,Arthur Rue,arue5c@furl.net +194,Training,Bram Cotelard,bcotelard5d@sogou.com +195,Sales,Lonna Steinhammer,lsteinhammer5e@bigcartel.com +196,Human Resources,Noella Caret,ncaret5f@mtv.com +197,Research and Development,Trenton Fearnside,tfearnside5g@reverbnation.com +198,Human Resources,Liuka Lawless,llawless5h@seesaa.net +199,Marketing,Isac Goodred,igoodred5i@columbia.edu +200,Sales,Eileen Minister,eminister5j@archive.org +201,Accounting,Paige Malimoe,pmalimoe5k@army.mil +202,Business Development,Suzann Packman,spackman5l@tinyurl.com +203,Support,Miof mela Burbudge,mmela5m@ed.gov +204,Legal,Marina Incogna,mincogna5n@illinois.edu +205,Human Resources,Silvana Gwyllt,sgwyllt5o@163.com +206,Accounting,Ara Kennelly,akennelly5p@xinhuanet.com +207,Human Resources,Lyn Nolda,lnolda5q@prweb.com +208,Training,Davon Larroway,dlarroway5r@constantcontact.com +209,Accounting,Yasmeen Fabri,yfabri5s@shutterfly.com +210,Marketing,Elfrida Romushkin,eromushkin5t@canalblog.com +211,Research and Development,Boycey Oattes,boattes5u@nasa.gov +212,Accounting,Loise O'Mannion,lomannion5v@businessweek.com +213,Business Development,Angel Drewitt,adrewitt5w@cdbaby.com +214,Support,Daphene Redan,dredan5x@51.la +215,Engineering,Janice Lartice,jlartice5y@cbc.ca +216,Legal,Nicola Gwillym,ngwillym5z@biglobe.ne.jp +217,Support,Darb Mioni,dmioni60@cbslocal.com +218,Engineering,Pacorro Kenninghan,pkenninghan61@miitbeian.gov.cn +219,Accounting,Vasili Verrico,vverrico62@chronoengine.com +220,Sales,Coralie Gaiger,cgaiger63@behance.net +221,Human Resources,Cecilio Crooks,ccrooks64@csmonitor.com +222,Human Resources,Elenore Dudny,edudny65@blog.com +223,Business Development,Meggy Finley,mfinley66@craigslist.org +224,Accounting,Dasie Benedetti,dbenedetti67@dion.ne.jp 
+225,Support,Livvyy Rudolph,lrudolph68@smh.com.au +226,Human Resources,Maighdiln Etheridge,metheridge69@latimes.com +227,Sales,Deny Matieu,dmatieu6a@upenn.edu +228,Product Management,Cathy Philbin,cphilbin6b@networksolutions.com +229,Engineering,Grethel Sends,gsends6c@adobe.com +230,Product Management,Ashli Koenraad,akoenraad6d@nps.gov +231,Business Development,Leona Ind,lind6e@reverbnation.com +232,Engineering,Hadley Gullam,hgullam6f@guardian.co.uk +233,Sales,Kata Broadey,kbroadey6g@histats.com +234,Sales,Christoffer Wethers,cwethers6h@bloglines.com +235,Human Resources,Lucilia Whacket,lwhacket6i@weebly.com +236,Research and Development,Pamelina Hassekl,phassekl6j@plala.or.jp +237,Services,Jake O'Dunniom,jodunniom6k@cisco.com +238,Accounting,Madonna Klimowicz,mklimowicz6l@jigsy.com +239,Sales,Valera Cuesta,vcuesta6m@51.la +240,Business Development,Mord Charteris,mcharteris6n@smugmug.com +241,Training,Babara Marsy,bmarsy6o@ezinearticles.com +242,Training,Jacobo Gherardesci,jgherardesci6p@so-net.ne.jp +243,Human Resources,Sanford Rate,srate6q@newsvine.com +244,Legal,Kristofer Tedman,ktedman6r@hatena.ne.jp +245,Legal,Alli Gerrett,agerrett6s@behance.net +246,Human Resources,Bern Falshaw,bfalshaw6t@dropbox.com +247,Human Resources,Loise Backwell,lbackwell6u@fotki.com +248,Accounting,Lib Snowling,lsnowling6v@com.com +249,Engineering,Cherilynn Martynka,cmartynka6w@skyrock.com +250,Sales,Reyna Buckeridge,rbuckeridge6x@prweb.com +251,Business Development,Nonnah Duham,nduham6y@themeforest.net +252,Accounting,Leola Neller,lneller6z@nymag.com +253,Services,Cyrille Freeman,cfreeman70@opensource.org +254,Research and Development,Eberto Folliott,efolliott71@baidu.com +255,Accounting,Henriette Dmitriev,hdmitriev72@harvard.edu +256,Training,Lane Cardew,lcardew73@msu.edu +257,Engineering,Morganica Dixon,mdixon74@statcounter.com +258,Legal,Raffarty Kerne,rkerne75@google.com.hk +259,Marketing,Brear Larderot,blarderot76@blinklist.com +260,Training,Sophia 
Cristofor,scristofor77@1und1.de +261,Sales,Eustace Pollins,epollins78@java.com +262,Product Management,Ethelin Trowsdale,etrowsdale79@newsvine.com +263,Training,Hermann Solloway,hsolloway7a@phoca.cz +264,Support,Robby Lygoe,rlygoe7b@blogger.com +265,Marketing,Megen Cathersides,mcathersides7c@edublogs.org +266,Sales,Ceil Caudelier,ccaudelier7d@bloglines.com +267,Product Management,Bone Orrobin,borrobin7e@cbc.ca +268,Marketing,Travus Nottingham,tnottingham7f@bluehost.com +269,Training,Helge Titterell,htitterell7g@businessweek.com +270,Accounting,Ann-marie Surcombe,asurcombe7h@wiley.com +271,Business Development,Jerald Pestor,jpestor7i@cdc.gov +272,Human Resources,Blisse Briskey,bbriskey7j@t-online.de +273,Research and Development,Blondell Garaghan,bgaraghan7k@zimbio.com +274,Training,Jacquie Escalera,jescalera7l@people.com.cn +275,Support,Darcy Dumbrall,ddumbrall7m@cmu.edu +276,Business Development,Emmit Easun,eeasun7n@ihg.com +277,Engineering,Tedd Petticrew,tpetticrew7o@privacy.gov.au +278,Support,Dede Conradsen,dconradsen7p@wufoo.com +279,Engineering,Mace Clows,mclows7q@infoseek.co.jp +280,Product Management,Constantia McElroy,cmcelroy7r@360.cn +281,Accounting,Sansone Gudahy,sgudahy7s@boston.com +282,Services,Tersina Stolli,tstolli7t@pen.io +283,Legal,Emylee Sheara,esheara7u@hubpages.com +284,Sales,Perri Kaesmans,pkaesmans7v@unesco.org +285,Engineering,Ignazio Griggs,igriggs7w@addthis.com +286,Accounting,Maddie Oliphand,moliphand7x@blogs.com +287,Research and Development,Elsbeth Vann,evann7y@who.int +288,Legal,Latashia Walewski,lwalewski7z@ebay.com +289,Training,Wilhelmine Matyushenko,wmatyushenko80@independent.co.uk +290,Research and Development,Vinni Alves,valves81@php.net +291,Human Resources,Dom Subhan,dsubhan82@cnn.com +292,Training,Norby Epton,nepton83@salon.com +293,Business Development,Allan Oguz,aoguz84@yale.edu +294,Accounting,Marjorie Pedri,mpedri85@sakura.ne.jp +295,Training,Miran Lieb,mlieb86@yahoo.co.jp +296,Business Development,Whit 
Maffulli,wmaffulli87@mail.ru +297,Engineering,Barbi Simmill,bsimmill88@spotify.com +298,Services,Gothart Gason,ggason89@yellowbook.com +299,Human Resources,Anny McGuffog,amcguffog8a@berkeley.edu +300,Human Resources,Charmain St Leger,cst8b@hp.com +301,Business Development,Julienne Biddle,jbiddle8c@usa.gov +302,Accounting,Son Stoggles,sstoggles8d@skype.com +303,Product Management,Valerye Ardron,vardron8e@addthis.com +304,Marketing,Mariam Pearman,mpearman8f@hubpages.com +305,Engineering,Sanders Thaxter,sthaxter8g@over-blog.com +306,Business Development,Harriet Willingham,hwillingham8h@illinois.edu +307,Marketing,Sauveur Govett,sgovett8i@soundcloud.com +308,Marketing,Berna Veness,bveness8j@miibeian.gov.cn +309,Business Development,Rory Atkins,ratkins8k@bandcamp.com +310,Human Resources,Luelle Crompton,lcrompton8l@house.gov +311,Marketing,Jamey Topham,jtopham8m@biblegateway.com +312,Training,Roselle Orum,rorum8n@eepurl.com +313,Accounting,Pancho Tumini,ptumini8o@oakley.com +314,Product Management,Halimeda Hurl,hhurl8p@google.nl +315,Product Management,Vinnie Zannotelli,vzannotelli8q@instagram.com +316,Legal,Junie Rutigliano,jrutigliano8r@furl.net +317,Human Resources,Randall Newitt,rnewitt8s@weather.com +318,Product Management,Borg Fearne,bfearne8t@jiathis.com +319,Research and Development,Brier A'Barrow,babarrow8u@weibo.com +320,Legal,Van Goter,vgoter8v@illinois.edu +321,Research and Development,Ruy Allred,rallred8w@oakley.com +322,Sales,Selinda Maber,smaber8x@va.gov +323,Sales,Lacey Carter,lcarter8y@ameblo.jp +324,Research and Development,Albertine Potteridge,apotteridge8z@ameblo.jp +325,Marketing,Bary Privett,bprivett90@flavors.me +326,Services,Abbey De Gregoli,ade91@ow.ly +327,Product Management,Sonni Ottewell,sottewell92@cyberchimps.com +328,Legal,Jessee Cazereau,jcazereau93@woothemes.com +329,Training,Betteanne Border,bborder94@barnesandnoble.com +330,Training,Mickie Cockson,mcockson95@columbia.edu +331,Engineering,Julian Sedgwick,jsedgwick96@yelp.com +332,Human 
Resources,Tye Palatini,tpalatini97@rambler.ru +333,Support,Alysia Fishpoole,afishpoole98@weather.com +334,Marketing,Reese Antonomoli,rantonomoli99@pagesperso-orange.fr +335,Legal,Hynda Pfaffe,hpfaffe9a@cloudflare.com +336,Services,Claiborne Klesse,cklesse9b@craigslist.org +337,Sales,Lisetta Livzey,llivzey9c@istockphoto.com +338,Support,Karalynn Cumbridge,kcumbridge9d@mediafire.com +339,Services,Carmita Hoofe,choofe9e@whitehouse.gov +340,Accounting,Carmen Peggrem,cpeggrem9f@bloomberg.com +341,Legal,Roselle Livingston,rlivingston9g@shinystat.com +342,Support,Bevin Stampfer,bstampfer9h@bigcartel.com +343,Human Resources,Basilio Sline,bsline9i@symantec.com +344,Engineering,Arlana Sabatier,asabatier9j@xinhuanet.com +345,Accounting,Letisha Blanchet,lblanchet9k@engadget.com +346,Research and Development,Lyon Schumacher,lschumacher9l@dailymotion.com +347,Human Resources,Henrieta O'Brogane,hobrogane9m@techcrunch.com +348,Marketing,Garreth Breslin,gbreslin9n@webs.com +349,Research and Development,Rem Chiverstone,rchiverstone9o@upenn.edu +350,Marketing,Chadwick Melly,cmelly9p@discuz.net +351,Sales,Salvidor Middis,smiddis9q@cam.ac.uk +352,Business Development,Morie Churchley,mchurchley9r@pinterest.com +353,Product Management,Oberon Scantlebury,oscantlebury9s@youtu.be +354,Product Management,Joela Turfrey,jturfrey9t@businesswire.com +355,Marketing,Claudia Ganning,cganning9u@lycos.com +356,Product Management,Franchot Breston,fbreston9v@opera.com +357,Training,Glad Ort,gort9w@buzzfeed.com +358,Accounting,Monah Petto,mpetto9x@vkontakte.ru +359,Engineering,Iormina Dyble,idyble9y@pen.io +360,Business Development,Darnall Scryne,dscryne9z@senate.gov +361,Engineering,Delila Gosney,dgosneya0@fotki.com +362,Accounting,Clair Batchelar,cbatchelara1@google.pl +363,Support,Thain Caffrey,tcaffreya2@usda.gov +364,Services,Ali Gulvin,agulvina3@miibeian.gov.cn +365,Marketing,Xylina Loyd,xloyda4@infoseek.co.jp +366,Human Resources,Lucinda Duetsche,lduetschea5@time.com +367,Training,Pietrek 
Keeler,pkeelera6@ycombinator.com +368,Engineering,Natasha Vassay,nvassaya7@foxnews.com +369,Legal,Daren Lanfear,dlanfeara8@weebly.com +370,Services,Blanca Abate,babatea9@g.co +371,Training,Jacklin Cahill,jcahillaa@amazon.co.uk +372,Business Development,Noelle McFayden,nmcfaydenab@indiatimes.com +373,Engineering,Brnaba Hryncewicz,bhryncewiczac@huffingtonpost.com +374,Services,Juliette Clawson,jclawsonad@ted.com +375,Business Development,Bil Kineton,bkinetonae@e-recht24.de +376,Research and Development,Natty Clear,nclearaf@cpanel.net +377,Marketing,Karola Sandercock,ksandercockag@spotify.com +378,Sales,Ashley Manie,amanieah@apache.org +379,Engineering,Ayn McCowen,amccowenai@scribd.com +380,Human Resources,Calvin Muggeridge,cmuggeridgeaj@webmd.com +381,Support,Ruthie Exton,rextonak@nymag.com +382,Marketing,Even Christofor,echristoforal@eventbrite.com +383,Training,Nelle Jull,njullam@washington.edu +384,Marketing,Inessa Viggars,iviggarsan@ucla.edu +385,Legal,Jehanna Whale,jwhaleao@xing.com +386,Services,Annaliese Dye,adyeap@ucsd.edu +387,Business Development,Regen Ussher,russheraq@huffingtonpost.com +388,Accounting,Kattie Yanin,kyaninar@shinystat.com +389,Product Management,Mead Bagger,mbaggeras@mysql.com +390,Marketing,Pinchas MacAlpyne,pmacalpyneat@wordpress.org +391,Business Development,Aila Glavis,aglavisau@storify.com +392,Support,Hadley Pinchin,hpinchinav@arstechnica.com +393,Accounting,Zenia Fligg,zfliggaw@netlog.com +394,Research and Development,Lin Gilardoni,lgilardoniax@ft.com +395,Accounting,Klarika McIlenna,kmcilennaay@usatoday.com +396,Training,Hakim Kimmel,hkimmelaz@nba.com +397,Sales,Ellswerth Tant,etantb0@smugmug.com +398,Engineering,Joy Bugg,jbuggb1@opensource.org +399,Training,Shepperd Boarder,sboarderb2@quantcast.com +400,Research and Development,Aloisia Demcak,ademcakb3@cbc.ca +401,Services,Skye Waldrum,swaldrumb4@slideshare.net +402,Business Development,Antony Blenkhorn,ablenkhornb5@smugmug.com +403,Marketing,Muhammad Di Nisco,mdib6@smugmug.com 
+404,Support,Binni Forsdike,bforsdikeb7@home.pl +405,Business Development,Torey Cattanach,tcattanachb8@yahoo.co.jp +406,Human Resources,Jessy Sallis,jsallisb9@nba.com +407,Business Development,Rosabella Desvignes,rdesvignesba@rediff.com +408,Research and Development,Sharleen Wadham,swadhambb@skype.com +409,Support,Amie Paudin,apaudinbc@examiner.com +410,Services,Trenna Clout,tcloutbd@tiny.cc +411,Accounting,Bernita Cumbridge,bcumbridgebe@desdev.cn +412,Accounting,Tripp Cruikshanks,tcruikshanksbf@thetimes.co.uk +413,Training,Jackie Eneas,jeneasbg@studiopress.com +414,Legal,Cece Selwyn,cselwynbh@microsoft.com +415,Training,Ernaline Castell,ecastellbi@hhs.gov +416,Services,Edward De Atta,edebj@google.com.hk +417,Sales,Lenette Akett,lakettbk@usgs.gov +418,Research and Development,Margarethe Behneke,mbehnekebl@furl.net +419,Accounting,Leland Bineham,lbinehambm@biblegateway.com +420,Human Resources,Mehetabel Lutsch,mlutschbn@answers.com +421,Accounting,Candra Bendley,cbendleybo@simplemachines.org +422,Human Resources,Shoshana Herrero,sherrerobp@amazon.co.uk +423,Accounting,Theodore Renoden,trenodenbq@oakley.com +424,Accounting,Chandal Antonetti,cantonettibr@kickstarter.com +425,Services,Elbertina Autie,eautiebs@joomla.org +426,Training,Rakel Bedome,rbedomebt@wikipedia.org +427,Product Management,Leanna Dymick,ldymickbu@creativecommons.org +428,Business Development,Ernst Clyne,eclynebv@ustream.tv +429,Sales,Chryste Colls,ccollsbw@themeforest.net +430,Support,Kippie Alessandretti,kalessandrettibx@123-reg.co.uk +431,Product Management,Waldemar McKearnen,wmckearnenby@berkeley.edu +432,Product Management,Barnie Balshaw,bbalshawbz@angelfire.com +433,Sales,Tiphanie Broinlich,tbroinlichc0@state.tx.us +434,Accounting,Alie Coggins,acogginsc1@wsj.com +435,Accounting,Beatrix McCool,bmccoolc2@webeden.co.uk +436,Engineering,Janela Martinello,jmartinelloc3@wikimedia.org +437,Business Development,Jessamine Josuweit,jjosuweitc4@yandex.ru +438,Human Resources,Renato 
Scotchmoor,rscotchmoorc5@ucsd.edu +439,Legal,Eilis Wartonby,ewartonbyc6@tmall.com +440,Services,Eldridge Deschlein,edeschleinc7@accuweather.com +441,Engineering,Flory Haugeh,fhaugehc8@storify.com +442,Marketing,Sindee Sebley,ssebleyc9@ucla.edu +443,Marketing,Torin Le Fevre,tleca@nbcnews.com +444,Product Management,Rea McAlinion,rmcalinioncb@reference.com +445,Human Resources,Fina Going,fgoingcc@so-net.ne.jp +446,Research and Development,Marilyn MacRonald,mmacronaldcd@hhs.gov +447,Support,Ly Cyson,lcysonce@dion.ne.jp +448,Product Management,Eloise Durie,eduriecf@nba.com +449,Engineering,Stillman Edmons,sedmonscg@sitemeter.com +450,Training,Tuck Clayden,tclaydench@census.gov +451,Support,Ernesta Hasley,ehasleyci@ebay.com +452,Services,Melina Limrick,mlimrickcj@ask.com +453,Accounting,Marsha Bridewell,mbridewellck@simplemachines.org +454,Business Development,Cahra Megarry,cmegarrycl@about.com +455,Support,Norbert Stobbs,nstobbscm@imageshack.us +456,Marketing,Mira Sorrell,msorrellcn@qq.com +457,Services,Danila Congram,dcongramco@harvard.edu +458,Sales,Katleen Muffin,kmuffincp@rambler.ru +459,Marketing,Prentice Iddiens,piddienscq@google.nl +460,Sales,Brita Talboy,btalboycr@free.fr +461,Business Development,Lynne Eldershaw,leldershawcs@bbc.co.uk +462,Accounting,Madonna Mettricke,mmettrickect@canalblog.com +463,Research and Development,Paulette Boutellier,pboutelliercu@shutterfly.com +464,Training,Rosco Murrells,rmurrellscv@redcross.org +465,Product Management,Antonius Kubicki,akubickicw@opensource.org +466,Marketing,Ody Ansill,oansillcx@google.com.hk +467,Human Resources,Moselle Outibridge,moutibridgecy@unesco.org +468,Business Development,Oralie Hicks,ohickscz@seattletimes.com +469,Research and Development,Kathryn McCreadie,kmccreadied0@pcworld.com +470,Support,Elayne Dearlove,edearloved1@wordpress.com +471,Business Development,Ianthe Alfuso,ialfusod2@si.edu +472,Training,Raynor Chavey,rchaveyd3@networksolutions.com +473,Research and Development,Michel 
Linturn,mlinturnd4@paypal.com +474,Support,Prent Skirling,pskirlingd5@nationalgeographic.com +475,Support,Jeffrey Brecknock,jbrecknockd6@ihg.com +476,Legal,Ruy Beharrell,rbeharrelld7@nifty.com +477,Services,Pail Gainor,pgainord8@deviantart.com +478,Business Development,Clerc Drife,cdrifed9@fc2.com +479,Product Management,Granger Sollas,gsollasda@telegraph.co.uk +480,Training,Karita Wrixon,kwrixondb@paypal.com +481,Services,Arlene Freeburn,afreeburndc@ameblo.jp +482,Product Management,Lorettalorna Feldbau,lfeldbaudd@1und1.de +483,Human Resources,Michaeline Tipler,mtiplerde@marriott.com +484,Accounting,Ferrell Edscer,fedscerdf@netlog.com +485,Product Management,Lek Rocks,lrocksdg@flickr.com +486,Human Resources,Clemmy Hartzog,chartzogdh@elpais.com +487,Legal,Derk Lehrer,dlehrerdi@hugedomains.com +488,Product Management,Orsa Polo,opolodj@tinypic.com +489,Human Resources,Alessandra Joutapaitis,ajoutapaitisdk@wufoo.com +490,Engineering,Aubry Frere,afreredl@wikipedia.org +491,Marketing,Keith Gethings,kgethingsdm@webnode.com +492,Business Development,Amory Cawdron,acawdrondn@bluehost.com +493,Human Resources,Adlai Gigg,agiggdo@google.com +494,Business Development,Cindee Caddick,ccaddickdp@cmu.edu +495,Marketing,Emile Hanby,ehanbydq@multiply.com +496,Human Resources,Darla Farrant,dfarrantdr@ustream.tv +497,Engineering,Russ Pestricke,rpestrickeds@nasa.gov +498,Marketing,Jaime Freegard,jfreegarddt@aboutads.info +499,Human Resources,Barrie Sargeant,bsargeantdu@slideshare.net +500,Research and Development,Margarete Bowller,mbowllerdv@aol.com +501,Legal,Lynea Farnaby,lfarnabydw@intel.com +502,Training,Gal Simonite,gsimonitedx@so-net.ne.jp +503,Marketing,Scarface Einchcombe,seinchcombedy@chicagotribune.com +504,Marketing,Reidar Rugiero,rrugierodz@xinhuanet.com +505,Training,Willetta Sowersby,wsowersbye0@cam.ac.uk +506,Training,Margery Antrobus,mantrobuse1@foxnews.com +507,Marketing,Mirella Carpenter,mcarpentere2@pinterest.com +508,Support,Domingo Pidwell,dpidwelle3@quantcast.com 
+509,Engineering,Ferdie McIlhagga,fmcilhaggae4@nbcnews.com +510,Sales,Ruddy Haps,rhapse5@e-recht24.de +511,Product Management,Duky Lardner,dlardnere6@mac.com +512,Product Management,Danny Gutch,dgutche7@state.tx.us +513,Research and Development,Ibbie Oneill,ioneille8@storify.com +514,Research and Development,Uta Cookes,ucookese9@live.com +515,Business Development,Felicdad Borkett,fborkettea@163.com +516,Training,Terry de Quincey,tdeeb@scribd.com +517,Services,Jacynth McCloughen,jmccloughenec@wiley.com +518,Human Resources,Alick Mizzen,amizzened@time.com +519,Research and Development,Thorny Astbery,tastberyee@alexa.com +520,Marketing,Edmon Vowden,evowdenef@ebay.com +521,Product Management,Hubey Callaway,hcallawayeg@mayoclinic.com +522,Research and Development,Regan Brideoke,rbrideokeeh@usnews.com +523,Engineering,Bernelle Llewellen,bllewellenei@cbc.ca +524,Sales,Darell Spencock,dspencockej@tinyurl.com +525,Legal,Tristan Trowel,ttrowelek@goodreads.com +526,Research and Development,Randi Sapshed,rsapshedel@lulu.com +527,Accounting,Lowrance Drayn,ldraynem@nationalgeographic.com +528,Sales,Delmar Slides,dslidesen@cargocollective.com +529,Training,Ward Reijmers,wreijmerseo@blog.com +530,Research and Development,Perla Dashper,pdashperep@msu.edu +531,Marketing,Carr Naughton,cnaughtoneq@nbcnews.com +532,Accounting,Karissa Southway,ksouthwayer@themeforest.net +533,Legal,Blanca Rraundl,brraundles@wordpress.org +534,Training,Anastasia MacAlister,amacalisteret@digg.com +535,Marketing,Milena MacFarland,mmacfarlandeu@pen.io +536,Human Resources,Gwenora Fuente,gfuenteev@who.int +537,Business Development,Christina Smitham,csmithamew@engadget.com +538,Support,Abelard Dowe,adoweex@taobao.com +539,Services,Kerk Dewdney,kdewdneyey@mysql.com +540,Research and Development,Noah Riding,nridingez@mayoclinic.com +541,Human Resources,Sandro Barenski,sbarenskif0@gov.uk +542,Research and Development,Edee Gazzard,egazzardf1@jalbum.net +543,Human Resources,Verge Measen,vmeasenf2@myspace.com 
+544,Support,Whitman Snibson,wsnibsonf3@wisc.edu +545,Training,Carina Moodie,cmoodief4@umn.edu +546,Engineering,Anetta Maymand,amaymandf5@is.gd +547,Engineering,Amalee Geal,agealf6@archive.org +548,Human Resources,Coralyn Haysham,chayshamf7@livejournal.com +549,Human Resources,Sigfried Codlin,scodlinf8@plala.or.jp +550,Support,Raynell Livezey,rlivezeyf9@google.it +551,Accounting,Ethelin Lappine,elappinefa@bandcamp.com +552,Sales,Theobald Oxx,toxxfb@netvibes.com +553,Support,Granthem Leverette,gleverettefc@symantec.com +554,Support,Ariel Hartus,ahartusfd@scribd.com +555,Training,Olvan Schankelborg,oschankelborgfe@so-net.ne.jp +556,Research and Development,Anallese Blumer,ablumerff@chron.com +557,Services,Lynne Frenchum,lfrenchumfg@wunderground.com +558,Training,Ursulina Serginson,userginsonfh@macromedia.com +559,Sales,Marketa Hargess,mhargessfi@usda.gov +560,Training,Fitzgerald Bramble,fbramblefj@hubpages.com +561,Research and Development,Ema Diable,ediablefk@cdc.gov +562,Legal,Tyson Quincey,tquinceyfl@domainmarket.com +563,Marketing,Brear Sharple,bsharplefm@elegantthemes.com +564,Training,Jordana Renzini,jrenzinifn@oaic.gov.au +565,Legal,Vivi Wetwood,vwetwoodfo@geocities.com +566,Training,Reece Westney,rwestneyfp@unc.edu +567,Product Management,Kial Gent,kgentfq@independent.co.uk +568,Accounting,Babette Chrichton,bchrichtonfr@ibm.com +569,Business Development,Aurelie Gush,agushfs@reference.com +570,Marketing,Cori Lammie,clammieft@theguardian.com +571,Marketing,Birk Grimditch,bgrimditchfu@spotify.com +572,Engineering,Rossie Casbon,rcasbonfv@psu.edu +573,Services,Bertine Iggulden,bigguldenfw@amazon.co.jp +574,Research and Development,Egbert Gudgen,egudgenfx@cdbaby.com +575,Human Resources,Perice Hefford,pheffordfy@sogou.com +576,Business Development,Eben Wormell,ewormellfz@indiatimes.com +577,Business Development,Vallie Riccio,vricciog0@livejournal.com +578,Business Development,Drake Gabbotts,dgabbottsg1@altervista.org +579,Accounting,Stu Ubank,subankg2@arizona.edu 
+580,Business Development,Althea Kinney,akinneyg3@indiegogo.com +581,Sales,Marlene Brambell,mbrambellg4@com.com +582,Human Resources,Melitta Sandwich,msandwichg5@networksolutions.com +583,Human Resources,Kelsey Bucktrout,kbucktroutg6@mac.com +584,Sales,Sinclair Skentelbery,sskentelberyg7@about.com +585,Business Development,Archie Knappitt,aknappittg8@discuz.net +586,Sales,Harrison Royal,hroyalg9@sun.com +587,Human Resources,Clementine Grishanov,cgrishanovga@marketwatch.com +588,Sales,Winni Jacox,wjacoxgb@xrea.com +589,Sales,Candra Sturzaker,csturzakergc@cyberchimps.com +590,Human Resources,Howey Featenby,hfeatenbygd@bbb.org +591,Research and Development,Tamara Dewen,tdewenge@businessweek.com +592,Engineering,Giustino Cabell,gcabellgf@hubpages.com +593,Accounting,Dynah Smorthwaite,dsmorthwaitegg@purevolume.com +594,Support,Giavani Udy,gudygh@google.ca +595,Engineering,Mickie Hegg,mhegggi@shinystat.com +596,Business Development,Vivian Gatrill,vgatrillgj@instagram.com +597,Services,Evaleen Harniman,eharnimangk@ted.com +598,Product Management,Wendi Craighall,wcraighallgl@nyu.edu +599,Business Development,Agneta Hargate,ahargategm@stanford.edu +600,Product Management,Sarette Lind,slindgn@lycos.com +601,Legal,Bonny Jeandot,bjeandotgo@berkeley.edu +602,Product Management,Ella Seebert,eseebertgp@tripod.com +603,Business Development,Judd Lumley,jlumleygq@furl.net +604,Services,Jens Rouby,jroubygr@wired.com +605,Services,Hailey Dorin,hdorings@smh.com.au +606,Accounting,Tildie Greguoli,tgreguoligt@printfriendly.com +607,Sales,Eryn Picardo,epicardogu@tripadvisor.com +608,Sales,Sophie Belk,sbelkgv@wp.com +609,Training,Harmon Ratnage,hratnagegw@yellowbook.com +610,Product Management,Germayne Baudts,gbaudtsgx@over-blog.com +611,Support,Christel Chaimson,cchaimsongy@vimeo.com +612,Product Management,Nisse Matusson,nmatussongz@blog.com +613,Services,Selia Iori,siorih0@va.gov +614,Sales,Lauralee Scarce,lscarceh1@ebay.co.uk +615,Business Development,Helaina 
Walpole,hwalpoleh2@blogger.com +616,Business Development,Randee Hymers,rhymersh3@bloomberg.com +617,Support,Vonnie Barfoot,vbarfooth4@ameblo.jp +618,Legal,Klemens Fyfe,kfyfeh5@ocn.ne.jp +619,Services,Worthy Greste,wgresteh6@nymag.com +620,Services,Patric Richley,prichleyh7@sciencedaily.com +621,Sales,Helga Albers,halbersh8@usda.gov +622,Business Development,Muffin Cocks,mcocksh9@apple.com +623,Legal,Henrietta Bentje,hbentjeha@spotify.com +624,Services,Meade Burtonwood,mburtonwoodhb@facebook.com +625,Business Development,Ralina Hargess,rhargesshc@flavors.me +626,Business Development,Luci Yuranovev,lyuranovevhd@oaic.gov.au +627,Support,Alvinia Leftwich,aleftwichhe@bbc.co.uk +628,Research and Development,Davine Matushevich,dmatushevichhf@newsvine.com +629,Sales,Wilmar Regitz,wregitzhg@e-recht24.de +630,Sales,Marian Fancourt,mfancourthh@sun.com +631,Services,Carmelle Owttrim,cowttrimhi@salon.com +632,Business Development,Lindsay Creenan,lcreenanhj@phoca.cz +633,Support,Brendin Yakebovich,byakebovichhk@chron.com +634,Engineering,Brennen Galsworthy,bgalsworthyhl@foxnews.com +635,Legal,Gardy Stampe,gstampehm@shop-pro.jp +636,Services,Sonnnie Firbank,sfirbankhn@samsung.com +637,Business Development,Lawry Cutchie,lcutchieho@geocities.com +638,Sales,Binny Berge,bbergehp@weather.com +639,Services,Bay Alp,balphq@utexas.edu +640,Business Development,Betsey Trahar,btraharhr@arizona.edu +641,Legal,Monique Antunez,mantunezhs@skype.com +642,Support,Phillipe Cockson,pcocksonht@geocities.jp +643,Engineering,Fraser Tatters,ftattershu@cafepress.com +644,Business Development,Lazaro Epton,leptonhv@icq.com +645,Engineering,Elva Camerello,ecamerellohw@sciencedirect.com +646,Human Resources,Winifield Loynes,wloyneshx@stumbleupon.com +647,Product Management,Rosalinde Passingham,rpassinghamhy@webmd.com +648,Business Development,Berti Munson,bmunsonhz@is.gd +649,Business Development,Kaylyn Hallstone,khallstonei0@craigslist.org +650,Services,Hali Winkett,hwinketti1@wired.com +651,Research and 
Development,Maison Hemeret,mhemereti2@g.co +652,Product Management,Feliza Garnul,fgarnuli3@pinterest.com +653,Legal,Sam Burrill,sburrilli4@pcworld.com +654,Human Resources,Wolfy Andrzejak,wandrzejaki5@nytimes.com +655,Accounting,Bent Goeff,bgoeffi6@bloomberg.com +656,Product Management,Zebulen Emson,zemsoni7@phoca.cz +657,Training,Matias Tace,mtacei8@unesco.org +658,Services,Erin Balsillie,ebalsilliei9@google.cn +659,Training,Nels McKellen,nmckellenia@fema.gov +660,Legal,Lynnelle Capewell,lcapewellib@ca.gov +661,Human Resources,Cal Lishman,clishmanic@taobao.com +662,Accounting,Ulla Allberry,uallberryid@clickbank.net +663,Sales,Falito Pavlata,fpavlataie@cyberchimps.com +664,Support,Maude Ness,mnessif@pinterest.com +665,Product Management,Rahal Ashford,rashfordig@hhs.gov +666,Engineering,Karole Janaud,kjanaudih@rediff.com +667,Training,Cecilia Doyland,cdoylandii@mail.ru +668,Business Development,Avril Boken,abokenij@51.la +669,Accounting,Flinn Ansell,fansellik@ftc.gov +670,Accounting,Bryanty Suff,bsuffil@dyndns.org +671,Marketing,Claretta Cappleman,ccapplemanim@google.com.au +672,Legal,Trenna Sabatier,tsabatierin@intel.com +673,Human Resources,Robinet Kybert,rkybertio@bing.com +674,Accounting,Darice Bulfield,dbulfieldip@mac.com +675,Engineering,Devland Maier,dmaieriq@sfgate.com +676,Product Management,Gearalt Dimont,gdimontir@noaa.gov +677,Training,Cy Franzonello,cfranzonellois@eventbrite.com +678,Accounting,Sharyl Leete,sleeteit@wikispaces.com +679,Accounting,Stesha Scotchmur,sscotchmuriu@rambler.ru +680,Accounting,Chance Wadesworth,cwadesworthiv@sohu.com +681,Sales,Haze Staining,hstainingiw@cocolog-nifty.com +682,Engineering,Jacenta Stobbs,jstobbsix@bandcamp.com +683,Business Development,Rudolf Bathowe,rbathoweiy@usnews.com +684,Sales,Babette Rhead,brheadiz@fotki.com +685,Engineering,Lonni Peasnone,lpeasnonej0@yahoo.co.jp +686,Accounting,Abigale Antal,aantalj1@goo.gl +687,Accounting,Shannen Phelit,sphelitj2@dropbox.com +688,Training,Masha Bateup,mbateupj3@psu.edu 
+689,Services,Annette Aldam,aaldamj4@gravatar.com +690,Business Development,Rosa Elnough,relnoughj5@cbsnews.com +691,Business Development,Claresta Nolot,cnolotj6@wordpress.org +692,Accounting,Channa Surmeir,csurmeirj7@so-net.ne.jp +693,Research and Development,Hagan Brundell,hbrundellj8@mapy.cz +694,Engineering,Judie Kestell,jkestellj9@twitpic.com +695,Human Resources,Lucius Mattek,lmattekja@wordpress.org +696,Human Resources,Demeter Stormouth,dstormouthjb@prweb.com +697,Human Resources,Allard Friman,afrimanjc@whitehouse.gov +698,Human Resources,Annissa Audley,aaudleyjd@drupal.org +699,Business Development,Waylen Annand,wannandje@slideshare.net +700,Accounting,Krissie Spacie,kspaciejf@craigslist.org +701,Support,Edyth Phonix,ephonixjg@virginia.edu +702,Support,Dora Nehl,dnehljh@amazonaws.com +703,Support,Geralda Moar,gmoarji@about.com +704,Human Resources,Anabella Gaitung,agaitungjj@simplemachines.org +705,Engineering,Andre Luckwell,aluckwelljk@samsung.com +706,Sales,Murdoch Jerdein,mjerdeinjl@google.co.uk +707,Research and Development,Cariotta Harbottle,charbottlejm@1und1.de +708,Sales,Bat Headington,bheadingtonjn@columbia.edu +709,Sales,Koral Regler,kreglerjo@e-recht24.de +710,Sales,Laetitia Kees,lkeesjp@yahoo.co.jp +711,Research and Development,Modestia Demko,mdemkojq@icq.com +712,Accounting,Lissie Kensitt,lkensittjr@deviantart.com +713,Sales,Athene Huglin,ahuglinjs@free.fr +714,Research and Development,Hastings Asp,haspjt@goo.ne.jp +715,Human Resources,Silvanus Debow,sdebowju@telegraph.co.uk +716,Accounting,Birgitta D'Andrea,bdandreajv@yahoo.co.jp +717,Services,Muhammad Swindon,mswindonjw@ucoz.ru +718,Training,Malina Barnish,mbarnishjx@google.cn +719,Product Management,Tedda Arnaudi,tarnaudijy@paginegialle.it +720,Legal,Debbie Romero,dromerojz@shinystat.com +721,Product Management,Bella Nehl,bnehlk0@hugedomains.com +722,Business Development,Leighton Fearnall,lfearnallk1@elpais.com +723,Services,Harriet Fairbanks,hfairbanksk2@ucla.edu +724,Training,Jaquelyn 
Browne,jbrownek3@newsvine.com +725,Sales,Brittani Le Page,blek4@about.me +726,Research and Development,Arden Clayal,aclayalk5@arizona.edu +727,Engineering,Miltie Lafford,mlaffordk6@storify.com +728,Engineering,Ezra Stichall,estichallk7@ox.ac.uk +729,Product Management,Clarance Seamans,cseamansk8@latimes.com +730,Human Resources,Allissa Bools,aboolsk9@nydailynews.com +731,Support,Constanta Alexsandrov,calexsandrovka@reuters.com +732,Accounting,King Cushion,kcushionkb@dailymotion.com +733,Accounting,Panchito Halsall,phalsallkc@bandcamp.com +734,Support,Jayme Yeend,jyeendkd@addthis.com +735,Legal,Jeffy Pargeter,jpargeterke@meetup.com +736,Marketing,Jo ann Hiskey,jannkf@mapquest.com +737,Business Development,Fidelio Kubicka,fkubickakg@tiny.cc +738,Business Development,Glynn Blevin,gblevinkh@instagram.com +739,Marketing,Denys Luckes,dluckeski@umich.edu +740,Product Management,Deeanne Langland,dlanglandkj@macromedia.com +741,Services,Ringo Cauldwell,rcauldwellkk@exblog.jp +742,Marketing,Constantino Harring,charringkl@twitter.com +743,Marketing,Myrilla Buckel,mbuckelkm@soup.io +744,Sales,Nikola Rainbow,nrainbowkn@ning.com +745,Engineering,Claudine Sharer,csharerko@studiopress.com +746,Business Development,Joete Schettini,jschettinikp@un.org +747,Product Management,Illa Butter,ibutterkq@mozilla.org +748,Product Management,Pinchas Stearns,pstearnskr@phoca.cz +749,Sales,Gearard Nockells,gnockellsks@unc.edu +750,Human Resources,Vivyanne Screas,vscreaskt@stanford.edu +751,Support,Ray Dagger,rdaggerku@networkadvertising.org +752,Accounting,Malvina Spini,mspinikv@google.pl +753,Marketing,Sophia O'Kielt,sokieltkw@addtoany.com +754,Sales,Kristoforo Guillard,kguillardkx@facebook.com +755,Product Management,Vladimir Frangione,vfrangioneky@drupal.org +756,Support,Rosita Hurd,rhurdkz@smugmug.com +757,Business Development,Geneva Frammingham,gframminghaml0@bing.com +758,Engineering,Tadeo Melmar,tmelmarl1@opera.com +759,Accounting,Melina Quartly,mquartlyl2@cocolog-nifty.com 
+760,Training,Eldin Grundey,egrundeyl3@drupal.org +761,Engineering,Daphene Brodeau,dbrodeaul4@cocolog-nifty.com +762,Research and Development,Dannie Normadell,dnormadelll5@sphinn.com +763,Product Management,Boonie Evelyn,bevelynl6@sohu.com +764,Human Resources,Damaris Kobes,dkobesl7@arizona.edu +765,Business Development,Davy Lawrenson,dlawrensonl8@bbb.org +766,Legal,Licha Galiford,lgalifordl9@cbc.ca +767,Product Management,Pen Treat,ptreatla@mysql.com +768,Research and Development,Gill Allward,gallwardlb@vinaora.com +769,Business Development,Sterne Baxstar,sbaxstarlc@kickstarter.com +770,Engineering,Ulick Grishanin,ugrishaninld@nsw.gov.au +771,Support,Bria Lawden,blawdenle@hubpages.com +772,Product Management,Orella Covil,ocovillf@digg.com +773,Accounting,Ari Haburne,ahaburnelg@wisc.edu +774,Accounting,Sonny Dimmne,sdimmnelh@techcrunch.com +775,Training,Tamar Nansom,tnansomli@geocities.jp +776,Product Management,Bartolemo Joanaud,bjoanaudlj@weebly.com +777,Research and Development,Denise Fever,dfeverlk@oakley.com +778,Support,Charmain Heads,cheadsll@weather.com +779,Support,Arabele Jenne,ajennelm@shareasale.com +780,Support,Friedrick Battrick,fbattrickln@histats.com +781,Engineering,Berkley Fadian,bfadianlo@yahoo.com +782,Engineering,Gabriel Bernadzki,gbernadzkilp@google.de +783,Business Development,Kasey Stinchcombe,kstinchcombelq@nps.gov +784,Product Management,Dareen Gelly,dgellylr@irs.gov +785,Support,Zilvia Zealey,zzealeyls@cdc.gov +786,Research and Development,Gerhardt Housen,ghousenlt@baidu.com +787,Legal,Tadio Zaniolo,tzaniololu@nifty.com +788,Product Management,Pascal Dron,pdronlv@hao123.com +789,Sales,Aurie Murrum,amurrumlw@wunderground.com +790,Engineering,Farr Truss,ftrusslx@tinypic.com +791,Services,Bryna Patise,bpatisely@tmall.com +792,Support,Kirstyn Scurrey,kscurreylz@reddit.com +793,Support,Adrienne Kearford,akearfordm0@instagram.com +794,Engineering,Arther Doerling,adoerlingm1@cbc.ca +795,Sales,Jillian Salaman,jsalamanm2@unblog.fr 
+796,Services,Hillier Basnett,hbasnettm3@github.io +797,Training,Renell Krier,rkrierm4@si.edu +798,Research and Development,Demott Szymon,dszymonm5@ox.ac.uk +799,Human Resources,Padraig Amberger,pambergerm6@chronoengine.com +800,Legal,Malia Firman,mfirmanm7@chronoengine.com +801,Research and Development,Sergent Sabey,ssabeym8@dailymotion.com +802,Product Management,Zahara Tribbeck,ztribbeckm9@elpais.com +803,Human Resources,Sonnie Aymes,saymesma@nba.com +804,Sales,Geoffry Jellis,gjellismb@zimbio.com +805,Marketing,Tabbie Witter,twittermc@blogtalkradio.com +806,Support,Benji Switland,bswitlandmd@howstuffworks.com +807,Accounting,Eleanore Schaumann,eschaumannme@usnews.com +808,Accounting,Sanderson Copcott,scopcottmf@yahoo.com +809,Marketing,Stacy Crosby,scrosbymg@slashdot.org +810,Sales,Clarance Scotford,cscotfordmh@google.it +811,Business Development,Ameline Marzelle,amarzellemi@dyndns.org +812,Engineering,Barbette Knowlton,bknowltonmj@ameblo.jp +813,Accounting,Marla Barbey,mbarbeymk@smh.com.au +814,Accounting,Johann Reijmers,jreijmersml@fema.gov +815,Sales,Jsandye Pilbury,jpilburymm@dmoz.org +816,Services,Conan Pollitt,cpollittmn@umich.edu +817,Human Resources,Petra Moncaster,pmoncastermo@ox.ac.uk +818,Sales,Nolana Tweed,ntweedmp@photobucket.com +819,Product Management,Bree Harrell,bharrellmq@icio.us +820,Training,Kelcie Jalland,kjallandmr@patch.com +821,Legal,Constantin Gibbings,cgibbingsms@deliciousdays.com +822,Human Resources,Hardy Butterley,hbutterleymt@etsy.com +823,Legal,Eli Ratcliffe,eratcliffemu@hostgator.com +824,Business Development,Coralyn Belone,cbelonemv@cbsnews.com +825,Accounting,Vanya Grzelewski,vgrzelewskimw@zimbio.com +826,Engineering,Gregorio Dixcey,gdixceymx@mashable.com +827,Product Management,Jackie Line,jlinemy@si.edu +828,Services,Lek Ahmad,lahmadmz@un.org +829,Sales,Frayda Blabber,fblabbern0@vistaprint.com +830,Marketing,Hammad Marrow,hmarrown1@reuters.com +831,Support,Zuzana Duffer,zduffern2@naver.com +832,Services,Coretta 
Duley,cduleyn3@thetimes.co.uk +833,Human Resources,Adina Buckthorp,abuckthorpn4@cornell.edu +834,Sales,Elwyn Poole,epoolen5@slashdot.org +835,Product Management,Whitman Greenard,wgreenardn6@deliciousdays.com +836,Engineering,Gerri Batterham,gbatterhamn7@deliciousdays.com +837,Marketing,Ivy Cracker,icrackern8@eventbrite.com +838,Product Management,Cyrille Winchurst,cwinchurstn9@tinyurl.com +839,Legal,Delmor Jansema,djansemana@delicious.com +840,Research and Development,Samantha Searl,ssearlnb@pcworld.com +841,Sales,Raddy Abrahamsson,rabrahamssonnc@sun.com +842,Services,Fitzgerald Wolver,fwolvernd@github.io +843,Support,Melvin Haselwood,mhaselwoodne@bandcamp.com +844,Business Development,Hussein Lynds,hlyndsnf@360.cn +845,Business Development,Pryce Voce,pvoceng@scientificamerican.com +846,Business Development,Beilul Thirtle,bthirtlenh@google.nl +847,Legal,Bev Chappel,bchappelni@chronoengine.com +848,Marketing,Britni Gallifont,bgallifontnj@ihg.com +849,Support,Rudolfo Itzhaiek,ritzhaieknk@dell.com +850,Business Development,Dunc Clausewitz,dclausewitznl@psu.edu +851,Human Resources,Allan Thal,athalnm@upenn.edu +852,Marketing,Glennie Lines,glinesnn@free.fr +853,Legal,Wilhelmina Shivlin,wshivlinno@oracle.com +854,Training,Loralyn Tulley,ltulleynp@newyorker.com +855,Research and Development,Geoff Peppin,gpeppinnq@mit.edu +856,Marketing,Abe Physick,aphysicknr@go.com +857,Legal,Deva Tetley,dtetleyns@auda.org.au +858,Services,Gregory Cutridge,gcutridgent@nbcnews.com +859,Accounting,Thaxter Coldman,tcoldmannu@angelfire.com +860,Services,Cathrine Janney,cjanneynv@miitbeian.gov.cn +861,Product Management,Stace Grunwald,sgrunwaldnw@si.edu +862,Human Resources,Fayth Mountfort,fmountfortnx@fastcompany.com +863,Sales,Starla Prettyjohn,sprettyjohnny@surveymonkey.com +864,Human Resources,Donavon Brasted,dbrastednz@dropbox.com +865,Product Management,Deeanne Flacknoe,dflacknoeo0@sohu.com +866,Marketing,Ewen Gian,egiano1@51.la +867,Support,Rodie Sander,rsandero2@slideshare.net 
+868,Engineering,Darb Lapidus,dlapiduso3@whitehouse.gov +869,Services,Morgan Jiggins,mjigginso4@usgs.gov +870,Business Development,Paloma Quadri,pquadrio5@ask.com +871,Services,Deedee Heliet,dhelieto6@goodreads.com +872,Training,Glennis Jerke,gjerkeo7@ezinearticles.com +873,Sales,Dasi Stormonth,dstormontho8@pinterest.com +874,Business Development,Tristan McMeeking,tmcmeekingo9@forbes.com +875,Sales,Sadye Airth,sairthoa@addtoany.com +876,Human Resources,Normand Lilliman,nlillimanob@nature.com +877,Sales,Hansiain Gyenes,hgyenesoc@networkadvertising.org +878,Support,Renado Waterland,rwaterlandod@reverbnation.com +879,Legal,Howie Poon,hpoonoe@samsung.com +880,Training,Derrick Bambrugh,dbambrughof@free.fr +881,Training,Manolo Knapton,mknaptonog@fema.gov +882,Services,Alika Illes,aillesoh@blogger.com +883,Human Resources,Haily Gostage,hgostageoi@digg.com +884,Legal,Aggie Baythrop,abaythropoj@google.pl +885,Accounting,Georg Brimley,gbrimleyok@theatlantic.com +886,Accounting,Cody Matejovsky,cmatejovskyol@google.pl +887,Engineering,Milo Jarvie,mjarvieom@mayoclinic.com +888,Human Resources,Chaddy Tow,ctowon@reference.com +889,Human Resources,Beryl Grafham,bgrafhamoo@networkadvertising.org +890,Training,Trudi Olesen,tolesenop@shareasale.com +891,Marketing,Gardener MacNockater,gmacnockateroq@theguardian.com +892,Training,Hortensia Machen,hmachenor@paypal.com +893,Training,Vlad Rosier,vrosieros@dot.gov +894,Services,Elli Shieldon,eshieldonot@baidu.com +895,Support,Shannon Capron,scapronou@netlog.com +896,Business Development,Maggie Rugiero,mrugieroov@parallels.com +897,Support,Gustaf Sokell,gsokellow@discovery.com +898,Business Development,Nestor Caesmans,ncaesmansox@earthlink.net +899,Product Management,Ring Showl,rshowloy@dagondesign.com +900,Product Management,Curt Tall,ctalloz@mapquest.com +901,Support,Nollie Derrett,nderrettp0@comcast.net +902,Engineering,Urson Wither,uwitherp1@lulu.com +903,Support,Imogene Yandell,iyandellp2@yellowbook.com +904,Support,Kent 
Meier,kmeierp3@ebay.co.uk +905,Sales,Ashil Birtles,abirtlesp4@artisteer.com +906,Sales,Rowney Twyford,rtwyfordp5@dagondesign.com +907,Support,Mathew Emlen,memlenp6@nymag.com +908,Sales,Corny O'Boyle,coboylep7@deliciousdays.com +909,Product Management,Chelsy MacLeese,cmacleesep8@sciencedirect.com +910,Business Development,Nahum Hanigan,nhaniganp9@techcrunch.com +911,Training,Florrie Brundle,fbrundlepa@jugem.jp +912,Marketing,Antonie MacConnell,amacconnellpb@thetimes.co.uk +913,Engineering,Cati Fumagallo,cfumagallopc@flickr.com +914,Engineering,Russell De Caville,rdepd@china.com.cn +915,Sales,Orelee Waymont,owaymontpe@fc2.com +916,Accounting,Terri Simcock,tsimcockpf@usda.gov +917,Engineering,Olenka Leah,oleahpg@ucoz.com +918,Research and Development,Calvin Petrushkevich,cpetrushkevichph@elpais.com +919,Human Resources,Rana Cottage,rcottagepi@merriam-webster.com +920,Accounting,Damiano Beeby,dbeebypj@independent.co.uk +921,Sales,Reinaldo Castletine,rcastletinepk@twitpic.com +922,Marketing,Wernher Watford,wwatfordpl@ucsd.edu +923,Accounting,Noreen Dudding,nduddingpm@fda.gov +924,Marketing,Dane Crevy,dcrevypn@pcworld.com +925,Services,Nydia Dressell,ndressellpo@hc360.com +926,Legal,Petronella Durman,pdurmanpp@forbes.com +927,Human Resources,Regen Finlaison,rfinlaisonpq@sina.com.cn +928,Training,Carlos Scopyn,cscopynpr@qq.com +929,Research and Development,Natka Egre,negreps@bluehost.com +930,Business Development,Noak Strutton,nstruttonpt@mac.com +931,Sales,Astrid Glass,aglasspu@miibeian.gov.cn +932,Training,Darby Pickover,dpickoverpv@google.com +933,Services,Lyn Dunsmore,ldunsmorepw@kickstarter.com +934,Marketing,Nalani Torre,ntorrepx@zimbio.com +935,Legal,Laurette Noli,lnolipy@cyberchimps.com +936,Support,Elena Noteyoung,enoteyoungpz@devhub.com +937,Product Management,Tobias Duddin,tduddinq0@craigslist.org +938,Services,Ketty Ridolfi,kridolfiq1@wikipedia.org +939,Product Management,Dacie Voisey,dvoiseyq2@addtoany.com +940,Human Resources,Hillie Wort,hwortq3@ask.com 
+941,Accounting,Verla Kettlestringes,vkettlestringesq4@github.com +942,Engineering,Jamil Doohey,jdooheyq5@usatoday.com +943,Services,Vale Grollmann,vgrollmannq6@dagondesign.com +944,Support,Morten Laurentin,mlaurentinq7@answers.com +945,Services,Delcina Folley,dfolleyq8@msu.edu +946,Accounting,Lina Treby,ltrebyq9@weibo.com +947,Legal,Philip Wooldridge,pwooldridgeqa@scientificamerican.com +948,Sales,Aurelea Sharples,asharplesqb@360.cn +949,Support,Jerrold Burmaster,jburmasterqc@google.cn +950,Services,Stevie Lorkins,slorkinsqd@accuweather.com +951,Training,Alys Browne,abrowneqe@geocities.com +952,Training,Addi Older,aolderqf@vistaprint.com +953,Human Resources,Joline Sopp,jsoppqg@linkedin.com +954,Services,Roddie McCane,rmccaneqh@abc.net.au +955,Product Management,Ricky Gallemore,rgallemoreqi@usda.gov +956,Human Resources,Gregoire Soanes,gsoanesqj@rakuten.co.jp +957,Services,Herman Casburn,hcasburnqk@java.com +958,Human Resources,Koo MacIlhargy,kmacilhargyql@gizmodo.com +959,Engineering,Brittni Masser,bmasserqm@pbs.org +960,Services,Lenka Asel,laselqn@people.com.cn +961,Business Development,Byrom Blacksland,bblackslandqo@epa.gov +962,Human Resources,Andrew Howsin,ahowsinqp@deliciousdays.com +963,Support,Richard Spykings,rspykingsqq@wikimedia.org +964,Legal,Jeramie Fawcett,jfawcettqr@soup.io +965,Business Development,Cece Barnes,cbarnesqs@answers.com +966,Human Resources,Chickie Gerant,cgerantqt@wunderground.com +967,Support,Killie Guillou,kguillouqu@histats.com +968,Marketing,Pyotr Braferton,pbrafertonqv@yahoo.co.jp +969,Human Resources,Vaughn Lansberry,vlansberryqw@privacy.gov.au +970,Services,Janice Bettis,jbettisqx@slideshare.net +971,Research and Development,Mortimer Heading,mheadingqy@blinklist.com +972,Training,Quentin Trusty,qtrustyqz@dell.com +973,Human Resources,Katinka Shanklin,kshanklinr0@free.fr +974,Business Development,Selena Bustard,sbustardr1@google.de +975,Accounting,Tedda Benez,tbenezr2@is.gd +976,Research and Development,Hakim 
Lugsdin,hlugsdinr3@webnode.com +977,Engineering,Dorie Skeete,dskeeter4@homestead.com +978,Research and Development,Chere Kobieriecki,ckobierieckir5@hc360.com +979,Product Management,Salim Moulsdall,smoulsdallr6@cloudflare.com +980,Human Resources,Oralla Gerin,ogerinr7@constantcontact.com +981,Marketing,Emmerich Gelling,egellingr8@psu.edu +982,Training,Hermione Anstie,hanstier9@hexun.com +983,Research and Development,Ilka Gavigan,igaviganra@stumbleupon.com +984,Accounting,Ginevra Scholar,gscholarrb@bing.com +985,Support,Candra Husk,chuskrc@umn.edu +986,Services,Emilee Peterffy,epeterffyrd@pen.io +987,Services,Robinett Eblein,rebleinre@mediafire.com +988,Training,Trever Beyer,tbeyerrf@cnet.com +989,Legal,Toiboid Crosser,tcrosserrg@digg.com +990,Legal,Maryjane Vreede,mvreederh@state.tx.us +991,Human Resources,Wernher Ecles,weclesri@dropbox.com +992,Sales,Melisandra Vane,mvanerj@sciencedirect.com +993,Human Resources,Normy Fenelow,nfenelowrk@addthis.com +994,Human Resources,Elie Simms,esimmsrl@sfgate.com +995,Training,Kaycee Millom,kmillomrm@networksolutions.com +996,Engineering,Zulema Eldershaw,zeldershawrn@blog.com +997,Research and Development,Rhiamon Geldard,rgeldardro@ted.com +998,Marketing,Rab Sergant,rsergantrp@artisteer.com +999,Research and Development,Dorelle MacVagh,dmacvaghrq@bravesites.com +1000,Support,Elvira Bucham,ebuchamrr@jigsy.com