diff --git a/.gitignore b/.gitignore index 9885b66..46d9564 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .vscode tmp /ripoff +/ripoff-export .DS_Store /export diff --git a/README.md b/README.md index baf6bc7..8a839a0 100644 --- a/README.md +++ b/README.md @@ -95,13 +95,34 @@ rows: An experimental command has been added to generate ripoff files from your database. This may be useful to users just starting to use ripoff who don't have so much fake data that templating is required yet. -Currently, it attempts to export all data from all tables into a single ripoff file. You can use the `--exclude` flag to exclude specific tables from the export: +Currently, it attempts to export all data from all tables into a single ripoff file. You can use the `--exclude` flag to exclude specific tables from the export, and the `--exclude-columns` flag to exclude specific columns: ```bash # Export all tables except 'users' and 'audit_logs' ripoff-export --exclude users --exclude audit_logs /path/to/export + +# Exclude created_at and updated_at columns from all tables +ripoff-export --exclude-columns created_at --exclude-columns updated_at /path/to/export + +# Exclude email column only from users table +ripoff-export --exclude-columns users.email /path/to/export + +# Combine exclusions: exclude created_at globally and email from users table +ripoff-export --exclude-columns created_at --exclude-columns users.email /path/to/export + +# Combine table and column exclusions +ripoff-export --exclude audit_logs --exclude-columns created_at --exclude-columns users.email /path/to/export ``` +## Column Exclusion Format + +The `--exclude-columns` flag accepts two formats: + +- `table.column` - Excludes a specific column from a specific table +- `column` - Excludes the column from ALL tables + +The latter format is especially useful when every table has automatically populated columns such as `created_at` or `updated_at`: excluding them avoids noisy updates when you re-export your data. 
+ In the future, additional flags may be added to allow you to include tables, add arbitrary `WHERE` conditions, modify the row id/key, export multiple files, or use existing templates. ## Installation diff --git a/cmd/ripoff-export/ripoff_export.go b/cmd/ripoff-export/ripoff_export.go index 3da8e45..8039f8d 100644 --- a/cmd/ripoff-export/ripoff_export.go +++ b/cmd/ripoff-export/ripoff_export.go @@ -23,11 +23,13 @@ func errAttr(err error) slog.Attr { func main() { // Define flags var excludeTables stringSliceFlag + var excludeColumns stringSliceFlag flag.Var(&excludeTables, "exclude", "Exclude specific tables from export (can be specified multiple times)") - + flag.Var(&excludeColumns, "exclude-columns", "Exclude specific columns from export. Format: 'table.column' or 'column' (can be specified multiple times)") + // Parse flags flag.Parse() - + dburl := os.Getenv("DATABASE_URL") if dburl == "" { slog.Error("DATABASE_URL env variable is required") @@ -97,8 +99,8 @@ func main() { } }() - // Pass the excluded tables to the export function - ripoffFile, err := ripoff.ExportToRipoff(ctx, tx, excludeTables) + // Pass the excluded tables and columns to the export function + ripoffFile, err := ripoff.ExportToRipoff(ctx, tx, excludeTables, excludeColumns) if err != nil { slog.Error("Could not assemble ripoff file from database", errAttr(err)) os.Exit(1) diff --git a/export.go b/export.go index 8933263..1fee25d 100644 --- a/export.go +++ b/export.go @@ -10,6 +10,48 @@ import ( "github.com/lib/pq" ) +// parseColumnExclusions parses column exclusion specifications and returns +// table-specific exclusions and global column exclusions. 
+func parseColumnExclusions(excludeColumns []string) (map[string][]string, []string) { + tableSpecific := make(map[string][]string) + var globalColumns []string + + for _, spec := range excludeColumns { + parts := strings.SplitN(spec, ".", 2) + if len(parts) == 2 { + // table.column format + table, column := parts[0], parts[1] + tableSpecific[table] = append(tableSpecific[table], column) + } else { + // column format - applies to all tables + globalColumns = append(globalColumns, spec) + } + } + + return tableSpecific, globalColumns +} + +// shouldExcludeColumn checks if a column should be excluded based on exclusion rules. +func shouldExcludeColumn(table, column string, tableSpecific map[string][]string, globalColumns []string) bool { + // Check global column exclusions + for _, globalCol := range globalColumns { + if column == globalCol { + return true + } + } + + // Check table-specific exclusions + if excludedCols, exists := tableSpecific[table]; exists { + for _, excludedCol := range excludedCols { + if column == excludedCol { + return true + } + } + } + + return false +} + type RowMissingDependency struct { Row Row ConstraintMapKey [3]string @@ -17,33 +59,38 @@ type RowMissingDependency struct { // Exports all rows in the database to a ripoff file. // excludeTables is a list of table names to exclude from the export. -func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string) (RipoffFile, error) { +// excludeColumns is a list of column specifications to exclude from the export. +// Format: "table.column" (exclude column from specific table) or "column" (exclude column from all tables). 
+func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string, excludeColumns []string) (RipoffFile, error) { ripoffFile := RipoffFile{ Rows: map[string]Row{}, } + // Parse column exclusions + tableSpecificExclusions, globalColumnExclusions := parseColumnExclusions(excludeColumns) + // We use primary keys to determine what columns to use as row keys. primaryKeyResult, err := getPrimaryKeys(ctx, tx) if err != nil { return ripoffFile, err } - + // Remove excluded tables from the primary keys for _, table := range excludeTables { delete(primaryKeyResult, table) } - + // We use foreign keys to reference other rows using the table_name:literal(...) syntax. foreignKeyResult, err := getForeignKeysResult(ctx, tx) if err != nil { return ripoffFile, err } - + // Remove excluded tables from foreign key results for _, table := range excludeTables { delete(foreignKeyResult, table) } - + // A map from [table,column] -> ForeignKey for single column foreign keys. singleColumnFkeyMap := map[[2]string]*ForeignKey{} // A map from [table,constraintName,values] -> rowKey. @@ -59,18 +106,27 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string) (Rip missingDependencies := []RowMissingDependency{} for table, primaryKeys := range primaryKeyResult { - columns := make([]string, len(foreignKeyResult[table].Columns)) - // Due to yaml limitations, ripoff treats all data as nullable text on import and export. 
- for i, column := range foreignKeyResult[table].Columns { - columns[i] = fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column)) + // Filter out excluded columns from the foreign key result columns + var filteredColumns []string + for _, column := range foreignKeyResult[table].Columns { + if !shouldExcludeColumn(table, column, tableSpecificExclusions, globalColumnExclusions) { + filteredColumns = append(filteredColumns, fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column))) + } + } + + // Skip table if no columns remain after filtering + if len(filteredColumns) == 0 { + continue } - selectQuery := fmt.Sprintf("SELECT %s FROM %s;", strings.Join(columns, ", "), pq.QuoteIdentifier(table)) + + selectQuery := fmt.Sprintf("SELECT %s FROM %s;", strings.Join(filteredColumns, ", "), pq.QuoteIdentifier(table)) rows, err := tx.Query(ctx, selectQuery) if err != nil { return RipoffFile{}, err } defer rows.Close() fields := rows.FieldDescriptions() + for rows.Next() { columnsRaw, err := rows.Values() if err != nil { diff --git a/export_test.go b/export_test.go index 6d72d77..b0b5d8b 100644 --- a/export_test.go +++ b/export_test.go @@ -23,7 +23,7 @@ func runExportTestData(t *testing.T, ctx context.Context, tx pgx.Tx, testDir str require.NoError(t, err) // Generate new ripoff file. - ripoffFile, err := ExportToRipoff(ctx, tx, []string{}) + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{}) require.NoError(t, err) // Ensure ripoff file matches expected output. 
@@ -140,7 +140,7 @@ func TestExcludeFlag(t *testing.T) { // Test 1: Exclude a single table t.Run("Single exclude", func(t *testing.T) { - ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me"}) + ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me"}, []string{}) require.NoError(t, err) // Verify that ripoffFile.Rows contains rows from include_me but not exclude_me @@ -165,10 +165,10 @@ func TestExcludeFlag(t *testing.T) { // We should have rows from include_me require.True(t, hasIncludeMe, "Expected to find rows from include_me table") - + // We should NOT have rows from exclude_me require.False(t, hasExcludeMe, "Found rows from exclude_me table even though it was excluded") - + // We should have rows from also_exclude_me (since it wasn't excluded in this test) require.True(t, hasAlsoExcludeMe, "Expected to find rows from also_exclude_me table") @@ -199,7 +199,7 @@ func TestExcludeFlag(t *testing.T) { // Test 2: Exclude multiple tables t.Run("Multiple excludes", func(t *testing.T) { - ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me", "also_exclude_me"}) + ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me", "also_exclude_me"}, []string{}) require.NoError(t, err) // Verify that ripoffFile.Rows contains rows from include_me but not from the excluded tables @@ -224,10 +224,10 @@ func TestExcludeFlag(t *testing.T) { // We should have rows from include_me require.True(t, hasIncludeMe, "Expected to find rows from include_me table") - + // We should NOT have rows from exclude_me require.False(t, hasExcludeMe, "Found rows from exclude_me table even though it was excluded") - + // We should NOT have rows from also_exclude_me require.False(t, hasAlsoExcludeMe, "Found rows from also_exclude_me table even though it was excluded") @@ -256,3 +256,141 @@ func TestExcludeFlag(t *testing.T) { require.Equal(t, 0, alsoExcludeCount, "Expected 0 rows from also_exclude_me table") }) } + +// TestExcludeColumnsFlag tests that the 
exclude-columns flag properly excludes columns from export +func TestExcludeColumnsFlag(t *testing.T) { + envUrl := os.Getenv("RIPOFF_TEST_DATABASE_URL") + if envUrl == "" { + envUrl = "postgres:///ripoff-test-db" + } + ctx := context.Background() + conn, err := pgx.Connect(ctx, envUrl) + if err != nil { + require.NoError(t, err) + } + defer conn.Close(ctx) + + // Start a transaction that we'll roll back at the end + tx, err := conn.Begin(ctx) + require.NoError(t, err) + defer func() { + err := tx.Rollback(ctx) + require.NoError(t, err) + }() + + // Create test tables with timestamped columns + _, err = tx.Exec(ctx, ` + CREATE TABLE users ( + id SERIAL PRIMARY KEY, + name TEXT, + email TEXT, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() + ); + + CREATE TABLE posts ( + id SERIAL PRIMARY KEY, + title TEXT, + content TEXT, + user_id INTEGER REFERENCES users(id), + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() + ); + + INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'); + INSERT INTO posts (title, content, user_id) VALUES + ('Post 1', 'Content 1', 1), + ('Post 2', 'Content 2', 1), + ('Post 3', 'Content 3', 2); + `) + require.NoError(t, err) + + // Test 1: Exclude global columns (created_at, updated_at) + t.Run("Global column exclusion", func(t *testing.T) { + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"created_at", "updated_at"}) + require.NoError(t, err) + + // Verify that no row contains created_at or updated_at columns + for rowId, row := range ripoffFile.Rows { + _, hasCreatedAt := row["created_at"] + _, hasUpdatedAt := row["updated_at"] + require.False(t, hasCreatedAt, "Row %s should not have created_at column", rowId) + require.False(t, hasUpdatedAt, "Row %s should not have updated_at column", rowId) + + // But should still have other columns + tableName := strings.Split(rowId, ":")[0] + switch tableName { + case "users": + _, hasName := 
row["name"] + _, hasEmail := row["email"] + require.True(t, hasName, "Row %s should have name column", rowId) + require.True(t, hasEmail, "Row %s should have email column", rowId) + case "posts": + _, hasTitle := row["title"] + _, hasContent := row["content"] + require.True(t, hasTitle, "Row %s should have title column", rowId) + require.True(t, hasContent, "Row %s should have content column", rowId) + } + } + }) + + // Test 2: Exclude table-specific column (users.created_at) - shared column name + t.Run("Table-specific column exclusion", func(t *testing.T) { + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"users.created_at"}) + require.NoError(t, err) + + // Verify that user rows don't have created_at but post rows still have created_at + for rowId, row := range ripoffFile.Rows { + tableName := strings.Split(rowId, ":")[0] + switch tableName { + case "users": + _, hasCreatedAt := row["created_at"] + require.False(t, hasCreatedAt, "User row %s should not have created_at column", rowId) + // Should still have other columns + _, hasName := row["name"] + _, hasEmail := row["email"] + require.True(t, hasName, "User row %s should have name column", rowId) + require.True(t, hasEmail, "User row %s should have email column", rowId) + case "posts": + // Posts should have created_at since only users.created_at was excluded + _, hasTitle := row["title"] + _, hasCreatedAt := row["created_at"] + require.True(t, hasTitle, "Post row %s should have title column", rowId) + require.True(t, hasCreatedAt, "Post row %s should have created_at column", rowId) + } + } + }) + + // Test 3: Combine both exclusion types + t.Run("Combined exclusions", func(t *testing.T) { + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"created_at", "users.email"}) + require.NoError(t, err) + + // Verify exclusions are applied correctly + for rowId, row := range ripoffFile.Rows { + tableName := strings.Split(rowId, ":")[0] + + // No row should have created_at (global 
exclusion) + _, hasCreatedAt := row["created_at"] + require.False(t, hasCreatedAt, "Row %s should not have created_at column", rowId) + + switch tableName { + case "users": + // Users should not have email (table-specific exclusion) + _, hasEmail := row["email"] + require.False(t, hasEmail, "User row %s should not have email column", rowId) + // But should have name and updated_at + _, hasName := row["name"] + _, hasUpdatedAt := row["updated_at"] + require.True(t, hasName, "User row %s should have name column", rowId) + require.True(t, hasUpdatedAt, "User row %s should have updated_at column", rowId) + case "posts": + // Posts should have all columns except created_at + _, hasTitle := row["title"] + _, hasUpdatedAt := row["updated_at"] + require.True(t, hasTitle, "Post row %s should have title column", rowId) + require.True(t, hasUpdatedAt, "Post row %s should have updated_at column", rowId) + } + } + }) +}