Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.vscode
tmp
/ripoff
/ripoff-export
.DS_Store
/export
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,34 @@ rows:

An experimental command has been added to generate ripoff files from your database. This may be useful to users just starting to use ripoff who don't have so much fake data that templating is required yet.

Currently, it attempts to export all data from all tables into a single ripoff file. You can use the `--exclude` flag to exclude specific tables from the export:
Currently, it attempts to export all data from all tables into a single ripoff file. You can use the `--exclude` flag to exclude specific tables from the export, and the `--exclude-columns` flag to exclude specific columns:

```bash
# Export all tables except 'users' and 'audit_logs'
ripoff-export --exclude users --exclude audit_logs /path/to/export

# Exclude created_at and updated_at columns from all tables
ripoff-export --exclude-columns created_at --exclude-columns updated_at /path/to/export

# Exclude email column only from users table
ripoff-export --exclude-columns users.email /path/to/export

# Combine exclusions: exclude created_at globally and email from users table
ripoff-export --exclude-columns created_at --exclude-columns users.email /path/to/export

# Combine table and column exclusions
ripoff-export --exclude audit_logs --exclude-columns created_at --exclude-columns users.email /path/to/export
```

## Column Exclusion Format

The `--exclude-columns` flag accepts two formats:

- `table.column` - Excludes a specific column from a specific table
- `column` - Excludes the column from ALL tables

The latter format is especially useful when every table has automatically populated columns such as `created_at` or `updated_at`: excluding them avoids noisy changes each time you re-export your data.

In the future, additional flags may be added to allow you to include tables, add arbitrary `WHERE` conditions, modify the row id/key, export multiple files, or use existing templates.

## Installation
Expand Down
10 changes: 6 additions & 4 deletions cmd/ripoff-export/ripoff_export.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ func errAttr(err error) slog.Attr {
func main() {
// Define flags
var excludeTables stringSliceFlag
var excludeColumns stringSliceFlag
flag.Var(&excludeTables, "exclude", "Exclude specific tables from export (can be specified multiple times)")

flag.Var(&excludeColumns, "exclude-columns", "Exclude specific columns from export. Format: 'table.column' or 'column' (can be specified multiple times)")

// Parse flags
flag.Parse()

dburl := os.Getenv("DATABASE_URL")
if dburl == "" {
slog.Error("DATABASE_URL env variable is required")
Expand Down Expand Up @@ -97,8 +99,8 @@ func main() {
}
}()

// Pass the excluded tables to the export function
ripoffFile, err := ripoff.ExportToRipoff(ctx, tx, excludeTables)
// Pass the excluded tables and columns to the export function
ripoffFile, err := ripoff.ExportToRipoff(ctx, tx, excludeTables, excludeColumns)
if err != nil {
slog.Error("Could not assemble ripoff file from database", errAttr(err))
os.Exit(1)
Expand Down
76 changes: 66 additions & 10 deletions export.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,40 +10,87 @@ import (
"github.com/lib/pq"
)

// parseColumnExclusions sorts raw --exclude-columns specifications into two
// buckets. A spec containing a dot is split at the FIRST dot into a table name
// and a column name and recorded under that table; a spec without a dot is
// treated as a column name that applies to every table.
//
// It returns the per-table exclusions (always a non-nil map) followed by the
// global column names (nil when there are none). Order of appearance is kept
// within each bucket.
func parseColumnExclusions(excludeColumns []string) (map[string][]string, []string) {
	var everywhere []string
	perTable := map[string][]string{}

	for _, entry := range excludeColumns {
		tableName, columnName, qualified := strings.Cut(entry, ".")
		if !qualified {
			// Bare column name: applies to all tables.
			everywhere = append(everywhere, entry)
			continue
		}
		// "table.column": anything after the first dot is the column name.
		perTable[tableName] = append(perTable[tableName], columnName)
	}

	return perTable, everywhere
}

// shouldExcludeColumn reports whether column of table must be left out of the
// export. A column is excluded when its name appears in globalColumns, or in
// the list stored under table in tableSpecific. Comparison is an exact,
// case-sensitive string match.
func shouldExcludeColumn(table, column string, tableSpecific map[string][]string, globalColumns []string) bool {
	// listed reports whether column occurs in names; a nil slice never matches.
	listed := func(names []string) bool {
		for i := range names {
			if names[i] == column {
				return true
			}
		}
		return false
	}

	// A missing table key yields a nil slice, so no existence check is needed.
	return listed(globalColumns) || listed(tableSpecific[table])
}

// RowMissingDependency pairs a Row with a three-part constraint map key.
// NOTE(review): presumably this records a row whose foreign-key target had not
// been exported yet when the row was processed — confirm against ExportToRipoff.
type RowMissingDependency struct {
// Row is the row associated with the missing dependency.
Row Row
// ConstraintMapKey is a three-string key; presumably [table, constraintName, values],
// matching the constraint map built in ExportToRipoff — TODO confirm.
ConstraintMapKey [3]string
}

// Exports all rows in the database to a ripoff file.
// excludeTables is a list of table names to exclude from the export.
func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string) (RipoffFile, error) {
// excludeColumns is a list of column specifications to exclude from the export.
// Format: "table.column" (exclude column from specific table) or "column" (exclude column from all tables).
func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string, excludeColumns []string) (RipoffFile, error) {
ripoffFile := RipoffFile{
Rows: map[string]Row{},
}

// Parse column exclusions
tableSpecificExclusions, globalColumnExclusions := parseColumnExclusions(excludeColumns)

// We use primary keys to determine what columns to use as row keys.
primaryKeyResult, err := getPrimaryKeys(ctx, tx)
if err != nil {
return ripoffFile, err
}

// Remove excluded tables from the primary keys
for _, table := range excludeTables {
delete(primaryKeyResult, table)
}

// We use foreign keys to reference other rows using the table_name:literal(...) syntax.
foreignKeyResult, err := getForeignKeysResult(ctx, tx)
if err != nil {
return ripoffFile, err
}

// Remove excluded tables from foreign key results
for _, table := range excludeTables {
delete(foreignKeyResult, table)
}

// A map from [table,column] -> ForeignKey for single column foreign keys.
singleColumnFkeyMap := map[[2]string]*ForeignKey{}
// A map from [table,constraintName,values] -> rowKey.
Expand All @@ -59,18 +106,27 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string) (Rip
missingDependencies := []RowMissingDependency{}

for table, primaryKeys := range primaryKeyResult {
columns := make([]string, len(foreignKeyResult[table].Columns))
// Due to yaml limitations, ripoff treats all data as nullable text on import and export.
for i, column := range foreignKeyResult[table].Columns {
columns[i] = fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column))
// Filter out excluded columns from the foreign key result columns
var filteredColumns []string
for _, column := range foreignKeyResult[table].Columns {
if !shouldExcludeColumn(table, column, tableSpecificExclusions, globalColumnExclusions) {
filteredColumns = append(filteredColumns, fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column)))
}
}

// Skip table if no columns remain after filtering
if len(filteredColumns) == 0 {
continue
}
selectQuery := fmt.Sprintf("SELECT %s FROM %s;", strings.Join(columns, ", "), pq.QuoteIdentifier(table))

selectQuery := fmt.Sprintf("SELECT %s FROM %s;", strings.Join(filteredColumns, ", "), pq.QuoteIdentifier(table))
rows, err := tx.Query(ctx, selectQuery)
if err != nil {
return RipoffFile{}, err
}
defer rows.Close()
fields := rows.FieldDescriptions()

for rows.Next() {
columnsRaw, err := rows.Values()
if err != nil {
Expand Down
152 changes: 145 additions & 7 deletions export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func runExportTestData(t *testing.T, ctx context.Context, tx pgx.Tx, testDir str
require.NoError(t, err)

// Generate new ripoff file.
ripoffFile, err := ExportToRipoff(ctx, tx, []string{})
ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{})
require.NoError(t, err)

// Ensure ripoff file matches expected output.
Expand Down Expand Up @@ -140,7 +140,7 @@ func TestExcludeFlag(t *testing.T) {

// Test 1: Exclude a single table
t.Run("Single exclude", func(t *testing.T) {
ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me"})
ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me"}, []string{})
require.NoError(t, err)

// Verify that ripoffFile.Rows contains rows from include_me but not exclude_me
Expand All @@ -165,10 +165,10 @@ func TestExcludeFlag(t *testing.T) {

// We should have rows from include_me
require.True(t, hasIncludeMe, "Expected to find rows from include_me table")

// We should NOT have rows from exclude_me
require.False(t, hasExcludeMe, "Found rows from exclude_me table even though it was excluded")

// We should have rows from also_exclude_me (since it wasn't excluded in this test)
require.True(t, hasAlsoExcludeMe, "Expected to find rows from also_exclude_me table")

Expand Down Expand Up @@ -199,7 +199,7 @@ func TestExcludeFlag(t *testing.T) {

// Test 2: Exclude multiple tables
t.Run("Multiple excludes", func(t *testing.T) {
ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me", "also_exclude_me"})
ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me", "also_exclude_me"}, []string{})
require.NoError(t, err)

// Verify that ripoffFile.Rows contains rows from include_me but not from the excluded tables
Expand All @@ -224,10 +224,10 @@ func TestExcludeFlag(t *testing.T) {

// We should have rows from include_me
require.True(t, hasIncludeMe, "Expected to find rows from include_me table")

// We should NOT have rows from exclude_me
require.False(t, hasExcludeMe, "Found rows from exclude_me table even though it was excluded")

// We should NOT have rows from also_exclude_me
require.False(t, hasAlsoExcludeMe, "Found rows from also_exclude_me table even though it was excluded")

Expand Down Expand Up @@ -256,3 +256,141 @@ func TestExcludeFlag(t *testing.T) {
require.Equal(t, 0, alsoExcludeCount, "Expected 0 rows from also_exclude_me table")
})
}

// TestExcludeColumnsFlag tests that the exclude-columns flag properly excludes columns from export
func TestExcludeColumnsFlag(t *testing.T) {
envUrl := os.Getenv("RIPOFF_TEST_DATABASE_URL")
if envUrl == "" {
envUrl = "postgres:///ripoff-test-db"
}
ctx := context.Background()
conn, err := pgx.Connect(ctx, envUrl)
if err != nil {
require.NoError(t, err)
}
defer conn.Close(ctx)

// Start a transaction that we'll roll back at the end
tx, err := conn.Begin(ctx)
require.NoError(t, err)
defer func() {
err := tx.Rollback(ctx)
require.NoError(t, err)
}()

// Create test tables with timestamped columns
_, err = tx.Exec(ctx, `
CREATE TABLE users (
id SERIAL PRIMARY KEY,
name TEXT,
email TEXT,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);

CREATE TABLE posts (
id SERIAL PRIMARY KEY,
title TEXT,
content TEXT,
user_id INTEGER REFERENCES users(id),
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);

INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com');
INSERT INTO posts (title, content, user_id) VALUES
('Post 1', 'Content 1', 1),
('Post 2', 'Content 2', 1),
('Post 3', 'Content 3', 2);
`)
require.NoError(t, err)

// Test 1: Exclude global columns (created_at, updated_at)
t.Run("Global column exclusion", func(t *testing.T) {
ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"created_at", "updated_at"})
require.NoError(t, err)

// Verify that no row contains created_at or updated_at columns
for rowId, row := range ripoffFile.Rows {
_, hasCreatedAt := row["created_at"]
_, hasUpdatedAt := row["updated_at"]
require.False(t, hasCreatedAt, "Row %s should not have created_at column", rowId)
require.False(t, hasUpdatedAt, "Row %s should not have updated_at column", rowId)

// But should still have other columns
tableName := strings.Split(rowId, ":")[0]
switch tableName {
case "users":
_, hasName := row["name"]
_, hasEmail := row["email"]
require.True(t, hasName, "Row %s should have name column", rowId)
require.True(t, hasEmail, "Row %s should have email column", rowId)
case "posts":
_, hasTitle := row["title"]
_, hasContent := row["content"]
require.True(t, hasTitle, "Row %s should have title column", rowId)
require.True(t, hasContent, "Row %s should have content column", rowId)
}
}
})

// Test 2: Exclude table-specific column (users.created_at) - shared column name
t.Run("Table-specific column exclusion", func(t *testing.T) {
ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"users.created_at"})
require.NoError(t, err)

// Verify that user rows don't have created_at but post rows still have created_at
for rowId, row := range ripoffFile.Rows {
tableName := strings.Split(rowId, ":")[0]
switch tableName {
case "users":
_, hasCreatedAt := row["created_at"]
require.False(t, hasCreatedAt, "User row %s should not have created_at column", rowId)
// Should still have other columns
_, hasName := row["name"]
_, hasEmail := row["email"]
require.True(t, hasName, "User row %s should have name column", rowId)
require.True(t, hasEmail, "User row %s should have email column", rowId)
case "posts":
// Posts should have created_at since only users.created_at was excluded
_, hasTitle := row["title"]
_, hasCreatedAt := row["created_at"]
require.True(t, hasTitle, "Post row %s should have title column", rowId)
require.True(t, hasCreatedAt, "Post row %s should have created_at column", rowId)
}
}
})

// Test 3: Combine both exclusion types
t.Run("Combined exclusions", func(t *testing.T) {
ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"created_at", "users.email"})
require.NoError(t, err)
Comment on lines +364 to +367
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine if the test only includes a combined case like this fwiw. But more tests == more good, probably

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tend to have a lot more individual tests lately. I'm not sure if it's actually good, but having overlapping test edges tends to be more protective over the long term in my experience. Happy to prune it down to a more targeted system though if you'd prefer.


// Verify exclusions are applied correctly
for rowId, row := range ripoffFile.Rows {
tableName := strings.Split(rowId, ":")[0]

// No row should have created_at (global exclusion)
_, hasCreatedAt := row["created_at"]
require.False(t, hasCreatedAt, "Row %s should not have created_at column", rowId)

switch tableName {
case "users":
// Users should not have email (table-specific exclusion)
_, hasEmail := row["email"]
require.False(t, hasEmail, "User row %s should not have email column", rowId)
// But should have name and updated_at
_, hasName := row["name"]
_, hasUpdatedAt := row["updated_at"]
require.True(t, hasName, "User row %s should have name column", rowId)
require.True(t, hasUpdatedAt, "User row %s should have updated_at column", rowId)
case "posts":
// Posts should have all columns except created_at
_, hasTitle := row["title"]
_, hasUpdatedAt := row["updated_at"]
require.True(t, hasTitle, "Post row %s should have title column", rowId)
require.True(t, hasUpdatedAt, "Post row %s should have updated_at column", rowId)
}
}
})
}