Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 66 additions & 2 deletions backend/src/api/project/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ import {
ProjectParams,
ProjectResponseSchema,
ReplaceOperationSchema,
TrimWhitespaceSchema,
type Project,
} from '@backend/api/project/schemas'
import { databasePlugin } from '@backend/plugins/database'
import { errorHandlerPlugin } from '@backend/plugins/error-handler'
import { ReplaceOperationService } from '@backend/services/replace-operation.service'
import { TrimWhitespaceService } from '@backend/services/trim-whitespace.service'
import { ApiErrorHandler } from '@backend/types/error-handler'
import { ApiErrors } from '@backend/types/error-schemas'
import { enhanceSchemaWithTypes, type DuckDBTablePragma } from '@backend/utils/duckdb-types'
Expand Down Expand Up @@ -558,7 +560,7 @@ export const projectRoutes = new Elysia({ prefix: '/api/project' })
const replaceService = new ReplaceOperationService(db())

try {
const affectedRows = await replaceService.performReplace({
const affectedRows = await replaceService.performOperation({
table,
column,
find,
Expand All @@ -584,7 +586,7 @@ export const projectRoutes = new Elysia({ prefix: '/api/project' })
body: ReplaceOperationSchema,
response: {
200: t.Object({
affectedRows: t.Number(),
affectedRows: t.Integer(),
}),
400: ApiErrors,
404: ApiErrors,
Expand All @@ -598,3 +600,65 @@ export const projectRoutes = new Elysia({ prefix: '/api/project' })
},
},
)

.post(
  '/:projectId/trim_whitespace',
  /**
   * Trims leading/trailing whitespace from every value of one column of a project table.
   * Responds 400 when the column is not present, 500 when the operation itself fails,
   * and otherwise returns the number of affected rows.
   */
  async ({ db, params: { projectId }, body: { column }, status }) => {
    const table = `project_${projectId}`

    // Look the column up in the catalog before touching any data
    const lookup = await db().runAndReadAll(
      'SELECT 1 FROM information_schema.columns WHERE table_name = ? AND column_name = ?',
      [table, column],
    )
    const columnExists = lookup.getRows().length !== 0

    if (!columnExists) {
      return status(
        400,
        ApiErrorHandler.validationErrorWithData('Column not found', [
          `Column '${column}' does not exist in table '${table}'`,
        ]),
      )
    }

    const service = new TrimWhitespaceService(db())

    try {
      const affectedRows = await service.performOperation({ table, column })

      return { affectedRows }
    } catch (error) {
      // Surface the underlying message when one is available
      const reason = error instanceof Error ? error.message : 'Unknown error occurred'

      return status(
        500,
        ApiErrorHandler.internalServerErrorWithData('Failed to perform trim whitespace operation', [
          reason,
        ]),
      )
    }
  },
  {
    body: TrimWhitespaceSchema,
    response: {
      200: t.Object({
        affectedRows: t.Integer(),
      }),
      400: ApiErrors,
      404: ApiErrors,
      422: ApiErrors,
      500: ApiErrors,
    },
    detail: {
      summary: 'Trim leading and trailing whitespace from a column',
      description:
        'Remove leading and trailing whitespace characters from all values in a specific column',
      tags,
    },
  },
)
8 changes: 8 additions & 0 deletions backend/src/api/project/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,11 @@ export const ReplaceOperationSchema = t.Object({
default: false,
}),
})

/**
 * Request body schema for the trim-whitespace operation.
 *
 * `column` names the column to trim; it must be a string of at least one
 * character, otherwise validation fails with the message configured in `error`.
 */
export const TrimWhitespaceSchema = t.Object({
column: t.String({
minLength: 1,
error: 'Column name is required and must be at least 1 character long',
}),
})
116 changes: 116 additions & 0 deletions backend/src/services/column-operation.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import type { DuckDBConnection, DuckDBValue } from '@duckdb/node-api'

/**
 * Minimum input every column operation needs: which table and which column to act on.
 * Concrete operations extend this with their own options.
 */
export interface ColumnOperationParams {
// Name of the table that holds the column
table: string
// Name of the column the operation is applied to
column: string
}

export abstract class ColumnOperationService {
/**
 * @param db - DuckDB connection on which every statement of the operation is run;
 *   `protected` so that subclasses can issue their own queries on it
 */
constructor(protected db: DuckDBConnection) {}

/**
 * Abstract method that must be implemented by subclasses to perform the specific operation.
 * This will be the entry point called from the API endpoints.
 *
 * @param params - target table and column (subclasses may accept an extended params type)
 * @returns a promise resolving to a number; presumably the count of affected rows,
 *   as produced by `executeColumnOperation` — confirm against each implementation
 */
public abstract performOperation(params: ColumnOperationParams): Promise<number>

/**
 * Runs a column operation inside a single transaction.
 *
 * Common pattern for column operations:
 * 1. Begin a transaction
 * 2. Ensure the column is a string type, converting it to VARCHAR if needed
 * 3. Count the rows the operation would affect
 * 4. If no rows are affected, roll back and return 0 (the rollback is expected to
 *    also discard any type conversion made in step 2, since it ran in the same transaction)
 * 5. Otherwise execute the statement built by `operation` and commit
 *
 * On any failure the transaction is rolled back and the ORIGINAL error is rethrown,
 * even if the rollback itself fails.
 *
 * @param table - name of the table to operate on
 * @param column - name of the column to operate on
 * @param operation - builds the parameterized statement; only called when at least one row is affected
 * @param countAffectedRows - resolves to the number of rows the statement would change
 * @returns the number of rows counted before the statement was executed
 * @throws whatever error the type conversion, the count, the statement or the commit raised
 */
protected async executeColumnOperation(
  table: string,
  column: string,
  operation: () => { query: string; params: DuckDBValue[] },
  countAffectedRows: () => Promise<number>,
): Promise<number> {
  await this.db.run('BEGIN TRANSACTION')

  try {
    // Check if column is string-like, if not, convert it first
    await this.ensureColumnIsStringType(table, column)

    // Count rows that will be affected before the update
    const affectedRows = await countAffectedRows()

    // Nothing to update: undo everything done so far in this transaction
    if (affectedRows === 0) {
      await this.db.run('ROLLBACK')

      return affectedRows
    }

    // Build and execute the parameterized UPDATE query
    const { query, params } = operation()
    await this.db.run(query, params)
    await this.db.run('COMMIT')

    return affectedRows
  } catch (error) {
    try {
      await this.db.run('ROLLBACK')
    } catch {
      // The rollback can fail too (e.g. the transaction is already closed).
      // That secondary failure must not replace the error that explains what
      // actually went wrong, so it is intentionally not propagated.
    }

    throw error
  }
}

/**
 * Changes the column type using ALTER TABLE.
 *
 * Table and column names are emitted as double-quoted identifiers with any
 * embedded double quote doubled, so names containing `"` neither break the
 * statement nor allow SQL to be injected through an identifier.
 *
 * @param table - name of the table that holds the column
 * @param column - name of the column whose type is changed
 * @param newType - target SQL type; interpolated verbatim, so it must be a
 *   trusted type name and never user input
 */
protected async changeColumnType(table: string, column: string, newType: string): Promise<void> {
  const quoteIdentifier = (identifier: string): string => `"${identifier.replace(/"/g, '""')}"`

  await this.db.run(
    `ALTER TABLE ${quoteIdentifier(table)} ALTER ${quoteIdentifier(column)} TYPE ${newType}`,
  )
}
Comment on lines +61 to +63
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Quote identifiers via a helper to prevent edge-case injection and quoting bugs

Centralize quoting and reuse everywhere identifiers are interpolated.

-  protected async changeColumnType(table: string, column: string, newType: string): Promise<void> {
-    await this.db.run(`ALTER TABLE "${table}" ALTER "${column}" TYPE ${newType}`)
-  }
+  protected ident(name: string): string {
+    return `"${name.replace(/"/g, '""')}"`
+  }
+
+  protected async changeColumnType(table: string, column: string, newType: string): Promise<void> {
+    await this.db.run(`ALTER TABLE ${this.ident(table)} ALTER ${this.ident(column)} TYPE ${newType}`)
+  }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
protected async changeColumnType(table: string, column: string, newType: string): Promise<void> {
await this.db.run(`ALTER TABLE "${table}" ALTER "${column}" TYPE ${newType}`)
}
protected ident(name: string): string {
return `"${name.replace(/"/g, '""')}"`
}
protected async changeColumnType(table: string, column: string, newType: string): Promise<void> {
await this.db.run(
`ALTER TABLE ${this.ident(table)} ALTER ${this.ident(column)} TYPE ${newType}`
)
}
🤖 Prompt for AI Agents
In backend/src/services/column-operation.service.ts around lines 61 to 63 the
ALTER TABLE statement interpolates table and column identifiers directly; create
and use a helper (e.g., quoteIdentifier(name: string): string) that returns the
identifier wrapped in double quotes with any internal double quotes escaped by
doubling them, then replace the inline interpolation with calls to that helper
for both table and column (and ensure this helper is used consistently across
the codebase wherever identifiers are interpolated).


/**
 * Returns `str` with every regular-expression metacharacter prefixed by a
 * backslash, so the result matches the input text literally inside a pattern.
 */
protected escapeRegex(str: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g

  return str.replace(metaCharacters, (matched) => `\\${matched}`)
}

/**
 * Runs a counting query and returns the value of its `count` column as a number.
 *
 * @param query - SQL expected to return one row with a column named `count`
 * @param params - values bound to the placeholders in `query`
 * @returns the `count` value of the first row, converted with `Number`
 * @throws Error when the query returns no row or no numeric `count` value;
 *   failing here keeps a `NaN` from slipping past callers' `=== 0` checks
 */
protected async getCount(query: string, params: DuckDBValue[]): Promise<number> {
  const reader = await this.db.runAndReadAll(query, params)
  // The JSON row form may carry the count as a string rather than a number, hence the Number() conversion below
  const [firstRow] = reader.getRowObjectsJson() as Array<{
    count?: number | string | null
  }>

  const count = Number(firstRow?.count ?? Number.NaN)
  if (!Number.isFinite(count)) {
    throw new Error('Count query did not return a numeric "count" column')
  }

  return count
}

/**
 * Checks if a column type is string-like (VARCHAR, TEXT, CHAR, BPCHAR).
 *
 * The whole type name must match, optionally followed by a length such as
 * `VARCHAR(255)`. Composite types that merely contain a string type
 * (e.g. `VARCHAR[]`, `STRUCT(name VARCHAR)`, `MAP(VARCHAR, INTEGER)`) are
 * therefore NOT treated as string-like.
 *
 * @param columnType - type name in upper case, as returned by `getColumnType`
 * @returns true when the column itself is one of the string types
 */
private isStringLikeType(columnType: string): boolean {
  return /^(?:VARCHAR|TEXT|CHAR|BPCHAR)(?:\s*\(\s*\d+\s*\))?$/.test(columnType.trim())
}

/**
 * Guarantees that the column holds a string-like type: columns that already do
 * are left untouched, any other column is converted to VARCHAR.
 */
private async ensureColumnIsStringType(table: string, column: string): Promise<void> {
  const currentType = await this.getColumnType(table, column)

  // Already string-like: nothing to convert
  if (this.isStringLikeType(currentType)) {
    return
  }

  await this.changeColumnType(table, column, 'VARCHAR')
}

/**
 * Gets the column type from the table schema.
 *
 * The table name is emitted as a double-quoted identifier with any embedded
 * double quote doubled, so unusual table names cannot break or alter the PRAGMA.
 *
 * @param table - name of the table to inspect
 * @param column - name of the column to look up (exact, case-sensitive match)
 * @returns the column's declared type, upper-cased
 * @throws Error when the table has no column with that name
 */
private async getColumnType(table: string, column: string): Promise<string> {
  const quotedTable = `"${table.replace(/"/g, '""')}"`
  const result = await this.db.runAndReadAll(`PRAGMA table_info(${quotedTable})`)
  const columns = result.getRowObjectsJson() as Array<{
    name: string
    type: string
  }>

  const columnInfo = columns.find((col) => col.name === column)
  if (!columnInfo) {
    throw new Error(`Column '${column}' not found in table '${table}'`)
  }

  return columnInfo.type.toUpperCase()
}
Comment on lines +102 to +115
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major

Use identifier helper in PRAGMA

Apply the same quoting strategy when reading schema.

-    const result = await this.db.runAndReadAll(`PRAGMA table_info("${table}")`)
+    const result = await this.db.runAndReadAll(`PRAGMA table_info(${this.ident(table)})`)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
private async getColumnType(table: string, column: string): Promise<string> {
const result = await this.db.runAndReadAll(`PRAGMA table_info("${table}")`)
const columns = result.getRowObjectsJson() as Array<{
name: string
type: string
}>
const columnInfo = columns.find((col) => col.name === column)
if (!columnInfo) {
throw new Error(`Column '${column}' not found in table '${table}'`)
}
return columnInfo.type.toUpperCase()
}
private async getColumnType(table: string, column: string): Promise<string> {
const result = await this.db.runAndReadAll(
`PRAGMA table_info(${this.ident(table)})`
)
const columns = result.getRowObjectsJson() as Array<{
name: string
type: string
}>
const columnInfo = columns.find((col) => col.name === column)
if (!columnInfo) {
throw new Error(`Column '${column}' not found in table '${table}'`)
}
return columnInfo.type.toUpperCase()
}
🤖 Prompt for AI Agents
In backend/src/services/column-operation.service.ts around lines 102-115, the
PRAGMA statement currently interpolates the table name directly; update it to
use the project’s identifier-quoting helper (the same helper used elsewhere in
this service) when building the SQL (e.g. construct PRAGMA
table_info(<quotedIdentifier>)) so the table name is quoted/escaped
consistently; replace the string interpolation with a call to that helper and
use the returned value in the PRAGMA call.

}
117 changes: 12 additions & 105 deletions backend/src/services/replace-operation.service.ts
Original file line number Diff line number Diff line change
@@ -1,54 +1,25 @@
import type { DuckDBConnection, DuckDBValue } from '@duckdb/node-api'
import type { ColumnOperationParams } from '@backend/services/column-operation.service'
import { ColumnOperationService } from '@backend/services/column-operation.service'
import type { DuckDBValue } from '@duckdb/node-api'

export interface ReplaceOperationParams {
table: string
column: string
interface ReplaceOperationParams extends ColumnOperationParams {
find: string
replace: string
caseSensitive: boolean
wholeWord: boolean
}

export class ReplaceOperationService {
constructor(private db: DuckDBConnection) {}

/**
* Performs a replace operation on a column in a project table
*/
async performReplace(params: ReplaceOperationParams): Promise<number> {
export class ReplaceOperationService extends ColumnOperationService {
public async performOperation(params: ReplaceOperationParams): Promise<number> {
const { table, column, find, replace, caseSensitive, wholeWord } = params

// Get the original column type before any modifications
const originalColumnType = await this.getColumnType(table, column)

// Check if column is string-like, if not, convert it first
const wasConverted = await this.ensureColumnIsStringType(table, column)

// Count rows that will be affected before the update
const affectedRows = await this.countAffectedRows(table, column, find, caseSensitive, wholeWord)

// Only proceed if there are rows to update
if (affectedRows === 0) {
// Revert column type if it was converted and no rows were affected
if (wasConverted) {
await this.changeColumnType(table, column, originalColumnType)
}
return 0
}

// Build and execute the parameterized UPDATE query
const { query, params: queryParams } = this.buildParameterizedUpdateQuery(
return this.executeColumnOperation(
table,
column,
find,
replace,
caseSensitive,
wholeWord,
() =>
this.buildParameterizedUpdateQuery(table, column, find, replace, caseSensitive, wholeWord),
() => this.countAffectedRows(table, column, find, caseSensitive, wholeWord),
)

await this.db.run(query, queryParams)

return affectedRows
}

/**
Expand Down Expand Up @@ -113,7 +84,7 @@ export class ReplaceOperationService {
/**
* Counts the number of rows that will be affected by the replace operation
*/
private async countAffectedRows(
private countAffectedRows(
table: string,
column: string,
find: string,
Expand Down Expand Up @@ -152,70 +123,6 @@ export class ReplaceOperationService {
}
}

const countBeforeReader = await this.db.runAndReadAll(query, params)
const countBeforeResult = countBeforeReader.getRowObjectsJson()

return Number(countBeforeResult[0]?.count ?? 0)
}

/**
* Escapes special regex characters in a string
*/
private escapeRegex(str: string): string {
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

/**
* Gets the column type from the table schema
*/
private async getColumnType(table: string, column: string): Promise<string> {
const result = await this.db.runAndReadAll(`PRAGMA table_info("${table}")`)
const columns = result.getRowObjectsJson() as Array<{
cid: number
name: string
type: string
pk: boolean
notnull: boolean
dflt_value: string | null
}>

const columnInfo = columns.find((col) => col.name === column)
if (!columnInfo) {
throw new Error(`Column '${column}' not found in table '${table}'`)
}

return columnInfo.type.toUpperCase()
}

/**
* Checks if a column type is string-like (VARCHAR, TEXT, BLOB)
*/
private isStringLikeType(columnType: string): boolean {
const stringTypes = ['VARCHAR', 'TEXT', 'CHAR', 'BPCHAR']

return stringTypes.some((type) => columnType.includes(type))
}

/**
* Ensures the column is a string-like type, converting it if necessary
* Returns true if the column was converted, false otherwise
*/
private async ensureColumnIsStringType(table: string, column: string): Promise<boolean> {
const columnType = await this.getColumnType(table, column)

if (!this.isStringLikeType(columnType)) {
// Convert the column to VARCHAR
await this.changeColumnType(table, column, 'VARCHAR')
return true
}

return false
}

/**
* Changes the column type to the specified type
*/
private async changeColumnType(table: string, column: string, newType: string): Promise<void> {
await this.db.run(`ALTER TABLE "${table}" ALTER "${column}" TYPE ${newType}`)
return this.getCount(query, params)
}
}
Loading