From b3781049ac45644e999b64fd476d1db6b47b658e Mon Sep 17 00:00:00 2001 From: arc-alex Date: Sat, 10 Jan 2026 21:13:23 +0100 Subject: [PATCH] fix: preserve utf8 charset in filenames --- apps/client/src/common/api/db.ts | 11 ++++++----- apps/server/src/utils/upload.ts | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/apps/client/src/common/api/db.ts b/apps/client/src/common/api/db.ts index 28f3b9b333..83b2da7464 100644 --- a/apps/client/src/common/api/db.ts +++ b/apps/client/src/common/api/db.ts @@ -32,15 +32,16 @@ export async function downloadProject(fileName: string) { /** * HTTP request to upload project file + * + * Note: The browser's FormData API sends filenames as raw UTF-8 bytes in the + * Content-Disposition header (RFC 7578), not RFC 2231/RFC 5987 encoded. The issue is server-side where + * Busboy (used by Multer) defaults to 'latin1' charset. The server-side fix in + * upload.ts handles the encoding conversion. */ export async function uploadProjectFile(file: File): Promise { const formData = new FormData(); formData.append('project', file); - const response = await axios.post(`${dbPath}/upload`, formData, { - headers: { - 'Content-Type': 'multipart/form-data', - }, - }); + const response = await axios.post(`${dbPath}/upload`, formData); return response.data; } diff --git a/apps/server/src/utils/upload.ts b/apps/server/src/utils/upload.ts index 323dda1b8d..2a8948a491 100644 --- a/apps/server/src/utils/upload.ts +++ b/apps/server/src/utils/upload.ts @@ -30,6 +30,33 @@ function generateNewFileName(filePath: string, callback: (newName: string) => vo checkExistence(newPath); } +/** + * Fixes encoding issues where UTF-8 bytes are incorrectly interpreted as Latin-1. + * + * Multer uses Busboy internally, which defaults to 'latin1' charset for parsing + * Content-Disposition header parameters (like filenames). This causes UTF-8 + * characters to be misinterpreted. 
+ * + * Example: 'ø' (UTF-8: 0xC3 0xB8) gets misinterpreted as 'Ã¸' (Latin-1: 0xC3 0xB8) + * + * Solution: Convert the string back to bytes using Latin-1 (which preserves + * byte values), then re-interpret those bytes as UTF-8. + * + * Note: This is the standard workaround since Multer doesn't expose Busboy's + * defParamCharset option directly. + * @link https://github.com/expressjs/multer/issues/1104 + */ +function fixFilenameEncoding(filename: string): string { + try { + // Convert the string back to bytes using Latin-1 (which preserves byte values), + // then re-interpret those bytes as UTF-8 + return Buffer.from(filename, 'latin1').toString('utf8'); + } catch (error) { + // If conversion fails, return the original filename + return filename; + } +} + // Define multer storage object export const storage = multer.diskStorage({ destination: function (_req, file, cb) { @@ -40,7 +67,8 @@ export const storage = multer.diskStorage({ ensureDirectory(publicDir.uploadsDir); - const sanitisedName = sanitize(file.originalname); + const fixedFilename = fixFilenameEncoding(file.originalname); + const sanitisedName = sanitize(fixedFilename); const filePath = path.join(publicDir.uploadsDir, sanitisedName); // Check if file already exists