From 53e184bdacf3c83841970ddc31b12b66bb9dd095 Mon Sep 17 00:00:00 2001 From: Milen Pivchev Date: Wed, 10 Sep 2025 14:25:30 +0200 Subject: [PATCH 1/3] WIP Signed-off-by: Milen Pivchev --- .../Utils/FileNameSanitizer.swift | 111 ++++++++++++++++++ .../FileSanitizingUnitTests.swift | 73 ++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 Sources/NextcloudKit/Utils/FileNameSanitizer.swift create mode 100644 Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift diff --git a/Sources/NextcloudKit/Utils/FileNameSanitizer.swift b/Sources/NextcloudKit/Utils/FileNameSanitizer.swift new file mode 100644 index 00000000..1392ced9 --- /dev/null +++ b/Sources/NextcloudKit/Utils/FileNameSanitizer.swift @@ -0,0 +1,111 @@ +// +// File.swift +// NextcloudKit +// +// Created by Milen Pivchev on 09.09.25. +// + +import Foundation + +extension String { + + func containsBidiControlCharacters() -> Bool { +// guard let filename = filename else { return false } + + // Decode percent-encoded string + let decoded: String + if let decodedStr = removingPercentEncoding { + decoded = decodedStr + } else { + return false + } + + // List of bidi control characters + let bidiControlCharacters: [UInt32] = [ + 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, + 0x200E, 0x200F, 0x2066, 0x2067, 0x2068, + 0x2069, 0x061C + ] + + // Check each Unicode scalar + for scalar in decoded.unicodeScalars { + if bidiControlCharacters.contains(scalar.value) { + return true + } + if scalar.value < 32 { + return true + } + } + + return false + } + + // Minimal sanitizer: remove only spoof-prone embedding/override marks and ASCII controls < U+0020. + // Keeps helper marks (LRM/RLM/ALM/isolate) intact to avoid breaking RTL readability. + fileprivate func removingSpoofProneBidiAndLowControls() -> String { + let alwaysRemove: Set = [ + 0x202A, // LRE + 0x202B, // RLE + 0x202C, // PDF + 0x202D, // LRO + 0x202E // RLO + ] + let filtered = unicodeScalars.lazy.filter { s in + let v = s.value + if v < 0x20 { return false } // drop ASCII controls + if alwaysRemove.contains(v) { return false } // drop spoof-prone + return true + } + return String(String.UnicodeScalarView(filtered)) + } +// +// // If you still want a simple sanitize that preserves extension/base order: +// public func sanitizeForBidiCharacters(isFolder: Bool, isRTL: Bool = false) -> String { +// let ns = self as NSString +// let base = ns.deletingPathExtension +// let ext = ns.pathExtension +// let dot = isFolder ? "" : "." +// let isolatedExt = "\u{202C}\u{2066}\(dot)\(ext)\u{2069}" +// return containsBidiControlCharacters(self) ? base + isolatedExt : base + "." + ext +// } + + public func sanitizeForBidiCharacters(isFolder: Bool, isRTL: Bool = false) -> String { + let ns = self as NSString + let base = ns.deletingPathExtension + let ext = ns.pathExtension + + guard !ext.isEmpty else { return base } + + let dangerousBidiScalars: Set = [ + 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, + 0x200E, 0x200F, 0x2066, 0x2067, 0x2068, + 0x2069, 0x061C + ] + let containsBidi = base.unicodeScalars.contains { dangerousBidiScalars.contains($0.value) } + + if isRTL { + if containsBidi { + return "\u{202C}\u{2066}.\(ext)\u{2069}" + base + } else { + return ".\(ext)" + base + } + } else { + if containsBidi { + return base + "\u{202C}\u{2066}.\(ext)\u{2069}" + } else { + return base + "." + ext + } + } + } + // Your existing splitter + func getFilenameAndExtension(isFolder: Bool, isRTL: Bool) -> (String, String) { + if isFolder { + return (self, "") + } + let ns = self as NSString + let base = ns.deletingPathExtension + let ext = ns.pathExtension + let extWithDot = ext.isEmpty ? "" : "." + ext + return isRTL ? (extWithDot, base) : (base, extWithDot) + } +} diff --git a/Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift b/Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift new file mode 100644 index 00000000..8945ef35 --- /dev/null +++ b/Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Nextcloud GmbH +// SPDX-FileCopyrightText: 2024 Milen Pivchev +// SPDX-License-Identifier: GPL-3.0-or-later + +import Testing +import Foundation +@testable import NextcloudKit + +@Suite(.serialized) struct FileSanitizingUnitTests { + // MARK: - Helper for test expectation + func expectedSanitized(for filename: String, isFolder: Bool, isRTL: Bool) -> String { + let ns = filename as NSString + let base = ns.deletingPathExtension + let ext = ns.pathExtension + + if isFolder || ext.isEmpty { return base } + + let dangerousBidiScalars: Set = [ + 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, + 0x200E, 0x200F, 0x2066, 0x2067, 0x2068, + 0x2069, 0x061C + ] + let containsBidi = base.unicodeScalars.contains { dangerousBidiScalars.contains($0.value) } + + if isRTL { + return containsBidi + ? "\u{202C}\u{2066}.\(ext)\u{2069}" + base + : ".\(ext)" + base + } else { + return containsBidi + ? base + "\u{202C}\u{2066}.\(ext)\u{2069}" + : base + "." + ext + } + } + + // MARK: - Test Cases + @Test + func testSanitizeForBidiCharacters_UIRendering() { + let cases: [(String, Bool, Bool)] = [ + // LTR, normal and malicious + ("invoice\u{202E}cod.exe", false, false), // malicious RLO + ("archive.tar.gz", false, false), // multiple dots + ("myFolder", true, false), // folder + ("document.txt", false, false), // normal file + ("Foo\u{202E}dm.exe", false, false), // another malicious + + // RTL Hebrew / Arabic safe + ("תמונה.jpg", false, true), // Hebrew base + ("מכתב.pdf", false, true), // Hebrew base + ("שלום", true, true), // Hebrew folder + ("مرحبا", true, true), // Arabic folder + ("ملف.pdf", false, true), // Arabic file + + // Mixed-language + ("report.ملف", false, true), // English base, Arabic extension + ("وثيقة.docx", false, true), // Arabic base, English extension + ("summary.תמונה", false, true), // English base, Hebrew extension + ("מסמך.txt", false, true), // Hebrew base, English extension + + // Mixed-language with malicious bidi + ("report\u{202E}cod.exe", false, true), // English base + RLO trick + ("ملف\u{202E}cod.exe", false, true), // Arabic base + RLO trick + ("תמונה\u{202E}cod.exe", false, true) // Hebrew base + RLO trick + ] + + for (filename, isFolder, isRTL) in cases { + let result = filename.sanitizeForBidiCharacters(isFolder: isFolder, isRTL: isRTL) + let expected = expectedSanitized(for: filename, isFolder: isFolder, isRTL: isRTL) + #expect(result == expected, "Failed for filename: \(filename), isFolder: \(isFolder), isRTL: \(isRTL)") + } + } +} + From 23cf0e70494174161ae8354d8a04875b047a8f9f Mon Sep 17 00:00:00 2001 From: Milen Pivchev Date: Wed, 10 Sep 2025 15:02:18 +0200 Subject: [PATCH 2/3] Finalize Signed-off-by: Milen Pivchev --- .../Utils/FileNameSanitizer.swift | 72 ------------------- 1 file changed, 72 deletions(-) diff --git a/Sources/NextcloudKit/Utils/FileNameSanitizer.swift b/Sources/NextcloudKit/Utils/FileNameSanitizer.swift index 1392ced9..dd159117 100644 --- a/Sources/NextcloudKit/Utils/FileNameSanitizer.swift +++ b/Sources/NextcloudKit/Utils/FileNameSanitizer.swift @@ -8,67 +8,6 @@ import Foundation extension String { - - func containsBidiControlCharacters() -> Bool { -// guard let filename = filename else { return false } - - // Decode percent-encoded string - let decoded: String - if let decodedStr = removingPercentEncoding { - decoded = decodedStr - } else { - return false - } - - // List of bidi control characters - let bidiControlCharacters: [UInt32] = [ - 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, - 0x200E, 0x200F, 0x2066, 0x2067, 0x2068, - 0x2069, 0x061C - ] - - // Check each Unicode scalar - for scalar in decoded.unicodeScalars { - if bidiControlCharacters.contains(scalar.value) { - return true - } - if scalar.value < 32 { - return true - } - } - - return false - } - - // Minimal sanitizer: remove only spoof-prone embedding/override marks and ASCII controls < U+0020. - // Keeps helper marks (LRM/RLM/ALM/isolate) intact to avoid breaking RTL readability. - fileprivate func removingSpoofProneBidiAndLowControls() -> String { - let alwaysRemove: Set = [ - 0x202A, // LRE - 0x202B, // RLE - 0x202C, // PDF - 0x202D, // LRO - 0x202E // RLO - ] - let filtered = unicodeScalars.lazy.filter { s in - let v = s.value - if v < 0x20 { return false } // drop ASCII controls - if alwaysRemove.contains(v) { return false } // drop spoof-prone - return true - } - return String(String.UnicodeScalarView(filtered)) - } -// -// // If you still want a simple sanitize that preserves extension/base order: -// public func sanitizeForBidiCharacters(isFolder: Bool, isRTL: Bool = false) -> String { -// let ns = self as NSString -// let base = ns.deletingPathExtension -// let ext = ns.pathExtension -// let dot = isFolder ? "" : "." -// let isolatedExt = "\u{202C}\u{2066}\(dot)\(ext)\u{2069}" -// return containsBidiControlCharacters(self) ? base + isolatedExt : base + "." + ext -// } - public func sanitizeForBidiCharacters(isFolder: Bool, isRTL: Bool = false) -> String { let ns = self as NSString let base = ns.deletingPathExtension @@ -97,15 +36,4 @@ extension String { } } } - // Your existing splitter - func getFilenameAndExtension(isFolder: Bool, isRTL: Bool) -> (String, String) { - if isFolder { - return (self, "") - } - let ns = self as NSString - let base = ns.deletingPathExtension - let ext = ns.pathExtension - let extWithDot = ext.isEmpty ? "" : "." + ext - return isRTL ? (extWithDot, base) : (base, extWithDot) - } } From 27af6aad97f9f4cb09fee42dcf51f994a47ff2df Mon Sep 17 00:00:00 2001 From: Milen Pivchev Date: Mon, 15 Sep 2025 18:38:46 +0200 Subject: [PATCH 3/3] Fix PR issues Signed-off-by: Milen Pivchev --- Sources/NextcloudKit/Utils/FileNameSanitizer.swift | 9 +++------ .../NextcloudKitUnitTests/FileSanitizingUnitTests.swift | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Sources/NextcloudKit/Utils/FileNameSanitizer.swift b/Sources/NextcloudKit/Utils/FileNameSanitizer.swift index dd159117..c1920d92 100644 --- a/Sources/NextcloudKit/Utils/FileNameSanitizer.swift +++ b/Sources/NextcloudKit/Utils/FileNameSanitizer.swift @@ -1,9 +1,6 @@ -// -// File.swift -// NextcloudKit -// -// Created by Milen Pivchev on 09.09.25. -// +// SPDX-FileCopyrightText: Nextcloud GmbH +// SPDX-FileCopyrightText: 2025 Milen Pivchev +// SPDX-License-Identifier: GPL-3.0-or-later import Foundation diff --git a/Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift b/Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift index 8945ef35..e91d8027 100644 --- a/Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift +++ b/Tests/NextcloudKitUnitTests/FileSanitizingUnitTests.swift @@ -1,5 +1,5 @@ // SPDX-FileCopyrightText: Nextcloud GmbH -// SPDX-FileCopyrightText: 2024 Milen Pivchev +// SPDX-FileCopyrightText: 2025 Milen Pivchev // SPDX-License-Identifier: GPL-3.0-or-later import Testing