From 3b4ed711549a4761f459565c043d81d91d2d8fdc Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sat, 24 Jan 2026 21:54:53 +0200 Subject: [PATCH 01/10] feat(generator): skip duplicate books when Sefaria version exists Add deduplication logic in Otzaria generator to skip books that already exist from Sefaria source. Sefaria has priority over other sources. --- .../seforimlibrary/otzariasqlite/Generator.kt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt b/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt index 60d01a8..624d25b 100644 --- a/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt +++ b/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt @@ -685,6 +685,19 @@ class DatabaseGenerator( return } + // Skip if a book with the same heRef already exists from Sefaria (Sefaria has priority) + val existingBook = repository.getBookByHeRef(title) + if (existingBook != null) { + val existingSource = repository.getSourceById(existingBook.sourceId) + if (existingSource?.name == "Sefaria") { + logger.i { "⏭️ Skipping '$title' - already exists from Sefaria (priority source)" } + processedBooksCount += 1 + val pct = if (totalBooksToProcess > 0) (processedBooksCount * 100 / totalBooksToProcess) else 0 + logger.i { "Books progress: $processedBooksCount/$totalBooksToProcess (${pct}%)" } + return + } + } + // Assign a unique ID to this book val currentBookId = nextBookId++ logger.d { "Assigning ID $currentBookId to book '$title' with categoryId: $categoryId" } From f06a5eabce6b783a9f6711ff6aca11673ab3ef69 Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sat, 24 Jan 2026 21:37:49 +0200 Subject: [PATCH 02/10] feat(book): add heRef property for stable Hebrew reference 
identification - Add heRef column to book table with index for efficient lookups - Update Book model with heRef property and KDoc documentation - Add selectByHeRef query and getBookByHeRef repository method - Update insert queries to include heRef parameter - Set heRef from title in Otzaria generator - Set heRef from heTitle in Sefaria generator This allows stable identification of books across database regenerations, similar to how line.heRef works for lines. --- .../seforimlibrary/core/models/Book.kt | 3 +++ .../dao/extensions/ModelExtensions.kt | 1 + .../dao/repository/SeforimRepository.kt | 15 +++++++++++++++ .../kdroidfilter/seforimlibrary/db/BookQueries.sq | 11 +++++++---- .../kdroidfilter/seforimlibrary/db/Database.sq | 2 ++ .../seforimlibrary/otzariasqlite/Generator.kt | 1 + .../sefariasqlite/SefariaDirectImporter.kt | 1 + 7 files changed, 30 insertions(+), 4 deletions(-) diff --git a/core/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/core/models/Book.kt b/core/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/core/models/Book.kt index a9575f0..4bf7f10 100644 --- a/core/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/core/models/Book.kt +++ b/core/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/core/models/Book.kt @@ -9,6 +9,8 @@ import kotlinx.serialization.Serializable * @property id The unique identifier of the book * @property categoryId The identifier of the category this book belongs to * @property title The title of the book + * @property heRef A stable Hebrew reference identifier for the book, used for consistent identification + * across database regenerations (e.g., "בראשית", "רש״י על בראשית") * @property sourceId The identifier of the source this book originates from * @property authors The list of authors of this book * @property topics The list of topics associated with this book @@ -28,6 +30,7 @@ data class Book( val categoryId: Long, val sourceId: Long, val title: String, + val heRef: String? 
= null, val authors: List = emptyList(), val topics: List = emptyList(), val pubPlaces: List = emptyList(), diff --git a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/extensions/ModelExtensions.kt b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/extensions/ModelExtensions.kt index 7c621f4..7447db1 100644 --- a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/extensions/ModelExtensions.kt +++ b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/extensions/ModelExtensions.kt @@ -78,6 +78,7 @@ fun io.github.kdroidfilter.seforimlibrary.db.Book.toModel(json: Json, authors: L categoryId = categoryId, sourceId = sourceId, title = title, + heRef = heRef, authors = authors, topics = emptyList(), pubPlaces = pubPlaces, diff --git a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt index 06727d5..dd0111f 100644 --- a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt +++ b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt @@ -641,6 +641,19 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { return@withContext bookData.toModel(json, authors, pubPlaces, pubDates).copy(topics = topics) } + /** + * Retrieves a book by its stable Hebrew reference identifier (heRef). + * Returns null if no book with the given heRef exists. + */ + suspend fun getBookByHeRef(heRef: String): Book? 
= withContext(Dispatchers.IO) { + val bookData = database.bookQueriesQueries.selectByHeRef(heRef).executeAsOneOrNull() ?: return@withContext null + val authors = getBookAuthors(bookData.id) + val topics = getBookTopics(bookData.id) + val pubPlaces = getBookPubPlaces(bookData.id) + val pubDates = getBookPubDates(bookData.id) + return@withContext bookData.toModel(json, authors, pubPlaces, pubDates).copy(topics = topics) + } + /** * Retrieves a book by approximate title (exact, normalized, or LIKE). */ @@ -852,6 +865,7 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { categoryId = book.categoryId, sourceId = book.sourceId, title = book.title, + heRef = book.heRef, heShortDesc = book.heShortDesc, notesContent = book.notesContent, orderIndex = book.order.toLong(), @@ -909,6 +923,7 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { categoryId = book.categoryId, sourceId = book.sourceId, title = book.title, + heRef = book.heRef, heShortDesc = book.heShortDesc, notesContent = book.notesContent, orderIndex = book.order.toLong(), diff --git a/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/BookQueries.sq b/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/BookQueries.sq index 3048c2f..cc6bd9f 100644 --- a/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/BookQueries.sq +++ b/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/BookQueries.sq @@ -40,12 +40,15 @@ selectBaseIds: SELECT id FROM book WHERE isBaseBook = 1 ORDER BY orderIndex, title; insert: -INSERT INTO book (categoryId, sourceId, title, heShortDesc, notesContent, orderIndex, totalLines, isBaseBook, hasSourceConnection, hasAltStructures, hasTeamim, hasNekudot) -VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); +INSERT INTO book (categoryId, sourceId, title, heRef, heShortDesc, notesContent, orderIndex, totalLines, isBaseBook, hasSourceConnection, hasAltStructures, hasTeamim, 
hasNekudot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); insertWithId: -INSERT INTO book (id, categoryId, sourceId, title, heShortDesc, notesContent, orderIndex, totalLines, isBaseBook, hasSourceConnection, hasAltStructures, hasTeamim, hasNekudot) -VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); +INSERT INTO book (id, categoryId, sourceId, title, heRef, heShortDesc, notesContent, orderIndex, totalLines, isBaseBook, hasSourceConnection, hasAltStructures, hasTeamim, hasNekudot) +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); + +selectByHeRef: +SELECT * FROM book WHERE heRef = ? LIMIT 1; updateTotalLines: UPDATE book SET totalLines = ? WHERE id = ?; diff --git a/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/Database.sq b/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/Database.sq index 33cd012..45bc822 100644 --- a/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/Database.sq +++ b/dao/src/commonMain/sqldelight/io/github/kdroidfilter/seforimlibrary/db/Database.sq @@ -69,6 +69,7 @@ CREATE TABLE IF NOT EXISTS book ( categoryId INTEGER NOT NULL, sourceId INTEGER NOT NULL, title TEXT NOT NULL, + heRef TEXT, heShortDesc TEXT, -- Optional raw notes attached to the base book (when a companion file 'הערות על ' exists) notesContent TEXT, @@ -91,6 +92,7 @@ CREATE INDEX IF NOT EXISTS idx_book_category ON book(categoryId); CREATE INDEX IF NOT EXISTS idx_book_title ON book(title); CREATE INDEX IF NOT EXISTS idx_book_order ON book(orderIndex); CREATE INDEX IF NOT EXISTS idx_book_source ON book(sourceId); +CREATE INDEX IF NOT EXISTS idx_book_heref ON book(heRef); -- Book-publication place junction table CREATE TABLE IF NOT EXISTS book_pub_place ( diff --git a/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt b/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt index 60d01a8..54530ae 
100644 --- a/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt +++ b/generator/otzariasqlite/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/otzariasqlite/Generator.kt @@ -728,6 +728,7 @@ class DatabaseGenerator( categoryId = categoryId, sourceId = sourceId, title = title, + heRef = title, authors = authors, pubPlaces = pubPlaces, pubDates = pubDates, diff --git a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt index 266d681..785f142 100644 --- a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt +++ b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt @@ -174,6 +174,7 @@ class SefariaDirectImporter( categoryId = catId, sourceId = sourceId, title = payload.heTitle, + heRef = payload.heTitle, authors = payload.authors.map { Author(name = it) }, pubPlaces = emptyList(), pubDates = payload.pubDates, From 16ff5873cdd1dd410b7b87dc3a98fc9aa46e6e37 Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Sat, 24 Jan 2026 22:58:43 +0200 Subject: [PATCH 03/10] =?UTF-8?q?fix:=20rename=20"=D7=A4=D7=99=D7=A8=D7=95?= =?UTF-8?q?=D7=A9=D7=99=D7=9D=20=D7=9E=D7=95=D7=93=D7=A8=D7=A0=D7=99=D7=99?= =?UTF-8?q?=D7=9D"=20categories=20to=20"=D7=9E=D7=97=D7=91=D7=A8=D7=99=20?= =?UTF-8?q?=D7=96=D7=9E=D7=A0=D7=A0=D7=95"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add post-processing script to rename Sefaria categories after import, aligning with Otzaria's naming convention. Runs automatically as part of generateSeforimDb pipeline, right after Sefaria import and before Otzaria append. 
Usage: ./gradlew :sefariasqlite:renameCategories -PseforimDb=/path/to/seforim.db Closes kdroidFilter/Zayit#247 --- build.gradle.kts | 4 ++ generator/sefariasqlite/build.gradle.kts | 25 +++++++ .../RenameCategoriesPostProcess.kt | 71 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt diff --git a/build.gradle.kts b/build.gradle.kts index d963faa..a9e3c5f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -14,6 +14,7 @@ tasks.register("generateSeforimDb") { dependsOn(":sefariasqlite:generateSefariaSqlite") dependsOn(":otzariasqlite:appendOtzaria") dependsOn(":otzariasqlite:generateHavroutaLinks") + dependsOn(":sefariasqlite:renameCategories") dependsOn(":catalog:buildCatalog") dependsOn(":searchindex:buildLuceneIndexDefault") dependsOn(":packaging:writeReleaseInfo") @@ -31,6 +32,9 @@ project(":otzariasqlite").tasks.matching { "appendOtzaria" ) }.configureEach { + mustRunAfter(":sefariasqlite:renameCategories") +} +project(":sefariasqlite").tasks.matching { it.name == "renameCategories" }.configureEach { mustRunAfter(":sefariasqlite:generateSefariaSqlite") } project(":otzariasqlite").tasks.matching { it.name == "generateHavroutaLinks" }.configureEach { diff --git a/generator/sefariasqlite/build.gradle.kts b/generator/sefariasqlite/build.gradle.kts index 324715e..c57bdc4 100644 --- a/generator/sefariasqlite/build.gradle.kts +++ b/generator/sefariasqlite/build.gradle.kts @@ -65,3 +65,28 @@ tasks.register<JavaExec>("generateSefariaSqlite") { "-XX:MaxGCPauseMillis=200" ) } + +// Post-processing step to rename categories after all generation is complete +// Usage: +// ./gradlew :sefariasqlite:renameCategories +// ./gradlew :sefariasqlite:renameCategories -PseforimDb=/path/to/seforim.db +tasks.register<JavaExec>("renameCategories") { + group = "application" + description = "Rename 'פירושים מודרניים' categories to 'מחברי 
זמננו' after generation." + + dependsOn("jvmJar") + mainClass.set("io.github.kdroidfilter.seforimlibrary.sefariasqlite.RenameCategoriesPostProcessKt") + classpath = files(tasks.named("jvmJar")) + configurations.getByName("jvmRuntimeClasspath") + + // Pass DB path if provided + if (project.hasProperty("seforimDb")) { + systemProperty("seforimDb", project.property("seforimDb") as String) + } else if (System.getenv("SEFORIM_DB") != null) { + systemProperty("seforimDb", System.getenv("SEFORIM_DB")) + } else { + val defaultDbPath = rootProject.layout.buildDirectory.file("seforim.db").get().asFile.absolutePath + systemProperty("seforimDb", defaultDbPath) + } + + jvmArgs = listOf("-Xmx256m") +} diff --git a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt new file mode 100644 index 0000000..904b282 --- /dev/null +++ b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt @@ -0,0 +1,71 @@ +package io.github.kdroidfilter.seforimlibrary.sefariasqlite + +import co.touchlab.kermit.Logger +import co.touchlab.kermit.Severity +import java.nio.file.Paths +import java.sql.DriverManager +import kotlin.io.path.exists +import kotlin.system.exitProcess + +/** + * Post-processing step to rename categories in the database. + * This runs after all books and links are generated, so it doesn't affect + * any path-based matching logic. 
+ * + * Usage: + * ./gradlew -p SeforimLibrary :sefariasqlite:renameCategories -PseforimDb=/path/to/seforim.db + * + * Env alternatives: + * SEFORIM_DB + */ +fun main(args: Array<String>) { + Logger.setMinSeverity(Severity.Info) + val logger = Logger.withTag("RenameCategories") + + val dbPathStr = args.getOrNull(0) + ?: System.getProperty("seforimDb") + ?: System.getenv("SEFORIM_DB") + ?: Paths.get("build", "seforim.db").toString() + val dbPath = Paths.get(dbPathStr) + + if (!dbPath.exists()) { + logger.e { "DB not found at $dbPath" } + exitProcess(1) + } + + logger.i { "Renaming categories in $dbPath" } + + // Category renames: old name -> new name + val categoryRenames = mapOf( + "פירושים מודרניים על התנ״ך" to "מחברי זמננו", + "פירושים מודרניים על התלמוד" to "מחברי זמננו", + "פירושים מודרניים על המשנה" to "מחברי זמננו" + ) + + try { + DriverManager.getConnection("jdbc:sqlite:$dbPath").use { conn -> + conn.autoCommit = false + + val updateStmt = conn.prepareStatement( + "UPDATE category SET title = ? WHERE title = ?" + ) + + var totalUpdated = 0 + for ((oldName, newName) in categoryRenames) { + updateStmt.setString(1, newName) + updateStmt.setString(2, oldName) + val updated = updateStmt.executeUpdate() + if (updated > 0) { + logger.i { "Renamed '$oldName' -> '$newName' ($updated rows)" } + totalUpdated += updated + } + } + + conn.commit() + logger.i { "Category rename complete. Total categories updated: $totalUpdated" } + } + } catch (e: Exception) { + logger.e(e) { "Failed to rename categories" } + exitProcess(1) + } +} From 45fa366cfc20f729b85cf8291e1948196410e59d Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Sat, 24 Jan 2026 23:54:26 +0200 Subject: [PATCH 04/10] fix: use base_text_order for commentary book ordering Sefaria's table_of_contents.json doesn't provide an 'order' field for commentary books. Instead, it provides 'base_text_order' which indicates the order of the base text being commented on. 
Changes: - Parse base_text_order as fallback when order is not available - Look up book order by Hebrew title in addition to English title This fixes the issue where most commentary books had orderIndex=999. Closes kdroidFilter/Zayit#263 --- .../seforimlibrary/sefariasqlite/SefariaDirectImporter.kt | 4 +++- .../seforimlibrary/sefariasqlite/SefariaImportOrdering.kt | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt index 785f142..c579b4a 100644 --- a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt +++ b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaDirectImporter.kt @@ -162,7 +162,9 @@ class SefariaDirectImporter( val catId = ensureCategoryPath(payload.categoriesHe) val bookId = nextBookId.getAndIncrement() val bookPath = buildBookPath(payload.categoriesHe, payload.heTitle) - val bookOrder = bookOrders[payload.enTitle]?.toFloat() ?: 999f + val bookOrder = (bookOrders[payload.enTitle] + ?: bookOrders[payload.heTitle] + ?: bookOrders[sanitizeFolder(payload.heTitle)])?.toFloat() ?: 999f val normalizedPath = normalizedBookPath(payload.categoriesHe, payload.heTitle) val isBaseBook = normalizedPath in baseBookKeys diff --git a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaImportOrdering.kt b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaImportOrdering.kt index 93effb5..bfbdbcc 100644 --- a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaImportOrdering.kt +++ 
b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/SefariaImportOrdering.kt @@ -4,6 +4,7 @@ import co.touchlab.kermit.Logger import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.contentOrNull +import kotlinx.serialization.json.doubleOrNull import kotlinx.serialization.json.intOrNull import kotlinx.serialization.json.jsonArray import kotlinx.serialization.json.jsonObject @@ -38,7 +39,10 @@ internal fun parseTableOfContentsOrders( fun processTocItem(item: JsonObject, categoryPath: List<String> = emptyList()) { val title = item["title"]?.jsonPrimitive?.contentOrNull val heTitle = item["heTitle"]?.jsonPrimitive?.contentOrNull + // Use order if available, otherwise fall back to base_text_order (for commentaries) val order = item["order"]?.jsonPrimitive?.intOrNull + ?: item["base_text_order"]?.jsonPrimitive?.intOrNull + ?: item["base_text_order"]?.jsonPrimitive?.doubleOrNull?.toInt() if (title != null && order != null) { bookOrders[title] = order } From 8daec49beb9fa6d25347f56e99d99c8c2fd3b757 Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Sun, 25 Jan 2026 00:06:07 +0200 Subject: [PATCH 05/10] fix: skip empty categories when building catalog Categories that have no books and no non-empty subcategories are now excluded from the precomputed catalog. This fixes the issue where blacklisted books would leave empty category folders in the tree. The fix works recursively: if a category's only children are themselves empty, the parent category is also excluded. 
--- .../seforimlibrary/catalog/BuildCatalog.kt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/generator/catalog/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/catalog/BuildCatalog.kt b/generator/catalog/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/catalog/BuildCatalog.kt index 0868731..fb7c76b 100644 --- a/generator/catalog/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/catalog/BuildCatalog.kt +++ b/generator/catalog/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/catalog/BuildCatalog.kt @@ -63,8 +63,8 @@ private suspend fun buildCatalogTree(repository: SeforimRepository, logger: Logg val rootCategories = repository.getRootCategories().sortedBy { it.order } var totalCategories = 0 - val roots = rootCategories.map { root -> - buildCatalogCategoryRecursive(root, booksByCategory, repository).also { + val roots = rootCategories.mapNotNull { root -> + buildCatalogCategoryRecursive(root, booksByCategory, repository)?.also { totalCategories += countCategories(it) } } @@ -83,7 +83,7 @@ private suspend fun buildCatalogCategoryRecursive( category: Category, booksByCategory: Map<Long, List<Book>>, repository: SeforimRepository -): CatalogCategory { +): CatalogCategory? 
{ val books = booksByCategory[category.id] ?.map { book -> CatalogBook( @@ -107,7 +107,12 @@ private suspend fun buildCatalogCategoryRecursive( val subcategories = repository.getCategoryChildren(category.id) .sortedBy { it.order } - .map { child -> buildCatalogCategoryRecursive(child, booksByCategory, repository) } + .mapNotNull { child -> buildCatalogCategoryRecursive(child, booksByCategory, repository) } + + // Skip empty categories (no books and no non-empty subcategories) + if (books.isEmpty() && subcategories.isEmpty()) { + return null + } return CatalogCategory( id = category.id, From 5c66e3b39a427be9ea817a9929f93ff7ef1d70f8 Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Sun, 25 Jan 2026 00:16:10 +0200 Subject: [PATCH 06/10] feat: unify Sefaria/Otzaria category naming with merge support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend RenameCategoriesPostProcess to handle both renaming and merging: - Simple rename when no target category exists under the same parent - Merge books and subcategories when target category already exists Mappings added: - פירושים מודרניים על התנ״ך/התלמוד/המשנה -> מחברי זמננו - ספרות מודרנית -> מחברי זמננו (merge under הלכה, מחשבת ישראל, ספרי מוסר, שו״ת) - ראשונים על התנ״ך -> ראשונים (merge under תנ״ך) - אחרונים על התנ״ך -> אחרונים (merge under תנ״ך) - ראשונים/אחרונים על התלמוד -> ראשונים/אחרונים (rename under בבלי) - ראשונים/אחרונים על המשנה -> ראשונים/אחרונים (rename under משנה) This ensures consistent category structure between Sefaria and Otzaria sources. 
--- .../RenameCategoriesPostProcess.kt | 155 ++++++++++++++++-- 1 file changed, 138 insertions(+), 17 deletions(-) diff --git a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt index 904b282..c0b6397 100644 --- a/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt +++ b/generator/sefariasqlite/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/sefariasqlite/RenameCategoriesPostProcess.kt @@ -3,14 +3,19 @@ package io.github.kdroidfilter.seforimlibrary.sefariasqlite import co.touchlab.kermit.Logger import co.touchlab.kermit.Severity import java.nio.file.Paths +import java.sql.Connection import java.sql.DriverManager import kotlin.io.path.exists import kotlin.system.exitProcess /** - * Post-processing step to rename categories in the database. - * This runs after all books and links are generated, so it doesn't affect - * any path-based matching logic. + * Post-processing step to rename and merge categories in the database. + * This runs after Sefaria import but before Otzaria, so categories are unified + * before additional books are added. + * + * Handles two cases: + * 1. Simple rename: When no target category exists under the same parent + * 2. Merge: When a target category already exists, books are moved and source is deleted * * Usage: * ./gradlew -p SeforimLibrary :sefariasqlite:renameCategories -PseforimDb=/path/to/seforim.db @@ -33,39 +38,155 @@ fun main(args: Array<String>) { exitProcess(1) } - logger.i { "Renaming categories in $dbPath" } + logger.i { "Renaming/merging categories in $dbPath" } // Category renames: old name -> new name + // If a category with the new name already exists under the same parent, + // books will be moved and the old category deleted (merge). 
val categoryRenames = mapOf( + // Modern commentaries -> מחברי זמננו "פירושים מודרניים על התנ״ך" to "מחברי זמננו", "פירושים מודרניים על התלמוד" to "מחברי זמננו", - "פירושים מודרניים על המשנה" to "מחברי זמננו" + "פירושים מודרניים על המשנה" to "מחברי זמננו", + // Modern literature -> מחברי זמננו + "ספרות מודרנית" to "מחברי זמננו", + // Sefaria-specific categories to Otzaria-style + "ראשונים על התנ״ך" to "ראשונים", + "אחרונים על התנ״ך" to "אחרונים", + "ראשונים על התלמוד" to "ראשונים", + "אחרונים על התלמוד" to "אחרונים", + "ראשונים על המשנה" to "ראשונים", + "אחרונים על המשנה" to "אחרונים" ) try { DriverManager.getConnection("jdbc:sqlite:$dbPath").use { conn -> conn.autoCommit = false - val updateStmt = conn.prepareStatement( - "UPDATE category SET title = ? WHERE title = ?" - ) + var totalRenamed = 0 + var totalMerged = 0 - var totalUpdated = 0 for ((oldName, newName) in categoryRenames) { - updateStmt.setString(1, newName) - updateStmt.setString(2, oldName) - val updated = updateStmt.executeUpdate() - if (updated > 0) { - logger.i { "Renamed '$oldName' -> '$newName' ($updated rows)" } - totalUpdated += updated + val result = renameOrMergeCategory(conn, oldName, newName, logger) + when (result) { + is RenameResult.Renamed -> totalRenamed += result.count + is RenameResult.Merged -> totalMerged += result.booksMoved + is RenameResult.NotFound -> { /* skip */ } } } conn.commit() - logger.i { "Category rename complete. Total categories updated: $totalUpdated" } + logger.i { "Category processing complete. 
Renamed: $totalRenamed, Merged: $totalMerged books" } } } catch (e: Exception) { - logger.e(e) { "Failed to rename categories" } + logger.e(e) { "Failed to process categories" } exitProcess(1) } } + +private sealed class RenameResult { + data class Renamed(val count: Int) : RenameResult() + data class Merged(val booksMoved: Int) : RenameResult() + data object NotFound : RenameResult() +} + +/** + * Renames a category or merges it into an existing category with the target name. + * + * @return The result of the operation + */ +private fun renameOrMergeCategory( + conn: Connection, + oldName: String, + newName: String, + logger: Logger +): RenameResult { + // Find all source categories with oldName + val sourceCats = mutableListOf<Pair<Long, Long?>>() // (id, parentId) + conn.prepareStatement("SELECT id, parentId FROM category WHERE title = ?").use { stmt -> + stmt.setString(1, oldName) + stmt.executeQuery().use { rs -> + while (rs.next()) { + val id = rs.getLong(1) + val parentId = rs.getObject(2) as? Long + sourceCats.add(id to parentId) + } + } + } + + if (sourceCats.isEmpty()) { + return RenameResult.NotFound + } + + var totalRenamed = 0 + var totalBooksMoved = 0 + + for ((sourceId, parentId) in sourceCats) { + // Check if a target category with newName exists under the same parent + val targetId = findCategoryByNameAndParent(conn, newName, parentId) + + if (targetId != null && targetId != sourceId) { + // Merge: move books from source to target, then delete source + val booksMoved = moveBooksToCategory(conn, sourceId, targetId) + val subCatsMoved = moveSubcategoriesToParent(conn, sourceId, targetId) + deleteCategory(conn, sourceId) + logger.i { "Merged '$oldName' (id=$sourceId) into '$newName' (id=$targetId): $booksMoved books, $subCatsMoved subcategories" } + totalBooksMoved += booksMoved + } else { + // Simple rename + conn.prepareStatement("UPDATE category SET title = ? 
WHERE id = ?").use { stmt -> + stmt.setString(1, newName) + stmt.setLong(2, sourceId) + stmt.executeUpdate() + } + logger.i { "Renamed '$oldName' (id=$sourceId) -> '$newName'" } + totalRenamed++ + } + } + + return if (totalBooksMoved > 0) { + RenameResult.Merged(totalBooksMoved) + } else { + RenameResult.Renamed(totalRenamed) + } +} + +private fun findCategoryByNameAndParent(conn: Connection, name: String, parentId: Long?): Long? { + val sql = if (parentId != null) { + "SELECT id FROM category WHERE title = ? AND parentId = ?" + } else { + "SELECT id FROM category WHERE title = ? AND parentId IS NULL" + } + conn.prepareStatement(sql).use { stmt -> + stmt.setString(1, name) + if (parentId != null) { + stmt.setLong(2, parentId) + } + stmt.executeQuery().use { rs -> + return if (rs.next()) rs.getLong(1) else null + } + } +} + +private fun moveBooksToCategory(conn: Connection, fromCategoryId: Long, toCategoryId: Long): Int { + conn.prepareStatement("UPDATE book SET categoryId = ? WHERE categoryId = ?").use { stmt -> + stmt.setLong(1, toCategoryId) + stmt.setLong(2, fromCategoryId) + return stmt.executeUpdate() + } +} + +private fun moveSubcategoriesToParent(conn: Connection, fromCategoryId: Long, toParentId: Long): Int { + conn.prepareStatement("UPDATE category SET parentId = ? 
WHERE parentId = ?").use { stmt -> + stmt.setLong(1, toParentId) + stmt.setLong(2, fromCategoryId) + return stmt.executeUpdate() + } +} + +private fun deleteCategory(conn: Connection, categoryId: Long) { + conn.prepareStatement("DELETE FROM category WHERE id = ?").use { stmt -> + stmt.setLong(1, categoryId) + stmt.executeUpdate() + } +} From 5f51c614c45399de0ab483ac1aaf4c57a7afec12 Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Sun, 25 Jan 2026 02:59:50 +0200 Subject: [PATCH 07/10] feat(search): add computeFacets() for instant aggregates --- .../search/LuceneSearchEngine.kt | 66 +++++++++++++++++++ .../seforimlibrary/search/SearchEngine.kt | 24 +++++++ .../seforimlibrary/search/SearchSession.kt | 14 ++++ 3 files changed, 104 insertions(+) diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt index a1e5b8e..6fc6ab1 100644 --- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt +++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt @@ -8,14 +8,19 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute import org.apache.lucene.index.DirectoryReader import org.apache.lucene.index.StoredFields import org.apache.lucene.index.Term +import org.apache.lucene.index.LeafReaderContext import org.apache.lucene.search.BooleanClause import org.apache.lucene.search.BooleanQuery import org.apache.lucene.search.BoostQuery +import org.apache.lucene.search.Collector import org.apache.lucene.search.FuzzyQuery import org.apache.lucene.search.IndexSearcher +import org.apache.lucene.search.LeafCollector import org.apache.lucene.search.PrefixQuery import org.apache.lucene.search.Query +import org.apache.lucene.search.Scorable import org.apache.lucene.search.ScoreDoc +import org.apache.lucene.search.ScoreMode import 
org.apache.lucene.search.TermQuery import org.apache.lucene.util.QueryBuilder import org.apache.lucene.store.FSDirectory @@ -241,6 +246,67 @@ class LuceneSearchEngine( // Directory is closed automatically when readers are closed } + override fun computeFacets( + query: String, + near: Int, + bookFilter: Long?, + categoryFilter: Long?, + bookIds: Collection<Long>?, + lineIds: Collection<Long>? + ): SearchFacets? { + val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds) + ?: return null + + return withSearcher { searcher -> + val categoryCounts = mutableMapOf<Long, Int>() + val bookCounts = mutableMapOf<Long, Int>() + var totalHits = 0L + + val facetFields = setOf("book_id", "ancestor_category_ids") // only these stored fields are decoded per hit + val collector = object : Collector { + override fun getLeafCollector(leafContext: LeafReaderContext): LeafCollector { + val storedFields = leafContext.reader().storedFields() + + return object : LeafCollector { + override fun setScorer(scorer: Scorable) { + // No scoring needed for facet counting + } + + override fun collect(doc: Int) { + totalHits++ + val luceneDoc = storedFields.document(doc, facetFields) + + // Book count + val bookId = luceneDoc.getField("book_id")?.numericValue()?.toLong() + if (bookId != null) { + bookCounts[bookId] = (bookCounts[bookId] ?: 0) + 1 + } + + // Category counts from ancestors (stored as comma-separated string) + val ancestorStr = luceneDoc.getField("ancestor_category_ids")?.stringValue() ?: "" + if (ancestorStr.isNotEmpty()) { + for (idStr in ancestorStr.split(",")) { + val catId = idStr.trim().toLongOrNull() ?: continue + categoryCounts[catId] = (categoryCounts[catId] ?: 0) + 1 + } + } + } + } + } + + override fun scoreMode(): ScoreMode = ScoreMode.COMPLETE_NO_SCORES + } + + searcher.search(context.query, collector) + + SearchFacets( + totalHits = totalHits, + categoryCounts = categoryCounts.toMap(), + bookCounts = bookCounts.toMap() + ) + } + } + // --- Inner SearchSession class --- inner class 
LuceneSearchSession internal constructor( diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt index 2b7b247..e71c57e 100644 --- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt +++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt @@ -95,4 +95,28 @@ interface SearchEngine : Closeable { * @return List of normalized terms to highlight (includes original tokens + expansions) */ fun buildHighlightTerms(query: String): List<String> + + /** + * Computes aggregate facet counts without loading full results. + * + * Uses a lightweight Lucene collector that only reads book IDs and ancestor + * category IDs from the index. This is much faster than streaming all results + * and allows the UI to display the category/book tree immediately. + * + * @param query The search query in Hebrew (may contain nikud/teamim) + * @param near Proximity slop for phrase matching (default: 5) + * @param bookFilter Optional single book ID to restrict results + * @param categoryFilter Optional category ID to restrict results + * @param bookIds Optional collection of book IDs to restrict results (OR logic) + * @param lineIds Optional collection of line IDs to restrict results (OR logic) + * @return [SearchFacets] with counts, or null if query is invalid + */ + fun computeFacets( + query: String, + near: Int = 5, + bookFilter: Long? = null, + categoryFilter: Long? = null, + bookIds: Collection<Long>? = null, + lineIds: Collection<Long>? = null + ): SearchFacets? 
} diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt index 8984aaa..8a72ba1 100644 --- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt +++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt @@ -72,3 +72,17 @@ data class LineHit( val score: Float, val rawText: String ) + +/** + * Aggregated facet counts from a search query. + * Computed once via a lightweight Lucene collector without loading full results. + * + * @property totalHits Total number of matching documents + * @property categoryCounts Map of categoryId to count (includes ancestor categories) + * @property bookCounts Map of bookId to count + */ +data class SearchFacets( + val totalHits: Long, + val categoryCounts: Map<Long, Int>, + val bookCounts: Map<Long, Int>, +) From 3be58f0ef0859f5f61912314e52a2db249a5ab72 Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Sun, 25 Jan 2026 08:39:35 +0200 Subject: [PATCH 08/10] feat(search): add baseBookOnly filter and ancestorCategoryIds indexing - Add ancestorCategoryIds to Lucene index for instant category filtering - Add baseBookOnly parameter to openSession() and computeFacets() - Filter by is_base_book directly in Lucene instead of fetching all IDs - Add getAncestorCategoryIds() to SeforimRepository --- .../dao/repository/SeforimRepository.kt | 8 ++++++++ .../seforimlibrary/searchindex/TextIndexWriter.kt | 2 ++ .../searchindex/BuildLuceneIndex.kt | 9 +++++++++ .../searchindex/lucene/LuceneTextIndexWriter.kt | 10 ++++++++++ .../seforimlibrary/search/LuceneSearchEngine.kt | 15 ++++++++++----- .../seforimlibrary/search/SearchEngine.kt | 8 ++++++-- 6 files changed, 45 insertions(+), 7 deletions(-) diff --git a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt 
b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt index dd0111f..92e7dae 100644 --- a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt +++ b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt @@ -289,6 +289,14 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { database.categoryClosureQueriesQueries.selectDescendants(ancestorId).executeAsList() } + /** + * Returns all ancestor category IDs (including the category itself) using the + * category_closure table. Used for pre-indexing ancestors in search indexes. + */ + suspend fun getAncestorCategoryIds(categoryId: Long): List<Long> = withContext(Dispatchers.IO) { + database.categoryClosureQueriesQueries.selectAncestors(categoryId).executeAsList() + } + /** * Finds categories whose title matches the LIKE pattern. Use %term% for contains. */ diff --git a/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt b/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt index ed4f16c..41bc731 100644 --- a/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt +++ b/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt @@ -12,6 +12,7 @@ interface TextIndexWriter : AutoCloseable { * @param bookId The book id * @param bookTitle The book title (for display) * @param categoryId The category id of the book + * @param ancestorCategoryIds List of ancestor category IDs (including categoryId itself) for filtering * @param lineId The line id * @param lineIndex The 0-based line index within the book * @param normalizedText Normalized text to index in the primary field (typically StandardAnalyzer) @@ -24,6 +25,7 @@ interface 
TextIndexWriter : AutoCloseable { bookId: Long, bookTitle: String, categoryId: Long, + ancestorCategoryIds: List<Long> = emptyList(), lineId: Long, lineIndex: Int, normalizedText: String, diff --git a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt index 7aa2b51..039f322 100644 --- a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt +++ b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt @@ -13,6 +13,7 @@ import kotlinx.coroutines.runBlocking import kotlinx.coroutines.async import kotlinx.coroutines.awaitAll import kotlinx.coroutines.Dispatchers +import java.util.concurrent.ConcurrentHashMap import org.apache.lucene.analysis.standard.StandardAnalyzer import org.apache.lucene.analysis.Analyzer import org.apache.lucene.analysis.TokenStream @@ -123,6 +124,9 @@ fun main() = runBlocking { logger.i { "Indexing $totalBooks books into $indexDir using StandardAnalyzer + 4-gram field" } val progress = java.util.concurrent.atomic.AtomicInteger(0) + // Pre-compute ancestor category IDs per category (cached across books) + val ancestorsByCategory = ConcurrentHashMap<Long, List<Long>>() + books.map { book -> async(workerDispatcher) { val current = progress.incrementAndGet() @@ -191,6 +195,10 @@ fun main() = runBlocking { val allLines = runCatching { localRepo.getLines(book.id, 0, total - 1) }.getOrDefault(emptyList()) // Note: rawPlainText is no longer stored in the index. // Snippet source is fetched from DB at query time by RepositorySnippetSourceProvider. 
+ // Pre-compute ancestor category IDs for this book's category (cached) + val ancestors = ancestorsByCategory.getOrPut(book.categoryId) { + localRepo.getAncestorCategoryIds(book.categoryId) // getOrPut is inline, so the suspend call needs no runBlocking + } var processed = 0 var nextLogPct = 10 for (ln in allLines) { @@ -199,6 +207,7 @@ fun main() = runBlocking { bookId = book.id, bookTitle = book.title, categoryId = book.categoryId, + ancestorCategoryIds = ancestors, lineId = ln.id, lineIndex = ln.lineIndex, normalizedText = normalized, diff --git a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt index 9c57405..dcbbe18 100644 --- a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt +++ b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt @@ -33,6 +33,7 @@ class LuceneTextIndexWriter( const val FIELD_BOOK_ID = "book_id" const val FIELD_CATEGORY_ID = "category_id" + const val FIELD_ANCESTOR_CATEGORY_IDS = "ancestor_category_ids" const val FIELD_BOOK_TITLE = "book_title" const val FIELD_LINE_ID = "line_id" const val FIELD_LINE_INDEX = "line_index" @@ -59,6 +60,7 @@ class LuceneTextIndexWriter( bookId: Long, bookTitle: String, categoryId: Long, + ancestorCategoryIds: List<Long>, lineId: Long, lineIndex: Int, normalizedText: String, @@ -76,6 +78,14 @@ class LuceneTextIndexWriter( add(IntPoint(FIELD_CATEGORY_ID, categoryId.toInt())) add(StoredField(FIELD_BOOK_TITLE, bookTitle)) + // Index ancestor category IDs for efficient filtering and retrieval + // IntPoint for filtering (multi-valued) + for (ancestorId in ancestorCategoryIds) { + add(IntPoint(FIELD_ANCESTOR_CATEGORY_IDS, ancestorId.toInt())) + } + // StoredField for retrieval (comma-separated) + 
add(StoredField(FIELD_ANCESTOR_CATEGORY_IDS, ancestorCategoryIds.joinToString(","))) + add(StoredField(FIELD_LINE_ID, lineId)) add(IntPoint(FIELD_LINE_ID, lineId.toInt())) add(StoredField(FIELD_LINE_INDEX, lineIndex)) diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt index 6fc6ab1..3e992c7 100644 --- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt +++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt @@ -152,9 +152,10 @@ class LuceneSearchEngine( bookFilter: Long?, categoryFilter: Long?, bookIds: Collection<Long>?, - lineIds: Collection<Long>? + lineIds: Collection<Long>?, + baseBookOnly: Boolean ): SearchSession? { - val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds) ?: return null + val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds, baseBookOnly) ?: return null val reader = DirectoryReader.open(dir) return LuceneSearchSession(context.query, context.anchorTerms, context.highlightTerms, reader) } @@ -252,9 +253,10 @@ class LuceneSearchEngine( bookFilter: Long?, categoryFilter: Long?, bookIds: Collection<Long>?, - lineIds: Collection<Long>? + lineIds: Collection<Long>?, + baseBookOnly: Boolean ): SearchFacets? { - val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds) + val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds, baseBookOnly) ?: return null return withSearcher { searcher -> @@ -373,7 +375,8 @@ class LuceneSearchEngine( bookFilter: Long?, categoryFilter: Long?, bookIds: Collection<Long>?, - lineIds: Collection<Long>? + lineIds: Collection<Long>?, + baseBookOnly: Boolean = false ): SearchContext? 
{ val norm = HebrewTextUtils.normalizeHebrew(rawQuery) if (norm.isBlank()) return null @@ -452,6 +455,8 @@ class LuceneSearchEngine( builder.add(TermQuery(Term("type", "line")), BooleanClause.Occur.FILTER) if (bookFilter != null) builder.add(IntPoint.newExactQuery("book_id", bookFilter.toInt()), BooleanClause.Occur.FILTER) if (categoryFilter != null) builder.add(IntPoint.newExactQuery("category_id", categoryFilter.toInt()), BooleanClause.Occur.FILTER) + // Filter by base books only (is_base_book = 1) when baseBookOnly is true + if (baseBookOnly) builder.add(IntPoint.newExactQuery("is_base_book", 1), BooleanClause.Occur.FILTER) val bookIdsArray = bookIds?.map { it.toInt() }?.toIntArray() if (bookIdsArray != null && bookIdsArray.isNotEmpty()) { builder.add(IntPoint.newSetQuery("book_id", *bookIdsArray), BooleanClause.Occur.FILTER) diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt index e71c57e..827b639 100644 --- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt +++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt @@ -46,6 +46,7 @@ interface SearchEngine : Closeable { * @param categoryFilter Optional category ID to restrict results * @param bookIds Optional collection of book IDs to restrict results (OR logic) * @param lineIds Optional collection of line IDs to restrict results (OR logic) + * @param baseBookOnly If true, restrict results to base books only (default: false) * @return A [SearchSession] for paginated access to results, or null if query is invalid */ fun openSession( @@ -54,7 +55,8 @@ interface SearchEngine : Closeable { bookFilter: Long? = null, categoryFilter: Long? = null, bookIds: Collection<Long>? = null, - lineIds: Collection<Long>? = null + lineIds: Collection<Long>? = null, + baseBookOnly: Boolean = false ): SearchSession? 
/** @@ -109,6 +111,7 @@ interface SearchEngine : Closeable { * @param categoryFilter Optional category ID to restrict results * @param bookIds Optional collection of book IDs to restrict results (OR logic) * @param lineIds Optional collection of line IDs to restrict results (OR logic) + * @param baseBookOnly If true, restrict results to base books only (default: false) * @return [SearchFacets] with counts, or null if query is invalid */ fun computeFacets( @@ -117,6 +120,7 @@ interface SearchEngine : Closeable { bookFilter: Long? = null, categoryFilter: Long? = null, bookIds: Collection<Long>? = null, - lineIds: Collection<Long>? = null + lineIds: Collection<Long>? = null, + baseBookOnly: Boolean = false ): SearchFacets? } From 9b77f4cbf56b8c7741258f8b7f05b86303a33f6b Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Thu, 29 Jan 2026 22:30:58 +0200 Subject: [PATCH 09/10] refactor(dao): extract LineSelectionRepository interface for testability Extract an interface from SeforimRepository to allow mocking in unit tests. 
The interface includes methods needed for line selection and navigation: - getHeadingTocEntryByLineId - getLineIdsForTocEntry - getTocEntryIdForLine - getTocEntry - getLine - getPreviousLine - getNextLine - getLines --- .../dao/repository/LineSelectionRepository.kt | 50 +++++++++++++++++++ .../dao/repository/SeforimRepository.kt | 24 ++++----- 2 files changed, 62 insertions(+), 12 deletions(-) create mode 100644 dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/LineSelectionRepository.kt diff --git a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/LineSelectionRepository.kt b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/LineSelectionRepository.kt new file mode 100644 index 0000000..17bd901 --- /dev/null +++ b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/LineSelectionRepository.kt @@ -0,0 +1,50 @@ +package io.github.kdroidfilter.seforimlibrary.dao.repository + +import io.github.kdroidfilter.seforimlibrary.core.models.Line +import io.github.kdroidfilter.seforimlibrary.core.models.TocEntry + +/** + * Interface for line selection and navigation related repository operations. + * This interface is extracted to allow mocking in tests. + */ +interface LineSelectionRepository { + /** + * Returns the TOC entry whose heading line is the given line id, or null if not a TOC heading. + */ + suspend fun getHeadingTocEntryByLineId(lineId: Long): TocEntry? + + /** + * Returns all line ids that belong to the given TOC entry (section), ordered by lineIndex. + */ + suspend fun getLineIdsForTocEntry(tocEntryId: Long): List<Long> + + /** + * Returns the TOC entry ID for a given line, or null if the line has no TOC mapping. + */ + suspend fun getTocEntryIdForLine(lineId: Long): Long? + + /** + * Returns a TOC entry by its ID. + */ + suspend fun getTocEntry(id: Long): TocEntry? + + /** + * Returns a line by its ID. + */ + suspend fun getLine(id: Long): Line? 
+ + /** + * Returns the previous line in the book, or null if at the beginning. + */ + suspend fun getPreviousLine(bookId: Long, currentLineIndex: Int): Line? + + /** + * Returns the next line in the book, or null if at the end. + */ + suspend fun getNextLine(bookId: Long, currentLineIndex: Int): Line? + + /** + * Returns lines in a range for a book. + */ + suspend fun getLines(bookId: Long, startIndex: Int, endIndex: Int): List<Line> +} diff --git a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt index 92e7dae..82d68ff 100644 --- a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt +++ b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt @@ -36,7 +36,7 @@ import kotlinx.serialization.json.Json * @property driver The SQL driver used to connect to the database * @constructor Creates a repository with the specified database path and driver */ -class SeforimRepository(databasePath: String, private val driver: SqlDriver) { +class SeforimRepository(databasePath: String, private val driver: SqlDriver) : LineSelectionRepository { private val database = SeforimDb(driver) private val json = Json { ignoreUnknownKeys = true } private val logger = Logger.withTag("SeforimRepository") @@ -160,7 +160,7 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { /** * Gets the tocEntryId associated with a line via the mapping table. */ - suspend fun getTocEntryIdForLine(lineId: Long): Long? = withContext(Dispatchers.IO) { + override suspend fun getTocEntryIdForLine(lineId: Long): Long? 
= withContext(Dispatchers.IO) { database.lineTocQueriesQueries.selectTocEntryIdByLineId(lineId).executeAsOneOrNull() } @@ -183,14 +183,14 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { /** * Returns the TOC entry whose heading line is the given line id, or null if not a TOC heading. */ - suspend fun getHeadingTocEntryByLineId(lineId: Long): TocEntry? = withContext(Dispatchers.IO) { + override suspend fun getHeadingTocEntryByLineId(lineId: Long): TocEntry? = withContext(Dispatchers.IO) { database.tocQueriesQueries.selectByLineId(lineId).executeAsOneOrNull()?.toModel() } /** * Returns all line ids that belong to the given TOC entry (section), ordered by lineIndex. */ - suspend fun getLineIdsForTocEntry(tocEntryId: Long): List<Long> = withContext(Dispatchers.IO) { + override suspend fun getLineIdsForTocEntry(tocEntryId: Long): List<Long> = withContext(Dispatchers.IO) { database.lineTocQueriesQueries.selectLineIdsByTocEntryId(tocEntryId).executeAsList() } @@ -1040,7 +1040,7 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { // --- Lines --- - suspend fun getLine(id: Long): Line? = withContext(Dispatchers.IO) { + override suspend fun getLine(id: Long): Line? 
= withContext(Dispatchers.IO) { database.lineQueriesQueries.selectById(id).executeAsOneOrNull()?.toModel() } @@ -1049,7 +1049,7 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { .executeAsOneOrNull()?.toModel() } - suspend fun getLines(bookId: Long, startIndex: Int, endIndex: Int): List<Line> = + override suspend fun getLines(bookId: Long, startIndex: Int, endIndex: Int): List<Line> = withContext(Dispatchers.IO) { database.lineQueriesQueries.selectByBookIdRange( bookId = bookId, @@ -1071,22 +1071,22 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { * @param currentLineIndex The index of the current line * @return The previous line, or null if there is no previous line */ - suspend fun getPreviousLine(bookId: Long, currentLineIndex: Int): Line? = withContext(Dispatchers.IO) { + override suspend fun getPreviousLine(bookId: Long, currentLineIndex: Int): Line? = withContext(Dispatchers.IO) { if (currentLineIndex <= 0) return@withContext null - + val previousIndex = currentLineIndex - 1 database.lineQueriesQueries.selectByBookIdAndIndex(bookId, previousIndex.toLong()) .executeAsOneOrNull()?.toModel() } - + /** * Gets the next line for a given book and line index. - * + * * @param bookId The ID of the book * @param currentLineIndex The index of the current line * @return The next line, or null if there is no next line */ - suspend fun getNextLine(bookId: Long, currentLineIndex: Int): Line? = withContext(Dispatchers.IO) { + override suspend fun getNextLine(bookId: Long, currentLineIndex: Int): Line? = withContext(Dispatchers.IO) { val nextIndex = currentLineIndex + 1 database.lineQueriesQueries.selectByBookIdAndIndex(bookId, nextIndex.toLong()) .executeAsOneOrNull()?.toModel() @@ -1165,7 +1165,7 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) { // --- Table of Contents --- - suspend fun getTocEntry(id: Long): TocEntry? 
= withContext(Dispatchers.IO) { + override suspend fun getTocEntry(id: Long): TocEntry? = withContext(Dispatchers.IO) { database.tocQueriesQueries.selectTocById(id).executeAsOneOrNull()?.toModel() } From c36fc66a224ff183e26f35e8d0636204fdceb649 Mon Sep 17 00:00:00 2001 From: Elie Gambache <elyahou.hadass@gmail.com> Date: Sat, 31 Jan 2026 20:57:25 +0200 Subject: [PATCH 10/10] docs: add CLI README with build and usage instructions --- cli/README.md | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 cli/README.md diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 0000000..4e69d06 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,93 @@ +# Seforim CLI + +Command-line tool for searching the Seforim database. + +## Building + +### Fat JAR (development) + +```bash +./gradlew :cli:fatJar +``` + +The JAR is generated at `cli/build/libs/seforim-cli-1.0.0-all.jar`. + +### Direct execution + +```bash +java -jar cli/build/libs/seforim-cli-1.0.0-all.jar <command> [options] +``` + +### Native package (jpackage) + +```bash +# Installer (.deb, .dmg, .exe depending on OS) +./gradlew :cli:jpackage + +# Portable app image (no installer) +./gradlew :cli:jpackageAppImage + +# Optimized version with ProGuard +./gradlew :cli:jpackageOptimized +``` + +Packages are generated in `cli/build/jpackage/` or `cli/build/jpackage-image/`. 
+ +## Usage + +### Commands + +```bash +seforim-cli search <query> # Search for text +seforim-cli books <prefix> # Search books by title prefix +seforim-cli facets <query> # Get facets (counts by book/category) +seforim-cli help # Show help +``` + +### Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--db <path>` | Path to seforim.db | Same location as SeforimApp | +| `--index <path>` | Path to Lucene index | `<db>.lucene` | +| `--dict <path>` | Path to lexical.db dictionary | `<db>/../lexical.db` | +| `--limit <n>` | Results per page | 25 | +| `--near <n>` | Proximity slop for phrases (0=exact) | 5 | +| `--book <id>` | Filter by book ID | - | +| `--category <id>` | Filter by category ID | - | +| `--base-only` | Search base books only (not commentaries) | false | +| `--json` | Output as JSON | false | +| `--no-snippets` | Disable snippets (faster) | false | +| `--all` | Fetch all results (not just first page) | false | + +### Examples + +```bash +# Simple search +seforim-cli search "בראשית ברא" --limit 10 + +# Search with filter and JSON output +seforim-cli search "אברהם" --book 123 --json + +# Search books by prefix +seforim-cli books "בראש" --limit 5 + +# Get facets +seforim-cli facets "משה" --base-only + +# With custom database path +seforim-cli search "תורה" --db /path/to/seforim.db --index /path/to/seforim.db.lucene +``` + +## Requirements + +- JDK 21+ (JetBrains Runtime recommended) +- `seforim.db` database with its Lucene index +- Optional: `lexical.db` for search expansion + +## File structure + +The CLI uses the same default paths as the SeforimApp: +- Database: `~/.local/share/io.github.kdroidfilter.seforimapp/databases/seforim.db` +- Lucene index: `seforim.db.lucene` (next to the DB) +- Dictionary: `lexical.db` (next to the DB)