diff --git a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt
index dd0111f..92e7dae 100644
--- a/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt
+++ b/dao/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/dao/repository/SeforimRepository.kt
@@ -289,6 +289,14 @@ class SeforimRepository(databasePath: String, private val driver: SqlDriver) {
         database.categoryClosureQueriesQueries.selectDescendants(ancestorId).executeAsList()
     }
 
+    /**
+     * Returns all ancestor category IDs (including the category itself) using the
+     * category_closure table. Used for pre-indexing ancestors in search indexes.
+     */
+    suspend fun getAncestorCategoryIds(categoryId: Long): List<Long> = withContext(Dispatchers.IO) {
+        database.categoryClosureQueriesQueries.selectAncestors(categoryId).executeAsList()
+    }
+
     /**
      * Finds categories whose title matches the LIKE pattern. Use %term% for contains.
      */
diff --git a/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt b/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt
index ed4f16c..41bc731 100644
--- a/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt
+++ b/generator/searchindex/src/commonMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/TextIndexWriter.kt
@@ -12,6 +12,7 @@ interface TextIndexWriter : AutoCloseable {
      * @param bookId The book id
      * @param bookTitle The book title (for display)
      * @param categoryId The category id of the book
+     * @param ancestorCategoryIds List of ancestor category IDs (including categoryId itself) for filtering
      * @param lineId The line id
      * @param lineIndex The 0-based line index within the book
      * @param normalizedText Normalized text to index in the primary field (typically StandardAnalyzer)
@@ -24,6 +25,7 @@ interface TextIndexWriter : AutoCloseable {
         bookId: Long,
         bookTitle: String,
         categoryId: Long,
+        ancestorCategoryIds: List<Long> = emptyList(),
         lineId: Long,
         lineIndex: Int,
         normalizedText: String,
diff --git a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt
index 7aa2b51..039f322 100644
--- a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt
+++ b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/BuildLuceneIndex.kt
@@ -13,6 +13,7 @@ import kotlinx.coroutines.runBlocking
 import kotlinx.coroutines.async
 import kotlinx.coroutines.awaitAll
 import kotlinx.coroutines.Dispatchers
+import java.util.concurrent.ConcurrentHashMap
 import org.apache.lucene.analysis.standard.StandardAnalyzer
 import org.apache.lucene.analysis.Analyzer
 import org.apache.lucene.analysis.TokenStream
@@ -123,6 +124,9 @@ fun main() = runBlocking {
     logger.i { "Indexing $totalBooks books into $indexDir using StandardAnalyzer + 4-gram field" }
     val progress = java.util.concurrent.atomic.AtomicInteger(0)
 
+    // Pre-compute ancestor category IDs per category (cached across books)
+    val ancestorsByCategory = ConcurrentHashMap<Long, List<Long>>()
+
     books.map { book ->
         async(workerDispatcher) {
             val current = progress.incrementAndGet()
@@ -191,6 +195,12 @@ fun main() = runBlocking {
                     val allLines = runCatching { localRepo.getLines(book.id, 0, total - 1) }.getOrDefault(emptyList())
                     // Note: rawPlainText is no longer stored in the index.
                     // Snippet source is fetched from DB at query time by RepositorySnippetSourceProvider.
+                    // Resolve the ancestor category IDs for this book's category and cache them so
+                    // later books in the same category reuse the list. getOrPut is inline, so the
+                    // suspend repository call runs directly in this coroutine (no nested runBlocking).
+                    val ancestors = ancestorsByCategory.getOrPut(book.categoryId) {
+                        localRepo.getAncestorCategoryIds(book.categoryId)
+                    }
                     var processed = 0
                     var nextLogPct = 10
                     for (ln in allLines) {
@@ -199,6 +209,7 @@ fun main() = runBlocking {
                             bookId = book.id,
                             bookTitle = book.title,
                             categoryId = book.categoryId,
+                            ancestorCategoryIds = ancestors,
                             lineId = ln.id,
                             lineIndex = ln.lineIndex,
                             normalizedText = normalized,
diff --git a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt
index 9c57405..dcbbe18 100644
--- a/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt
+++ b/generator/searchindex/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/searchindex/lucene/LuceneTextIndexWriter.kt
@@ -33,6 +33,7 @@ class LuceneTextIndexWriter(
 
         const val FIELD_BOOK_ID = "book_id"
         const val FIELD_CATEGORY_ID = "category_id"
+        const val FIELD_ANCESTOR_CATEGORY_IDS = "ancestor_category_ids"
         const val FIELD_BOOK_TITLE = "book_title"
         const val FIELD_LINE_ID = "line_id"
         const val FIELD_LINE_INDEX = "line_index"
@@ -59,6 +60,7 @@ class LuceneTextIndexWriter(
         bookId: Long,
         bookTitle: String,
         categoryId: Long,
+        ancestorCategoryIds: List<Long>,
         lineId: Long,
         lineIndex: Int,
         normalizedText: String,
@@ -76,6 +78,14 @@ class LuceneTextIndexWriter(
             add(IntPoint(FIELD_CATEGORY_ID, categoryId.toInt()))
             add(StoredField(FIELD_BOOK_TITLE, bookTitle))
 
+            // Index ancestor category IDs for efficient filtering and retrieval
+            // IntPoint for filtering (multi-valued)
+            for (ancestorId in ancestorCategoryIds) {
+                add(IntPoint(FIELD_ANCESTOR_CATEGORY_IDS, ancestorId.toInt()))
+            }
+            // StoredField for retrieval (comma-separated)
+            add(StoredField(FIELD_ANCESTOR_CATEGORY_IDS, ancestorCategoryIds.joinToString(",")))
+
             add(StoredField(FIELD_LINE_ID, lineId))
             add(IntPoint(FIELD_LINE_ID, lineId.toInt()))
             add(StoredField(FIELD_LINE_INDEX, lineIndex))
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt
index a1e5b8e..3e992c7 100644
--- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt
@@ -8,14 +8,19 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
 import org.apache.lucene.index.DirectoryReader
 import org.apache.lucene.index.StoredFields
 import org.apache.lucene.index.Term
+import org.apache.lucene.index.LeafReaderContext
 import org.apache.lucene.search.BooleanClause
 import org.apache.lucene.search.BooleanQuery
 import org.apache.lucene.search.BoostQuery
+import org.apache.lucene.search.Collector
 import org.apache.lucene.search.FuzzyQuery
 import org.apache.lucene.search.IndexSearcher
+import org.apache.lucene.search.LeafCollector
 import org.apache.lucene.search.PrefixQuery
 import org.apache.lucene.search.Query
+import org.apache.lucene.search.Scorable
 import org.apache.lucene.search.ScoreDoc
+import org.apache.lucene.search.ScoreMode
 import org.apache.lucene.search.TermQuery
 import org.apache.lucene.util.QueryBuilder
 import org.apache.lucene.store.FSDirectory
@@ -147,9 +152,10 @@ class LuceneSearchEngine(
         bookFilter: Long?,
         categoryFilter: Long?,
         bookIds: Collection<Long>?,
-        lineIds: Collection<Long>?
+        lineIds: Collection<Long>?,
+        baseBookOnly: Boolean
     ): SearchSession? {
-        val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds) ?: return null
+        val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds, baseBookOnly) ?: return null
         val reader = DirectoryReader.open(dir)
         return LuceneSearchSession(context.query, context.anchorTerms, context.highlightTerms, reader)
     }
@@ -241,6 +247,71 @@ class LuceneSearchEngine(
         // Directory is closed automatically when readers are closed
     }
 
+    override fun computeFacets(
+        query: String,
+        near: Int,
+        bookFilter: Long?,
+        categoryFilter: Long?,
+        bookIds: Collection<Long>?,
+        lineIds: Collection<Long>?,
+        baseBookOnly: Boolean
+    ): SearchFacets? {
+        val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds, baseBookOnly)
+            ?: return null
+
+        return withSearcher { searcher ->
+            val categoryCounts = mutableMapOf<Long, Int>()
+            val bookCounts = mutableMapOf<Long, Int>()
+            var totalHits = 0L
+            // Restrict stored-field loading to the two fields the facets need, so Lucene
+            // does not materialize every stored field of every matching document.
+            val facetFields = setOf("book_id", "ancestor_category_ids")
+
+            // Lightweight collector that only reads stored fields for aggregation
+            val collector = object : Collector {
+                override fun getLeafCollector(leafContext: LeafReaderContext): LeafCollector {
+                    val storedFields = leafContext.reader().storedFields()
+
+                    return object : LeafCollector {
+                        override fun setScorer(scorer: Scorable) {
+                            // No scoring needed for facet counting
+                        }
+
+                        override fun collect(doc: Int) {
+                            totalHits++
+                            val luceneDoc = storedFields.document(doc, facetFields)
+
+                            // Book count
+                            val bookId = luceneDoc.getField("book_id")?.numericValue()?.toLong()
+                            if (bookId != null) {
+                                bookCounts[bookId] = (bookCounts[bookId] ?: 0) + 1
+                            }
+
+                            // Category counts from ancestors (stored as comma-separated string)
+                            val ancestorStr = luceneDoc.getField("ancestor_category_ids")?.stringValue() ?: ""
+                            if (ancestorStr.isNotEmpty()) {
+                                for (idStr in ancestorStr.split(",")) {
+                                    val catId = idStr.trim().toLongOrNull() ?: continue
+                                    categoryCounts[catId] = (categoryCounts[catId] ?: 0) + 1
+                                }
+                            }
+                        }
+                    }
+                }
+
+                override fun scoreMode(): ScoreMode = ScoreMode.COMPLETE_NO_SCORES
+            }
+
+            searcher.search(context.query, collector)
+
+            SearchFacets(
+                totalHits = totalHits,
+                categoryCounts = categoryCounts.toMap(),
+                bookCounts = bookCounts.toMap()
+            )
+        }
+    }
+
     // --- Inner SearchSession class ---
 
     inner class LuceneSearchSession internal constructor(
@@ -307,7 +378,8 @@ class LuceneSearchEngine(
         bookFilter: Long?,
         categoryFilter: Long?,
         bookIds: Collection<Long>?,
-        lineIds: Collection<Long>?
+        lineIds: Collection<Long>?,
+        baseBookOnly: Boolean = false
     ): SearchContext? {
         val norm = HebrewTextUtils.normalizeHebrew(rawQuery)
         if (norm.isBlank()) return null
@@ -386,6 +458,8 @@ class LuceneSearchEngine(
         builder.add(TermQuery(Term("type", "line")), BooleanClause.Occur.FILTER)
         if (bookFilter != null) builder.add(IntPoint.newExactQuery("book_id", bookFilter.toInt()), BooleanClause.Occur.FILTER)
         if (categoryFilter != null) builder.add(IntPoint.newExactQuery("category_id", categoryFilter.toInt()), BooleanClause.Occur.FILTER)
+        // Filter by base books only (is_base_book = 1) when baseBookOnly is true
+        if (baseBookOnly) builder.add(IntPoint.newExactQuery("is_base_book", 1), BooleanClause.Occur.FILTER)
         val bookIdsArray = bookIds?.map { it.toInt() }?.toIntArray()
         if (bookIdsArray != null && bookIdsArray.isNotEmpty()) {
             builder.add(IntPoint.newSetQuery("book_id", *bookIdsArray), BooleanClause.Occur.FILTER)
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt
index 2b7b247..827b639 100644
--- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt
@@ -46,6 +46,7 @@ interface SearchEngine : Closeable {
      * @param categoryFilter Optional category ID to restrict results
      * @param bookIds Optional collection of book IDs to restrict results (OR logic)
      * @param lineIds Optional collection of line IDs to restrict results (OR logic)
+     * @param baseBookOnly If true, restrict results to base books only (default: false)
      * @return A [SearchSession] for paginated access to results, or null if query is invalid
      */
     fun openSession(
@@ -54,7 +55,8 @@ interface SearchEngine : Closeable {
         bookFilter: Long? = null,
         categoryFilter: Long? = null,
         bookIds: Collection<Long>? = null,
-        lineIds: Collection<Long>? = null
+        lineIds: Collection<Long>? = null,
+        baseBookOnly: Boolean = false
     ): SearchSession?
 
     /**
@@ -95,4 +97,30 @@ interface SearchEngine : Closeable {
      * @return List of normalized terms to highlight (includes original tokens + expansions)
      */
     fun buildHighlightTerms(query: String): List<String>
+
+    /**
+     * Computes aggregate facet counts without loading full results.
+     *
+     * Uses a lightweight Lucene collector that only reads book IDs and ancestor
+     * category IDs from the index. This is much faster than streaming all results
+     * and allows the UI to display the category/book tree immediately.
+     *
+     * @param query The search query in Hebrew (may contain nikud/teamim)
+     * @param near Proximity slop for phrase matching (default: 5)
+     * @param bookFilter Optional single book ID to restrict results
+     * @param categoryFilter Optional category ID to restrict results
+     * @param bookIds Optional collection of book IDs to restrict results (OR logic)
+     * @param lineIds Optional collection of line IDs to restrict results (OR logic)
+     * @param baseBookOnly If true, restrict results to base books only (default: false)
+     * @return [SearchFacets] with counts, or null if query is invalid
+     */
+    fun computeFacets(
+        query: String,
+        near: Int = 5,
+        bookFilter: Long? = null,
+        categoryFilter: Long? = null,
+        bookIds: Collection<Long>? = null,
+        lineIds: Collection<Long>? = null,
+        baseBookOnly: Boolean = false
+    ): SearchFacets?
 }
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt
index 8984aaa..8a72ba1 100644
--- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt
@@ -72,3 +72,17 @@ data class LineHit(
     val score: Float,
     val rawText: String
 )
+
+/**
+ * Aggregated facet counts from a search query.
+ * Computed once via a lightweight Lucene collector without loading full results.
+ *
+ * @property totalHits Total number of matching documents
+ * @property categoryCounts Map of categoryId to count (includes ancestor categories)
+ * @property bookCounts Map of bookId to count
+ */
+data class SearchFacets(
+    val totalHits: Long,
+    val categoryCounts: Map<Long, Int>,
+    val bookCounts: Map<Long, Int>,
+)