From 5f51c614c45399de0ab483ac1aaf4c57a7afec12 Mon Sep 17 00:00:00 2001
From: Elie Gambache
Date: Sun, 25 Jan 2026 02:59:50 +0200
Subject: [PATCH] feat(search): add computeFacets() for instant aggregates

---
 .../search/LuceneSearchEngine.kt              | 75 +++++++++++++++++++
 .../seforimlibrary/search/SearchEngine.kt     | 24 ++++++
 .../seforimlibrary/search/SearchSession.kt    | 14 ++++
 3 files changed, 113 insertions(+)

diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt
index a1e5b8e..6fc6ab1 100644
--- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/LuceneSearchEngine.kt
@@ -8,14 +8,19 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
 import org.apache.lucene.index.DirectoryReader
 import org.apache.lucene.index.StoredFields
 import org.apache.lucene.index.Term
+import org.apache.lucene.index.LeafReaderContext
 import org.apache.lucene.search.BooleanClause
 import org.apache.lucene.search.BooleanQuery
 import org.apache.lucene.search.BoostQuery
+import org.apache.lucene.search.Collector
 import org.apache.lucene.search.FuzzyQuery
 import org.apache.lucene.search.IndexSearcher
+import org.apache.lucene.search.LeafCollector
 import org.apache.lucene.search.PrefixQuery
 import org.apache.lucene.search.Query
+import org.apache.lucene.search.Scorable
 import org.apache.lucene.search.ScoreDoc
+import org.apache.lucene.search.ScoreMode
 import org.apache.lucene.search.TermQuery
 import org.apache.lucene.util.QueryBuilder
 import org.apache.lucene.store.FSDirectory
@@ -241,6 +246,76 @@ class LuceneSearchEngine(
         // Directory is closed automatically when readers are closed
     }

+    /**
+     * Counts the documents matching [query] per book and per ancestor
+     * category in one pass over the index, without materialising hits.
+     *
+     * Only the `book_id` and `ancestor_category_ids` stored fields are
+     * decoded for each hit; all other stored fields are skipped.
+     *
+     * @return the aggregated [SearchFacets], or `null` when
+     *         [buildSearchContext] yields no context for the request
+     */
+    override fun computeFacets(
+        query: String,
+        near: Int,
+        bookFilter: Long?,
+        categoryFilter: Long?,
+        bookIds: Collection<Long>?,
+        lineIds: Collection<Long>?
+    ): SearchFacets? {
+        val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds)
+            ?: return null
+
+        return withSearcher { searcher ->
+            val categoryCounts = mutableMapOf<Long, Int>()
+            val bookCounts = mutableMapOf<Long, Int>()
+            var totalHits = 0L
+
+            // Decode only the stored fields the aggregation reads, not the whole document.
+            val facetFields = setOf("book_id", "ancestor_category_ids")
+
+            // Collector that tallies counts per hit instead of retaining the hits themselves.
+            val collector = object : Collector {
+                override fun getLeafCollector(leafContext: LeafReaderContext): LeafCollector {
+                    val storedFields = leafContext.reader().storedFields()
+
+                    return object : LeafCollector {
+                        override fun setScorer(scorer: Scorable) {
+                            // No scoring needed for facet counting
+                        }
+
+                        override fun collect(doc: Int) {
+                            totalHits++
+                            val luceneDoc = storedFields.document(doc, facetFields)
+
+                            // Book count
+                            luceneDoc.getField("book_id")?.numericValue()?.toLong()?.let { bookId ->
+                                bookCounts.merge(bookId, 1, Int::plus)
+                            }
+
+                            // Category counts from ancestors (stored as comma-separated string)
+                            luceneDoc.getField("ancestor_category_ids")?.stringValue().orEmpty()
+                                .splitToSequence(",")
+                                .mapNotNull { it.trim().toLongOrNull() }
+                                .forEach { catId -> categoryCounts.merge(catId, 1, Int::plus) }
+                        }
+                    }
+                }
+
+                override fun scoreMode(): ScoreMode = ScoreMode.COMPLETE_NO_SCORES
+            }
+
+            searcher.search(context.query, collector)
+
+            SearchFacets(
+                totalHits = totalHits,
+                categoryCounts = categoryCounts.toMap(),
+                bookCounts = bookCounts.toMap()
+            )
+        }
+    }
+
     // --- Inner SearchSession class ---

     inner class LuceneSearchSession internal constructor(
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt
index 2b7b247..e71c57e 100644
--- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchEngine.kt
@@ -95,4 +95,28 @@ interface SearchEngine : Closeable {
      * @return List of normalized terms to highlight (includes original tokens + expansions)
      */
     fun buildHighlightTerms(query: String): List<String>
+
+    /**
+     * Computes aggregate facet counts without loading full results.
+     *
+     * Uses a lightweight Lucene collector that only reads book IDs and ancestor
+     * category IDs from the index. This is much faster than streaming all results
+     * and allows the UI to display the category/book tree immediately.
+     *
+     * @param query The search query in Hebrew (may contain nikud/teamim)
+     * @param near Proximity slop for phrase matching (default: 5)
+     * @param bookFilter Optional single book ID to restrict results
+     * @param categoryFilter Optional category ID to restrict results
+     * @param bookIds Optional collection of book IDs to restrict results (OR logic)
+     * @param lineIds Optional collection of line IDs to restrict results (OR logic)
+     * @return [SearchFacets] with counts, or null if query is invalid
+     */
+    fun computeFacets(
+        query: String,
+        near: Int = 5,
+        bookFilter: Long? = null,
+        categoryFilter: Long? = null,
+        bookIds: Collection<Long>? = null,
+        lineIds: Collection<Long>? = null
+    ): SearchFacets?
 }
diff --git a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt
index 8984aaa..8a72ba1 100644
--- a/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt
+++ b/search/src/jvmMain/kotlin/io/github/kdroidfilter/seforimlibrary/search/SearchSession.kt
@@ -72,3 +72,17 @@ data class LineHit(
     val score: Float,
     val rawText: String
 )
+
+/**
+ * Aggregated facet counts from a search query.
+ * Computed once via a lightweight Lucene collector without loading full results.
+ *
+ * @property totalHits Total number of matching documents
+ * @property categoryCounts Map of categoryId to count (includes ancestor categories)
+ * @property bookCounts Map of bookId to count
+ */
+data class SearchFacets(
+    val totalHits: Long,
+    val categoryCounts: Map<Long, Int>,
+    val bookCounts: Map<Long, Int>,
+)