Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
import org.apache.lucene.index.DirectoryReader
import org.apache.lucene.index.StoredFields
import org.apache.lucene.index.Term
import org.apache.lucene.index.LeafReaderContext
import org.apache.lucene.search.BooleanClause
import org.apache.lucene.search.BooleanQuery
import org.apache.lucene.search.BoostQuery
import org.apache.lucene.search.Collector
import org.apache.lucene.search.FuzzyQuery
import org.apache.lucene.search.IndexSearcher
import org.apache.lucene.search.LeafCollector
import org.apache.lucene.search.PrefixQuery
import org.apache.lucene.search.Query
import org.apache.lucene.search.Scorable
import org.apache.lucene.search.ScoreDoc
import org.apache.lucene.search.ScoreMode
import org.apache.lucene.search.TermQuery
import org.apache.lucene.util.QueryBuilder
import org.apache.lucene.store.FSDirectory
Expand Down Expand Up @@ -241,6 +246,67 @@ class LuceneSearchEngine(
// Directory is closed automatically when readers are closed
}

/**
 * Aggregates facet counts for every document matching the query, without
 * building result objects for the individual hits.
 *
 * For each matching document this increments the total hit counter, the
 * counter of the document's `book_id`, and the counter of every id listed
 * in its comma-separated `ancestor_category_ids` field.
 *
 * @param query Raw search query, forwarded to [buildSearchContext]
 * @param near Proximity slop, forwarded to [buildSearchContext]
 * @param bookFilter Optional single book ID restriction
 * @param categoryFilter Optional category ID restriction
 * @param bookIds Optional set of book IDs to restrict to
 * @param lineIds Optional set of line IDs to restrict to
 * @return The aggregated [SearchFacets], or null when [buildSearchContext]
 *         cannot build a search context for the given arguments
 */
override fun computeFacets(
    query: String,
    near: Int,
    bookFilter: Long?,
    categoryFilter: Long?,
    bookIds: Collection<Long>?,
    lineIds: Collection<Long>?
): SearchFacets? {
    val context = buildSearchContext(query, near, bookFilter, categoryFilter, bookIds, lineIds)
        ?: return null

    return withSearcher { searcher ->
        val categoryCounts = mutableMapOf<Long, Int>()
        val bookCounts = mutableMapOf<Long, Int>()
        var totalHits = 0L

        // Restrict stored-field loading to the two fields the aggregation reads.
        // Without this, every hit would decode all of its stored fields, which
        // defeats the purpose of a lightweight facet pass.
        val facetFields = setOf("book_id", "ancestor_category_ids")

        // Collector that never scores; it only reads the facet fields of each hit.
        val collector = object : Collector {
            override fun getLeafCollector(leafContext: LeafReaderContext): LeafCollector {
                // Obtained from the leaf reader, so the doc IDs passed to collect()
                // (which are leaf-relative) can be used directly.
                val storedFields = leafContext.reader().storedFields()

                return object : LeafCollector {
                    override fun setScorer(scorer: Scorable) {
                        // No scoring needed for facet counting
                    }

                    override fun collect(doc: Int) {
                        totalHits++
                        val luceneDoc = storedFields.document(doc, facetFields)

                        // Book count
                        val bookId = luceneDoc.getField("book_id")?.numericValue()?.toLong()
                        if (bookId != null) {
                            bookCounts[bookId] = (bookCounts[bookId] ?: 0) + 1
                        }

                        // Category counts from ancestors (stored as comma-separated string)
                        val ancestorStr = luceneDoc.getField("ancestor_category_ids")?.stringValue() ?: ""
                        if (ancestorStr.isNotEmpty()) {
                            for (idStr in ancestorStr.split(",")) {
                                // Entries that are not valid longs are skipped silently.
                                val catId = idStr.trim().toLongOrNull() ?: continue
                                categoryCounts[catId] = (categoryCounts[catId] ?: 0) + 1
                            }
                        }
                    }
                }
            }

            override fun scoreMode(): ScoreMode = ScoreMode.COMPLETE_NO_SCORES
        }

        searcher.search(context.query, collector)

        // Hand out read-only snapshots of the accumulated counts.
        SearchFacets(
            totalHits = totalHits,
            categoryCounts = categoryCounts.toMap(),
            bookCounts = bookCounts.toMap()
        )
    }
}

// --- Inner SearchSession class ---

inner class LuceneSearchSession internal constructor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,28 @@ interface SearchEngine : Closeable {
* @return List of normalized terms to highlight (includes original tokens + expansions)
*/
fun buildHighlightTerms(query: String): List<String>

/**
 * Computes aggregate facet counts for a query without returning the individual hits.
 *
 * The result carries the total number of matches plus per-book and per-category
 * counts, so the UI can display the category/book tree without first streaming
 * the full result list. Implementations are expected to aggregate these counts
 * directly from the index rather than materializing every result.
 *
 * All filter parameters are optional; passing null leaves that restriction off.
 *
 * @param query The search query in Hebrew (may contain nikud/teamim)
 * @param near Proximity slop for phrase matching (default: 5)
 * @param bookFilter Optional single book ID to restrict results
 * @param categoryFilter Optional category ID to restrict results
 * @param bookIds Optional collection of book IDs to restrict results (OR logic)
 * @param lineIds Optional collection of line IDs to restrict results (OR logic)
 * @return [SearchFacets] with the aggregated counts, or null if the query is
 *         invalid and no search could be run
 */
fun computeFacets(
query: String,
near: Int = 5,
bookFilter: Long? = null,
categoryFilter: Long? = null,
bookIds: Collection<Long>? = null,
lineIds: Collection<Long>? = null
): SearchFacets?
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,17 @@ data class LineHit(
val score: Float,
val rawText: String
)

/**
 * Summary counts describing the full set of documents matched by a search.
 *
 * The counts are produced in a single aggregation pass over the matches, so
 * no individual result needs to be loaded in order to build this value.
 *
 * @property totalHits Number of documents that matched the query
 * @property categoryCounts Matches per category ID; a document contributes to
 *           each of its ancestor categories
 * @property bookCounts Matches per book ID
 */
data class SearchFacets(
    val totalHits: Long,
    val categoryCounts: Map<Long, Int>,
    val bookCounts: Map<Long, Int>,
)
Loading