From dee198ebbe9f2f134fc8e5c953a9421b15d3338c Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 18:30:30 +0200 Subject: [PATCH 1/8] refactor: use extracted search module from SeforimLibrary - Add dependency on SeforimLibrary:search module - Update DI to provide SearchEngine interface - Adapt SearchResultViewModel and SearchHomeViewModel - Update RepositorySnippetSourceProvider to implement SnippetProvider - Remove deprecated LuceneSearchService and MagicDictionary - Fix pagination module (remove unused distinctByTargetLine param) --- SeforimApp/build.gradle.kts | 3 + .../features/search/SearchHomeViewModel.kt | 4 +- .../features/search/SearchResultViewModel.kt | 26 +- .../seforimapp/framework/di/AppGraph.kt | 4 +- .../framework/di/modules/AppCoreBindings.kt | 16 +- .../framework/search/LuceneSearchService.kt | 1003 ----------------- .../framework/search/MagicDictionary.kt | 323 ------ .../search/RepositorySnippetSourceProvider.kt | 6 +- SeforimLibrary | 2 +- .../CommentsForLineOrTocPagingSource.kt | 3 +- .../pagination/LineTargumPagingSource.kt | 3 +- .../MultiLineCommentsPagingSource.kt | 3 +- 12 files changed, 38 insertions(+), 1358 deletions(-) delete mode 100644 SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/LuceneSearchService.kt delete mode 100644 SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/MagicDictionary.kt diff --git a/SeforimApp/build.gradle.kts b/SeforimApp/build.gradle.kts index 06bb9163..d54081c9 100644 --- a/SeforimApp/build.gradle.kts +++ b/SeforimApp/build.gradle.kts @@ -151,6 +151,9 @@ kotlin { implementation(libs.lucene.core) implementation(libs.reorderable) + // SeforimLibrary search module + implementation("io.github.kdroidfilter.seforimlibrary:search") + implementation(libs.commons.compress) // HTML sanitization for search snippets diff --git a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchHomeViewModel.kt b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchHomeViewModel.kt index 2908f39c..c36e0b94 100644 --- a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchHomeViewModel.kt +++ b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchHomeViewModel.kt @@ -6,7 +6,7 @@ import androidx.lifecycle.viewModelScope import io.github.kdroidfilter.seforim.tabs.TabsViewModel import io.github.kdroidfilter.seforim.tabs.TabsDestination import io.github.kdroidfilter.seforimlibrary.dao.repository.SeforimRepository -import io.github.kdroidfilter.seforimapp.framework.search.LuceneSearchService +import io.github.kdroidfilter.seforimlibrary.search.SearchEngine import io.github.kdroidfilter.seforimapp.framework.search.LuceneLookupSearchService import io.github.kdroidfilter.seforimlibrary.core.models.Book import io.github.kdroidfilter.seforimlibrary.core.models.Category @@ -64,7 +64,7 @@ class SearchHomeViewModel( private val tabsViewModel: TabsViewModel, private val persistedStore: TabPersistedStateStore, private val repository: SeforimRepository, - private val lucene: LuceneSearchService, + private val searchEngine: SearchEngine, private val lookup: LuceneLookupSearchService, private val settings: Settings ) : ViewModel() { diff --git a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchResultViewModel.kt b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchResultViewModel.kt index 406f94fe..46f08a18 100644 --- a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchResultViewModel.kt +++ b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/features/search/SearchResultViewModel.kt @@ -18,7 +18,9 @@ import io.github.kdroidfilter.seforimapp.core.settings.AppSettings import io.github.kdroidfilter.seforimapp.features.bookcontent.state.StateKeys import io.github.kdroidfilter.seforimapp.features.search.domain.BuildSearchTreeUseCase import io.github.kdroidfilter.seforimapp.features.search.domain.GetBreadcrumbPiecesUseCase -import io.github.kdroidfilter.seforimapp.framework.search.LuceneSearchService +import io.github.kdroidfilter.seforimlibrary.search.SearchEngine +import io.github.kdroidfilter.seforimlibrary.search.SearchSession +import io.github.kdroidfilter.seforimlibrary.search.LineHit import io.github.kdroidfilter.seforimapp.framework.di.AppScope import io.github.kdroidfilter.seforimapp.framework.session.SearchPersistedState import io.github.kdroidfilter.seforimapp.framework.session.TabPersistedStateStore @@ -63,7 +65,7 @@ class SearchResultViewModel( @Assisted savedStateHandle: SavedStateHandle, private val persistedStore: TabPersistedStateStore, private val repository: SeforimRepository, - private val lucene: LuceneSearchService, + private val lucene: SearchEngine, private val titleUpdateManager: TabTitleUpdateManager, private val tabsViewModel: TabsViewModel ) : ViewModel() { @@ -952,28 +954,28 @@ class SearchResultViewModel( fetchCategoryId: Long?, fetchBookId: Long?, fetchTocId: Long? - ): Pair>? { + ): Pair>? { var tocAllowedLineIds: Set = emptySet() - val session: LuceneSearchService.SearchSession? = when { + val session: SearchSession? = when { fetchTocId != null -> { val toc = repository.getTocEntry(fetchTocId) ?: return null ensureTocCountingCaches(toc.bookId) val lineIds = collectLineIdsForTocSubtree(toc.id, toc.bookId) tocAllowedLineIds = lineIds - lucene.openSearchSession(query, DEFAULT_NEAR, lineIds = lineIds) + lucene.openSession(query, DEFAULT_NEAR, lineIds = lineIds) } - fetchBookId != null -> lucene.openSearchSession(query, DEFAULT_NEAR, bookIds = listOf(fetchBookId)) + fetchBookId != null -> lucene.openSession(query, DEFAULT_NEAR, bookIds = listOf(fetchBookId)) fetchCategoryId != null -> { val books = collectBookIdsUnderCategory(fetchCategoryId) - lucene.openSearchSession(query, DEFAULT_NEAR, bookIds = books) + lucene.openSession(query, DEFAULT_NEAR, bookIds = books) } else -> { val extendedGlobal = _uiState.value.globalExtended val baseOnlyBookIds: List? = if (!extendedGlobal) runCatching { repository.getBaseBookIds() }.getOrNull() else null when { baseOnlyBookIds != null && baseOnlyBookIds.isEmpty() -> null - baseOnlyBookIds != null -> lucene.openSearchSession(query, DEFAULT_NEAR, bookIds = baseOnlyBookIds) - else -> lucene.openSearchSession(query, DEFAULT_NEAR) + baseOnlyBookIds != null -> lucene.openSession(query, DEFAULT_NEAR, bookIds = baseOnlyBookIds) + else -> lucene.openSession(query, DEFAULT_NEAR) } } } @@ -982,7 +984,7 @@ class SearchResultViewModel( } private fun hitsToResults( - hits: List, + hits: List, rawQuery: String ): List { if (hits.isEmpty()) return emptyList() @@ -1494,7 +1496,7 @@ class SearchResultViewModel( return result } - private suspend fun updateAggregatesForHits(hits: List) { + private suspend fun updateAggregatesForHits(hits: List) { countsMutex.withLock { for (hit in hits) { val book = bookCache[hit.bookId] ?: repository.getBookCore(hit.bookId)?.also { bookCache[hit.bookId] = it } ?: continue @@ -1514,7 +1516,7 @@ class SearchResultViewModel( } } - private suspend fun updateTocCountsForHits(hits: List, scopeBookId: Long) { + private suspend fun updateTocCountsForHits(hits: List, scopeBookId: Long) { val subset = hits.filter { it.bookId == scopeBookId } if (subset.isEmpty()) return ensureTocCountingCaches(scopeBookId) diff --git a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/AppGraph.kt b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/AppGraph.kt index 357f33e2..b68bb36d 100644 --- a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/AppGraph.kt +++ b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/AppGraph.kt @@ -12,7 +12,7 @@ import io.github.kdroidfilter.seforimapp.features.onboarding.data.OnboardingProc import io.github.kdroidfilter.seforimapp.features.search.SearchHomeViewModel import io.github.kdroidfilter.seforimapp.framework.session.TabPersistedStateStore import io.github.kdroidfilter.seforimlibrary.dao.repository.SeforimRepository -import io.github.kdroidfilter.seforimapp.framework.search.LuceneSearchService +import io.github.kdroidfilter.seforimlibrary.search.SearchEngine /** * Metro DI graph: provider functions annotated with @Provides. @@ -29,7 +29,7 @@ abstract class AppGraph : ViewModelGraph { abstract val settings: Settings abstract val categoryDisplaySettingsStore: CategoryDisplaySettingsStore abstract val repository: SeforimRepository - abstract val luceneSearchService: LuceneSearchService + abstract val searchEngine: SearchEngine abstract val tabsViewModel: TabsViewModel abstract val searchHomeViewModel: SearchHomeViewModel diff --git a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/modules/AppCoreBindings.kt b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/modules/AppCoreBindings.kt index 5f576b1b..a4aabfbd 100644 --- a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/modules/AppCoreBindings.kt +++ b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/di/modules/AppCoreBindings.kt @@ -18,7 +18,8 @@ import io.github.kdroidfilter.seforimapp.framework.database.getUserSettingsDatab import io.github.kdroidfilter.seforimapp.framework.di.AppScope import io.github.kdroidfilter.seforimapp.framework.session.TabPersistedStateStore import io.github.kdroidfilter.seforimlibrary.dao.repository.SeforimRepository -import io.github.kdroidfilter.seforimapp.framework.search.LuceneSearchService +import io.github.kdroidfilter.seforimlibrary.search.LuceneSearchEngine +import io.github.kdroidfilter.seforimlibrary.search.SearchEngine import io.github.kdroidfilter.seforimapp.framework.search.LuceneLookupSearchService import io.github.kdroidfilter.seforimapp.framework.search.AcronymFrequencyCache import io.github.kdroidfilter.seforimapp.framework.search.RepositorySnippetSourceProvider @@ -65,11 +66,12 @@ object AppCoreBindings { @Provides @SingleIn(AppScope::class) - fun provideLuceneSearchService(repository: SeforimRepository): LuceneSearchService { + fun provideSearchEngine(repository: SeforimRepository): SearchEngine { val dbPath = getDatabasePath() - val indexPath = if (dbPath.endsWith(".db")) "$dbPath.lucene" else "$dbPath.luceneindex" - val snippetSourceProvider = RepositorySnippetSourceProvider(repository) - return LuceneSearchService(Paths.get(indexPath), snippetSourceProvider) + val indexPath = Paths.get(if (dbPath.endsWith(".db")) "$dbPath.lucene" else "$dbPath.luceneindex") + val dictionaryPath = indexPath.resolveSibling("lexical.db") + val snippetProvider = RepositorySnippetSourceProvider(repository) + return LuceneSearchEngine(indexPath, snippetProvider, dictionaryPath = dictionaryPath) } @Provides @@ -107,14 +109,14 @@ object AppCoreBindings { tabsViewModel: TabsViewModel, persistedStore: TabPersistedStateStore, repository: SeforimRepository, - lucene: LuceneSearchService, + searchEngine: SearchEngine, lookup: LuceneLookupSearchService, settings: Settings ): SearchHomeViewModel = SearchHomeViewModel( tabsViewModel = tabsViewModel, persistedStore = persistedStore, repository = repository, - lucene = lucene, + searchEngine = searchEngine, lookup = lookup, settings = settings ) diff --git a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/LuceneSearchService.kt b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/LuceneSearchService.kt deleted file mode 100644 index d03451a7..00000000 --- a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/LuceneSearchService.kt +++ /dev/null @@ -1,1003 +0,0 @@ -package io.github.kdroidfilter.seforimapp.framework.search - -import org.apache.lucene.analysis.Analyzer -import org.apache.lucene.analysis.TokenStream -import org.apache.lucene.analysis.standard.StandardAnalyzer -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute -import org.apache.lucene.index.DirectoryReader -import org.apache.lucene.index.StoredFields -import org.apache.lucene.index.Term -import org.apache.lucene.search.BooleanClause -import org.apache.lucene.search.BooleanQuery -import org.apache.lucene.search.BoostQuery -import org.apache.lucene.search.FuzzyQuery -import org.apache.lucene.search.IndexSearcher -import org.apache.lucene.search.PrefixQuery -import org.apache.lucene.search.Query -import org.apache.lucene.search.ScoreDoc -import org.apache.lucene.search.TermQuery -import org.apache.lucene.util.QueryBuilder -import org.apache.lucene.store.FSDirectory -import org.apache.lucene.document.IntPoint -import java.io.Closeable -import java.nio.file.Path -import org.jsoup.Jsoup -import org.jsoup.safety.Safelist -import io.github.kdroidfilter.seforimapp.logger.debugln - -/** - * Info about a line needed to fetch snippet source from DB. - */ -data class LineSnippetInfo( - val lineId: Long, - val bookId: Long, - val lineIndex: Int -) - -/** - * Provider that fetches snippet source text for multiple lines. - * Returns a map of lineId -> snippetSource (HTML-cleaned, with neighbors if needed). - */ -fun interface SnippetSourceProvider { - fun getSnippetSources(lines: List): Map -} - -/** - * Minimal Lucene search service for JVM runtime. - * Supports book title suggestions and full-text queries (future extension). - */ -class LuceneSearchService( - indexDir: Path, - private val snippetSourceProvider: SnippetSourceProvider? = null, - private val analyzer: Analyzer = StandardAnalyzer() -) { - companion object { - // Hard cap on how many synonym/expansion terms we allow per token - private const val MAX_SYNONYM_TERMS_PER_TOKEN: Int = 32 - // Global cap for boost queries built from dictionary expansions - private const val MAX_SYNONYM_BOOST_TERMS: Int = 256 - // Constants for snippet source building (must match indexer) - private const val SNIPPET_NEIGHBOR_WINDOW = 4 - private const val SNIPPET_MIN_LENGTH = 280 - } - - // Open Lucene directory lazily to avoid any I/O at app startup - private val dir by lazy { FSDirectory.open(indexDir) } - - - private val stdAnalyzer: Analyzer by lazy { analyzer } - private val magicDict: MagicDictionaryIndex? by lazy { - val candidates = listOfNotNull( - System.getProperty("magicDict")?.let { Path.of(it) }, - System.getenv("SEFORIM_MAGIC_DICT")?.let { Path.of(it) }, - indexDir.resolveSibling("lexical.db"), - indexDir.resolveSibling("seforim.db").resolveSibling("lexical.db"), - Path.of("SeforimLibrary/SeforimMagicIndexer/magicindexer/build/db/lexical.db") - ).distinct() - val firstExisting = MagicDictionaryIndex.findValidDictionary(candidates) - if (firstExisting == null) { - debugln { - "[MagicDictionary] Missing lexical.db; search will run without dictionary expansions. " + - "Provide -DmagicDict=/path/lexical.db or SEFORIM_MAGIC_DICT. Checked: " + - candidates.joinToString() - } - return@lazy null - } - debugln { "[MagicDictionary] Loading lexical db from $firstExisting" } - val loaded = MagicDictionaryIndex.load(::normalizeHebrew, firstExisting) - if (loaded == null) { - debugln { - "[MagicDictionary] Failed to load lexical db at $firstExisting; " + - "continuing without dictionary expansions" - } - } - loaded - } - - private inline fun withSearcher(block: (IndexSearcher) -> T): T { - DirectoryReader.open(dir).use { reader -> - val searcher = IndexSearcher(reader) - return block(searcher) - } - } - - // No eager index opening: the index is stable and does not need - // to be checked or analyzed at application startup. - - // --- Title suggestions --- - - fun searchBooksByTitlePrefix(rawQuery: String, limit: Int = 20): List { - val q = normalizeHebrew(rawQuery) - if (q.isBlank()) return emptyList() - val tokens = q.split("\\s+".toRegex()).map { it.trim() }.filter { it.isNotEmpty() } - if (tokens.isEmpty()) return emptyList() - - return withSearcher { searcher -> - val must = BooleanQuery.Builder() - // Restrict to book_title docs - must.add(TermQuery(Term("type", "book_title")), BooleanClause.Occur.FILTER) - tokens.forEach { tok -> - // prefix on analyzed 'title' - must.add(PrefixQuery(Term("title", tok)), BooleanClause.Occur.MUST) - } - val query = must.build() - val top = searcher.search(query, limit) - val stored: StoredFields = searcher.storedFields() - val ids = LinkedHashSet() - for (sd in top.scoreDocs) { - val doc = stored.document(sd.doc) - val id = doc.getField("book_id")?.numericValue()?.toLong() - if (id != null) ids.add(id) - } - ids.toList().take(limit) - } - } - - // --- Full-text search --- - - data class LineHit( - val bookId: Long, - val bookTitle: String, - val lineId: Long, - val lineIndex: Int, - val snippet: String, - val score: Float, - val rawText: String - ) - - data class SearchPage( - val hits: List, - val totalHits: Long, - val isLastPage: Boolean - ) - - inner class SearchSession internal constructor( - private val query: Query, - private val anchorTerms: List, - private val highlightTerms: List, - private val reader: DirectoryReader - ) : Closeable { - private val searcher = IndexSearcher(reader) - private var after: ScoreDoc? = null - private var finished = false - private var totalHitsValue: Long? = null - - fun nextPage(limit: Int): SearchPage? { - if (finished) return null - val top = searcher.searchAfter(after, query, limit) - if (totalHitsValue == null) totalHitsValue = top.totalHits?.value - if (top.scoreDocs.isEmpty()) { - finished = true - return null - } - val stored = searcher.storedFields() - val hits = mapScoreDocs(stored, top.scoreDocs.toList(), anchorTerms, highlightTerms) - after = top.scoreDocs.last() - val isLast = top.scoreDocs.size < limit - if (isLast) finished = true - return SearchPage( - hits = hits, - totalHits = totalHitsValue ?: hits.size.toLong(), - isLastPage = isLast - ) - } - - override fun close() { - reader.close() - } - } - - fun openSearchSession( - rawQuery: String, - near: Int, - bookFilter: Long? = null, - categoryFilter: Long? = null, - bookIds: Collection? = null, - lineIds: Collection? = null - ): SearchSession? { - val context = buildSearchContext(rawQuery, near, bookFilter, categoryFilter, bookIds, lineIds) ?: return null - val reader = DirectoryReader.open(dir) - return SearchSession(context.query, context.anchorTerms, context.highlightTerms, reader) - } - - private data class SearchContext( - val query: Query, - val anchorTerms: List, - val highlightTerms: List - ) - - private fun buildSearchContext( - rawQuery: String, - near: Int, - bookFilter: Long?, - categoryFilter: Long?, - bookIds: Collection?, - lineIds: Collection? - ): SearchContext? { - val norm = normalizeHebrew(rawQuery) - if (norm.isBlank()) return null - - val analyzedRaw = analyzeToTerms(stdAnalyzer, norm) ?: emptyList() - - // Check if the original query contained ה׳ (Hashem) before normalization - val hasHashem = rawQuery.contains("ה׳") || rawQuery.contains("ה'") - - // Filter out single Hebrew letters and stop words BEFORE dictionary expansion - // BUT preserve "ה" if the original query had "ה׳" (Hashem) - val analyzedStd = analyzedRaw.filter { token -> - // Special case: if query has ה׳, keep "ה" token - if (token == "ה" && hasHashem) return@filter true - // Preserve numeric tokens (e.g., "6") so they can expand via MagicDictionary - if (token.any { it.isDigit() }) return@filter true - - token.length >= 2 && token !in setOf( - "א", "ב", "ג", "ד", "ה", "ו", "ז", "ח", "ט", "י", "כ", "ל", "מ", - "נ", "ס", "ע", "פ", "צ", "ק", "ר", "ש", "ת", - ) - } - - debugln { "[DEBUG] Original query had Hashem (ה׳): $hasHashem" } - debugln { "[DEBUG] Analyzed tokens: $analyzedStd" } - - // Get all possible expansions for each token (a token can belong to multiple bases) - val tokenExpansionsRaw: Map> = - analyzedStd.associateWith { token -> - // Get best expansion (prefers matching base, then largest) - val expansion = magicDict?.expansionFor(token) ?: return@associateWith emptyList() - listOf(expansion) - } - tokenExpansionsRaw.forEach { (token, exps) -> - exps.forEach { exp -> - debugln { "[DEBUG] Token '$token' -> expansion: surface=${exp.surface.take(10)}..., variants=${exp.variants.take(10)}..., base=${exp.base}" } - } - } - - val tokenExpansions: Map> = tokenExpansionsRaw - - val allExpansions = tokenExpansions.values.flatten() - val expandedTerms = allExpansions.flatMap { it.surface + it.variants + it.base }.distinct() - // Add 4-gram terms used in the query (matches text_ng4 clauses) so highlighting can - // reflect matches that were found via the n-gram branch. - val ngramTerms = buildNgramTerms(analyzedStd, gram = 4) - // For highlighting/snippets, use the actual query tokens plus the concrete - // terms that the search query uses (expansions + n-grams), and if the query - // mentions Hashem explicitly, also include dictionary-based variants of the - // divine name from the lexical DB - val hashemTerms = if (hasHashem) loadHashemHighlightTerms() else emptyList() - val highlightTerms = filterTermsForHighlight(analyzedStd + expandedTerms + ngramTerms + hashemTerms) - val anchorTerms = buildAnchorTerms(norm, highlightTerms) - - val rankedQuery = buildExpandedQuery(norm, near, analyzedStd, tokenExpansions) - val mustAllTokensQuery: Query? = buildPresenceFilterForTokens(analyzedStd, near, tokenExpansions) - val phraseQuery: Query? = buildSynonymPhraseQuery(analyzedStd, tokenExpansions, near) - - val builder = BooleanQuery.Builder() - builder.add(TermQuery(Term("type", "line")), BooleanClause.Occur.FILTER) - if (bookFilter != null) builder.add(IntPoint.newExactQuery("book_id", bookFilter.toInt()), BooleanClause.Occur.FILTER) - if (categoryFilter != null) builder.add(IntPoint.newExactQuery("category_id", categoryFilter.toInt()), BooleanClause.Occur.FILTER) - val bookIdsArray = bookIds?.map { it.toInt() }?.toIntArray() - if (bookIdsArray != null && bookIdsArray.isNotEmpty()) { - builder.add(IntPoint.newSetQuery("book_id", *bookIdsArray), BooleanClause.Occur.FILTER) - } - val lineIdsArray = lineIds?.map { it.toInt() }?.toIntArray() - if (lineIdsArray != null && lineIdsArray.isNotEmpty()) { - builder.add(IntPoint.newSetQuery("line_id", *lineIdsArray), BooleanClause.Occur.FILTER) - } - if (mustAllTokensQuery != null) { - builder.add(mustAllTokensQuery, BooleanClause.Occur.FILTER) - debugln { "[DEBUG] Added mustAllTokensQuery as FILTER" } - } - val analyzedCount = analyzedStd.size - if (phraseQuery != null && analyzedCount >= 2) { - val occur = if (near == 0) BooleanClause.Occur.MUST else BooleanClause.Occur.SHOULD - builder.add(phraseQuery, occur) - debugln { "[DEBUG] Added phraseQuery with occur=$occur, near=$near" } - } - builder.add(rankedQuery, BooleanClause.Occur.SHOULD) - debugln { "[DEBUG] Added rankedQuery as SHOULD" } - - val finalQuery = builder.build() - debugln { "[DEBUG] Final query: $finalQuery" } - - return SearchContext( - query = finalQuery, - anchorTerms = anchorTerms, - highlightTerms = highlightTerms - ) - } - - private fun mapScoreDocs( - stored: StoredFields, - scoreDocs: List, - anchorTerms: List, - highlightTerms: List - ): List { - if (scoreDocs.isEmpty()) return emptyList() - - // First pass: extract metadata from index - data class DocMeta( - val sd: ScoreDoc, - val bookId: Long, - val bookTitle: String, - val lineId: Long, - val lineIndex: Int, - val isBaseBook: Boolean, - val orderIndex: Int, - val indexedRaw: String // from text_raw field, may be empty if not stored - ) - - val docMetas = scoreDocs.map { sd -> - val doc = stored.document(sd.doc) - DocMeta( - sd = sd, - bookId = doc.getField("book_id").numericValue().toLong(), - bookTitle = doc.getField("book_title").stringValue() ?: "", - lineId = doc.getField("line_id").numericValue().toLong(), - lineIndex = doc.getField("line_index").numericValue().toInt(), - isBaseBook = doc.getField("is_base_book")?.numericValue()?.toInt() == 1, - orderIndex = doc.getField("order_index")?.numericValue()?.toInt() ?: 999, - indexedRaw = doc.getField("text_raw")?.stringValue() ?: "" - ) - } - - // Get snippet sources: from provider if available, otherwise from index - val snippetSources: Map = if (snippetSourceProvider != null) { - val lineInfos = docMetas.map { LineSnippetInfo(it.lineId, it.bookId, it.lineIndex) } - snippetSourceProvider.getSnippetSources(lineInfos) - } else { - // Fallback to indexed text_raw - docMetas.associate { it.lineId to it.indexedRaw } - } - - val hits = docMetas.map { meta -> - val raw = snippetSources[meta.lineId] ?: meta.indexedRaw - val baseScore = meta.sd.score - - // Calculate boost: lower orderIndex = higher boost (only for base books) - val boostedScore = if (meta.isBaseBook) { - // Formula: boost = baseScore * (1 + (120 - orderIndex) / 60) - // orderIndex 1 gets ~3x boost, orderIndex 50 gets ~2.2x boost, orderIndex 100+ gets ~1.3x boost - val boostFactor = 1.0f + (120 - meta.orderIndex).coerceAtLeast(0) / 60.0f - baseScore * boostFactor - } else { - baseScore - } - - val snippet = buildSnippet(raw, anchorTerms, highlightTerms) - LineHit( - bookId = meta.bookId, - bookTitle = meta.bookTitle, - lineId = meta.lineId, - lineIndex = meta.lineIndex, - snippet = snippet, - score = boostedScore, - rawText = raw - ) - } - // Re-sort by boosted score (descending) - return hits.sortedByDescending { it.score } - } - - fun searchAllText(rawQuery: String, near: Int = 5, limit: Int, offset: Int = 0): List = - doSearch(rawQuery, near, limit, offset, bookFilter = null, categoryFilter = null) - - fun searchInBook(rawQuery: String, near: Int, bookId: Long, limit: Int, offset: Int = 0): List = - doSearch(rawQuery, near, limit, offset, bookFilter = bookId, categoryFilter = null) - - fun searchInCategory(rawQuery: String, near: Int, categoryId: Long, limit: Int, offset: Int = 0): List = - doSearch(rawQuery, near, limit, offset, bookFilter = null, categoryFilter = categoryId) - - fun searchInBooks(rawQuery: String, near: Int, bookIds: Collection, limit: Int, offset: Int = 0): List = - doSearchInBooks(rawQuery, near, limit, offset, bookIds) - - // --- Snippet building (public) --- - - /** - * Build an HTML snippet from raw line text by highlighting query terms. - * Uses StandardAnalyzer tokens; highlight is diacritic-agnostic and sofit-normalized. - */ - fun buildSnippetFromRaw(raw: String, rawQuery: String, near: Int): String { - val norm = normalizeHebrew(rawQuery) - if (norm.isBlank()) return Jsoup.clean(raw, Safelist.none()) - val rawClean = Jsoup.clean(raw, Safelist.none()) - val analyzedStd = (analyzeToTerms(stdAnalyzer, norm) ?: emptyList()) - val hasHashem = rawQuery.contains("ה׳") || rawQuery.contains("ה'") - val hashemTerms = if (hasHashem) loadHashemHighlightTerms() else emptyList() - val highlightTerms = filterTermsForHighlight( - analyzedStd + buildNgramTerms(analyzedStd, gram = 4) + hashemTerms - ) - val anchorTerms = buildAnchorTerms(norm, highlightTerms) - return buildSnippet(rawClean, anchorTerms, highlightTerms) - } - - private fun doSearch( - rawQuery: String, - near: Int, - limit: Int, - offset: Int, - bookFilter: Long?, - categoryFilter: Long? - ): List { - val context = buildSearchContext(rawQuery, near, bookFilter, categoryFilter, null, null) ?: return emptyList() - return withSearcher { searcher -> - val top = searcher.search(context.query, offset + limit) - val stored: StoredFields = searcher.storedFields() - val sliced = top.scoreDocs.drop(offset) - mapScoreDocs(stored, sliced, context.anchorTerms, context.highlightTerms) - } - } - - private fun doSearchInBooks( - rawQuery: String, - near: Int, - limit: Int, - offset: Int, - bookIds: Collection - ): List { - if (bookIds.isEmpty()) return emptyList() - val context = buildSearchContext(rawQuery, near, bookFilter = null, categoryFilter = null, bookIds = bookIds, lineIds = null) ?: return emptyList() - return withSearcher { searcher -> - val top = searcher.search(context.query, offset + limit) - val stored: StoredFields = searcher.storedFields() - val sliced = top.scoreDocs.drop(offset) - mapScoreDocs(stored, sliced, context.anchorTerms, context.highlightTerms) - } - } - - private fun analyzeToTerms(analyzer: Analyzer, text: String): List? = try { - val out = mutableListOf() - val ts: TokenStream = analyzer.tokenStream("text", text) - val termAtt = ts.addAttribute(CharTermAttribute::class.java) - ts.reset() - while (ts.incrementToken()) { - val t = termAtt.toString() - if (t.isNotBlank()) out += t - } - ts.end(); ts.close() - out - } catch (_: Exception) { null } - - /** - * Build an n-gram presence query that requires all 4-grams of the token - * to be present in field 'text_ng4'. Returns null when token < 4 chars. - */ - private fun buildNgramPresenceForToken(token: String): Query? { - if (token.length < 4) return null - val grams = mutableListOf() - var i = 0 - val L = token.length - while (i + 4 <= L) { - grams += token.substring(i, i + 4) - i += 1 - } - if (grams.isEmpty()) return null - val b = BooleanQuery.Builder() - for (g in grams.distinct()) { - b.add(TermQuery(Term("text_ng4", g)), BooleanClause.Occur.MUST) - } - return b.build() - } - - /** - * Presence filter (AND across tokens). For NEAR>0, each token may be satisfied by - * either exact term in 'text' OR by its 4-gram presence in 'text_ng4'. - */ - private fun buildPresenceFilterForTokens( - tokens: List, - near: Int, - expansionsByToken: Map> - ): Query? { - if (tokens.isEmpty()) return null - val outer = BooleanQuery.Builder() - for (t in tokens) { - val expansions = expansionsByToken[t] ?: emptyList() - val synonymTerms = buildLimitedTermsForToken(t, expansions) - val ngram = if (near > 0) buildNgramPresenceForToken(t) else null - val clause = BooleanQuery.Builder().apply { - // Add the original token - add(TermQuery(Term("text", t)), BooleanClause.Occur.SHOULD) - if (ngram != null) add(ngram, BooleanClause.Occur.SHOULD) - // Add capped set of expansion terms so we do not exceed Lucene's maxClauseCount. - for (term in synonymTerms) { - if (term != t) { // Avoid duplicating the original token - add(TermQuery(Term("text", term)), BooleanClause.Occur.SHOULD) - } - } - }.build() - outer.add(clause, BooleanClause.Occur.MUST) - } - return outer.build() - } - - private fun buildHebrewStdQuery(norm: String, near: Int): Query { - // Use standard Hebrew tokenizer at query time against field 'text' - val qb = QueryBuilder(stdAnalyzer) - val phrase = qb.createPhraseQuery("text", norm, near) - if (phrase != null) return phrase - val bool = qb.createBooleanQuery("text", norm, BooleanClause.Occur.MUST) - return bool ?: BooleanQuery.Builder().build() - } - - private fun buildMagicBoostQuery(expansions: List): Query? { - if (expansions.isEmpty()) return null - val surfaceTerms = LinkedHashSet() - val variantTerms = LinkedHashSet() - val baseTerms = LinkedHashSet() - for (exp in expansions) { - surfaceTerms.addAll(exp.surface) - variantTerms.addAll(exp.variants) - baseTerms.addAll(exp.base) - } - - val limitedSurfaces = surfaceTerms.take(MAX_SYNONYM_BOOST_TERMS) - val limitedVariants = variantTerms.take(MAX_SYNONYM_BOOST_TERMS) - val limitedBases = baseTerms.take(MAX_SYNONYM_BOOST_TERMS) - if (surfaceTerms.size > limitedSurfaces.size || - variantTerms.size > limitedVariants.size || - baseTerms.size > limitedBases.size - ) { - debugln { - "[DEBUG] Capped magic boost terms: " + - "surface=${surfaceTerms.size}->${limitedSurfaces.size}, " + - "variants=${variantTerms.size}->${limitedVariants.size}, " + - "base=${baseTerms.size}->${limitedBases.size}" - } - } - - val b = BooleanQuery.Builder() - // Reduced boosts to favor phrase matches over individual term matches - for (s in limitedSurfaces) { - b.add(BoostQuery(TermQuery(Term("text", s)), 2.0f), BooleanClause.Occur.SHOULD) - } - for (v in limitedVariants) { - b.add(BoostQuery(TermQuery(Term("text", v)), 1.5f), BooleanClause.Occur.SHOULD) - } - for (ba in limitedBases) { - b.add(BoostQuery(TermQuery(Term("text", ba)), 1.0f), BooleanClause.Occur.SHOULD) - } - return b.build() - } - - private fun buildSynonymBoostQuery(expansions: List): Query? { - if (expansions.isEmpty()) return null - val surfaceTerms = LinkedHashSet() - val variantTerms = LinkedHashSet() - val baseTerms = LinkedHashSet() - for (exp in expansions) { - surfaceTerms.addAll(exp.surface) - variantTerms.addAll(exp.variants) - baseTerms.addAll(exp.base) - } - - val limitedSurfaces = surfaceTerms.take(MAX_SYNONYM_BOOST_TERMS) - val limitedVariants = variantTerms.take(MAX_SYNONYM_BOOST_TERMS) - val limitedBases = baseTerms.take(MAX_SYNONYM_BOOST_TERMS) - if (surfaceTerms.size > limitedSurfaces.size || - variantTerms.size > limitedVariants.size || - baseTerms.size > limitedBases.size - ) { - debugln { - "[DEBUG] Capped synonym boost terms: " + - "surface=${surfaceTerms.size}->${limitedSurfaces.size}, " + - "variants=${variantTerms.size}->${limitedVariants.size}, " + - "base=${baseTerms.size}->${limitedBases.size}" - } - } - - val b = BooleanQuery.Builder() - for (s in limitedSurfaces) { - b.add(TermQuery(Term("text", s)), BooleanClause.Occur.SHOULD) - } - for (v in limitedVariants) { - b.add(TermQuery(Term("text", v)), BooleanClause.Occur.SHOULD) - } - for (ba in limitedBases) { - b.add(TermQuery(Term("text", ba)), BooleanClause.Occur.SHOULD) - } - return b.build() - } - - private fun buildSynonymPhrases( - tokens: List, - expansionsByToken: Map> - ): List> { - if (tokens.isEmpty()) return emptyList() - val termExpansions = buildTermAlternativesForTokens(tokens, expansionsByToken) - debugln { "[DEBUG] buildSynonymPhrases - termExpansions sizes: ${termExpansions.map { it.size }}" } - fun buildMultiPhrase(slop: Int): Query { - val builder = org.apache.lucene.search.MultiPhraseQuery.Builder() - builder.setSlop(slop) - var pos = 0 - for (alts in termExpansions) { - builder.add(alts.map { Term("text", it) }.toTypedArray(), pos) - pos++ - } - return builder.build() - } - return listOf( - buildMultiPhrase(0) to 50.0f, // Very high boost for exact phrase match - buildMultiPhrase(3) to 20.0f, // High boost for near phrase match (within 3 words) - buildMultiPhrase(8) to 5.0f // Lower boost for distant phrase match - ) - } - - /** - * Build a phrase query that treats each token as a synonym set of surface/variant/base. - * This allows a query token (e.g., הלך) to match a surface form (e.g., וילך) in a phrase with slop. - */ - private fun buildSynonymPhraseQuery( - tokens: List, - expansionsByToken: Map>, - near: Int - ): Query? { - if (tokens.isEmpty()) return null - val termExpansions = buildTermAlternativesForTokens(tokens, expansionsByToken) - val builder = org.apache.lucene.search.MultiPhraseQuery.Builder() - builder.setSlop(near) - var position = 0 - for (alts in termExpansions) { - builder.add(alts.map { Term("text", it) }.toTypedArray(), position) - position++ - } - return builder.build() - } - - private fun buildNgram4Query(norm: String): Query? { - // Build MUST query over 4-gram terms on field 'text_ng4' - val tokens = norm.split("\\s+".toRegex()).map { it.trim() }.filter { it.length >= 4 } - if (tokens.isEmpty()) return null - val grams = mutableListOf() - for (t in tokens) { - val L = t.length - var i = 0 - while (i + 4 <= L) { - grams += t.substring(i, i + 4) - i += 1 - } - } - val uniq = grams.distinct() - if (uniq.isEmpty()) return null - val b = BooleanQuery.Builder() - for (g in uniq) { - b.add(TermQuery(Term("text_ng4", g)), BooleanClause.Occur.MUST) - } - return b.build() - } - - private fun buildExpandedQuery( - norm: String, - near: Int, - tokens: List, - expansionsByToken: Map> - ): Query { - val base = buildHebrewStdQuery(norm, near) - val allExpansions = expansionsByToken.values.flatten() - val synonymPhrases = buildSynonymPhrases(tokens, expansionsByToken) - val ngram = buildNgram4Query(norm) - val fuzzy = buildFuzzyQuery(norm, near) - val builder = BooleanQuery.Builder() - builder.add(base, BooleanClause.Occur.SHOULD) - for ((query, boost) in synonymPhrases) { - builder.add(BoostQuery(query, boost), BooleanClause.Occur.SHOULD) - } - if (ngram != null) builder.add(ngram, BooleanClause.Occur.SHOULD) - if (fuzzy != null) builder.add(fuzzy, BooleanClause.Occur.SHOULD) - val magic = buildMagicBoostQuery(allExpansions) - if (magic != null) builder.add(magic, BooleanClause.Occur.SHOULD) - val synonymBoost = buildSynonymBoostQuery(allExpansions) - if (synonymBoost != null) builder.add(synonymBoost, BooleanClause.Occur.SHOULD) - return builder.build() - } - - private fun buildFuzzyQuery(norm: String, near: Int): Query? { - if (near == 0) return null - if (norm.length < 4) return null - val tokens = analyzeToTerms(stdAnalyzer, norm)?.filter { it.length >= 4 } ?: emptyList() - if (tokens.isEmpty()) return null - val b = BooleanQuery.Builder() - for (t in tokens.distinct()) { - // Add per-token fuzzy match on the main text field; require all tokens (MUST) - b.add(FuzzyQuery(Term("text", t), 1), BooleanClause.Occur.MUST) - } - return b.build() - } - - // Use only StandardAnalyzer + optional 4-gram - - private fun buildAnchorTerms(normQuery: String, analyzedTerms: List): List { - val qTokens = normQuery.split("\\s+".toRegex()) - .map { it.trim() } - .filter { it.isNotEmpty() } - val combined = (qTokens + analyzedTerms.map { it.trimEnd('$') }) - val filtered = filterTermsForHighlight(combined) - if (filtered.isNotEmpty()) return filtered - val qFiltered = filterTermsForHighlight(qTokens) - return qFiltered.ifEmpty { qTokens } - } - - private fun filterTermsForHighlight(terms: List): List { - if (terms.isEmpty()) return emptyList() - - fun useful(t: String): Boolean { - val s = t.trim() - if (s.isEmpty()) return false - // Drop single-letter tokens - if (s.length < 2) return false - // Must contain at least one letter or digit - if (s.none { it.isLetterOrDigit() }) return false - return true - } - return terms - .map { it.trim() } - .filter { useful(it) } - .distinct() - .sortedByDescending { it.length } - } - - private fun buildSnippet(raw: String, anchorTerms: List, highlightTerms: List, context: Int = 220): String { - if (raw.isEmpty()) return "" - // Strip diacritics (nikud + teamim) to align matching with normalized tokens, and keep a mapping to original indices - val (plain, mapToOrig) = stripDiacriticsWithMap(raw) - val hasDiacritics = plain.length != raw.length - val effContext = if (hasDiacritics) maxOf(context, 360) else context - // For matching only, normalize final letters in the plain text to base forms - val plainSearch = replaceFinalsWithBase(plain) - - // Find first anchor term found in the plain text - val plainIdx = anchorTerms.asSequence().mapNotNull { t -> - val i = plainSearch.indexOf(t) - if (i >= 0) i else null - }.firstOrNull() ?: 0 - - // Convert plain window to original indices - val plainLen = anchorTerms.firstOrNull()?.length ?: 0 - val plainStart = (plainIdx - effContext).coerceAtLeast(0) - val plainEnd = (plainIdx + plainLen + effContext).coerceAtMost(plain.length) - val origStart = mapToOrigIndex(mapToOrig, plainStart) - val origEnd = mapToOrigIndex(mapToOrig, plainEnd).coerceAtMost(raw.length) - - val base = raw.substring(origStart, origEnd) - // Compute basePlain and its map to baseOriginal-local indices - val basePlain = plain.substring(plainStart, plainEnd) - val basePlainSearch = replaceFinalsWithBase(basePlain) - val baseMap = IntArray(plainEnd - plainStart) { idx -> - (mapToOrig[plainStart + idx] - origStart).coerceIn(0, base.length.coerceAtLeast(1) - 1) - } - - // Build highlight intervals in original snippet coordinates using diacritic-agnostic matching - val pool = (highlightTerms + highlightTerms.map { it.trimEnd('$') }).distinct().filter { it.isNotBlank() } - val intervals = mutableListOf() - val basePlainLower = basePlainSearch.lowercase() - - // Helper to check if a character is a word boundary (whitespace or punctuation) - fun isWordBoundary(text: String, index: Int): Boolean { - if (index < 0 || index >= text.length) return true - val ch = text[index] - return ch.isWhitespace() || !ch.isLetterOrDigit() - } - - for (term in pool) { - if (term.isEmpty()) continue - val t = term.lowercase() - var from = 0 - while (from <= basePlainLower.length - t.length && t.isNotEmpty()) { - val idx = basePlainLower.indexOf(t, startIndex = from) - if (idx == -1) break - - // Check if this is a word-internal match for a short term - val isAtWordStart = isWordBoundary(basePlainLower, idx - 1) - val isAtWordEnd = isWordBoundary(basePlainLower, idx + t.length) - val isWholeWord = isAtWordStart && isAtWordEnd - // Only highlight whole-word matches to avoid mid-word highlights. - val shouldHighlight = isWholeWord - - if (shouldHighlight) { - val startOrig = mapToOrigIndex(baseMap, idx) - val endOrig = mapToOrigIndex(baseMap, (idx + t.length - 1)) + 1 - if (startOrig in 0 until endOrig && endOrig <= base.length) { - intervals += (startOrig until endOrig) - } - } - from = idx + t.length - } - } - val merged = mergeIntervals(intervals.sortedBy { it.first }) - var out = insertBoldTags(base, merged) - if (origStart > 0) out = "...$out" - if (origEnd < raw.length) out = "$out..." - return out - } - - private fun mapToOrigIndex(mapToOrig: IntArray, plainIndex: Int): Int { - if (mapToOrig.isEmpty()) return plainIndex - val idx = plainIndex.coerceIn(0, mapToOrig.size - 1) - return mapToOrig[idx] - } - - // Returns the string without nikud+teamim and an index map from plain index -> original index - private fun stripDiacriticsWithMap(src: String): Pair { - val nikudOrTeamim: (Char) -> Boolean = { c -> - (c.code in 0x0591..0x05AF) || // teamim - (c.code in 0x05B0..0x05BD) || // nikud + meteg - (c == '\u05C1') || (c == '\u05C2') || (c == '\u05C7') - } - val out = StringBuilder(src.length) - val map = ArrayList(src.length) - var i = 0 - while (i < src.length) { - val ch = src[i] - if (!nikudOrTeamim(ch)) { - out.append(ch) - map.add(i) - } - i++ - } - val arr = IntArray(map.size) { map[it] } - return out.toString() to arr - } - - private fun mergeIntervals(ranges: List): List { - if (ranges.isEmpty()) return ranges - val out = mutableListOf() - var cur = ranges[0] - for (i in 1 until ranges.size) { - val r = ranges[i] - if (r.first <= cur.last + 1) { - cur = cur.first .. maxOf(cur.last, r.last) - } else { - out += cur - cur = r - } - } - out += cur - return out - } - - private fun insertBoldTags(text: String, intervals: List): String { - if (intervals.isEmpty()) return text - val sb = StringBuilder(text) - // Insert from end to start to keep indices valid - for (r in intervals.asReversed()) { - val start = r.first.coerceIn(0, sb.length) - val end = (r.last + 1).coerceIn(0, sb.length) - if (end > start) { - sb.insert(end, "") - sb.insert(start, "") - } - } - return sb.toString() - } - - // --- Helpers --- - private fun buildNgramTerms(tokens: List, gram: Int = 4): List { - if (gram <= 0) return emptyList() - val out = mutableListOf() - tokens.forEach { t -> - val trimmed = t.trim() - if (trimmed.length >= gram) { - var i = 0 - while (i + gram <= trimmed.length) { - out += trimmed.substring(i, i + gram) - i += 1 - } - } - } - return out.distinct() - } - - - private fun normalizeHebrew(input: String): String { - if (input.isBlank()) return "" - var s = input.trim() - - // Remove biblical cantillation marks (teamim) U+0591–U+05AF - s = s.replace("[\u0591-\u05AF]".toRegex(), "") - // Remove nikud signs including meteg and qamatz qatan - s = s.replace("[\u05B0\u05B1\u05B2\u05B3\u05B4\u05B5\u05B6\u05B7\u05B8\u05B9\u05BB\u05BC\u05BD\u05C1\u05C2\u05C7]".toRegex(), "") - // Replace maqaf U+05BE with space - s = s.replace('\u05BE', ' ') - // Remove gershayim/geresh - s = s.replace("\u05F4", "").replace("\u05F3", "") - // Normalize Hebrew final letters (sofit) to base forms - s = replaceFinalsWithBase(s) - // Collapse whitespace - s = s.replace("\\s+".toRegex(), " ").trim() - return s - } - - private fun replaceFinalsWithBase(text: String): String = text - .replace('\u05DA', '\u05DB') // ך -> כ - .replace('\u05DD', '\u05DE') // ם -> מ - .replace('\u05DF', '\u05E0') // ן -> נ - .replace('\u05E3', '\u05E4') // ף -> פ - .replace('\u05E5', '\u05E6') // ץ -> צ - - // StandardAnalyzer only - - /** - * Build a capped list of alternative terms for a single token using dictionary expansions. - * The resulting list always includes the original token and its base forms (when present), - * followed by additional surface/variant forms up to MAX_SYNONYM_TERMS_PER_TOKEN. - */ - private fun buildLimitedTermsForToken( - token: String, - expansions: List - ): List { - if (expansions.isEmpty()) return listOf(token) - - val baseTerms = expansions.flatMap { it.base }.distinct() - val otherTerms = expansions.flatMap { it.surface + it.variants }.distinct() - - val ordered = LinkedHashSet() - if (token.isNotBlank()) { - ordered += token - } - baseTerms.forEach { ordered += it } - otherTerms.forEach { ordered += it } - - val totalSize = ordered.size - val limited = ordered.take(MAX_SYNONYM_TERMS_PER_TOKEN) - if (totalSize > limited.size) { - debugln { - "[DEBUG] Capped synonym terms for token '$token' from $totalSize to ${limited.size}" - } - } - return limited - } - - /** - * Build per-token synonym alternative lists for phrase queries, with per-token caps. - */ - private fun buildTermAlternativesForTokens( - tokens: List, - expansionsByToken: Map> - ): List> { - if (tokens.isEmpty()) return emptyList() - return tokens.map { token -> - val expansions = expansionsByToken[token] ?: emptyList() - buildLimitedTermsForToken(token, expansions) - } - } - - /** - * Load dictionary-based variants of the divine name using MagicDictionaryIndex. - * We pull all surface forms for base from the underlying SQLite DB and also add diacritic-stripped variants so highlighting - * matches the snippet text after nikud/teamim removal. - */ - private fun loadHashemHighlightTerms(): List { - val dict = magicDict ?: return emptyList() - val raw = dict.loadHashemSurfaces() - if (raw.isEmpty()) return emptyList() - - fun stripHebrewDiacritics(text: String): String { - if (text.isEmpty()) return text - val sb = StringBuilder(text.length) - for (ch in text) { - val code = ch.code - val isNikudOrTeamim = - (code in 0x0591..0x05AF) || // teamim - (code in 0x05B0..0x05BD) || // nikud + meteg - (ch == '\u05C1') || (ch == '\u05C2') || (ch == '\u05C7') - if (!isNikudOrTeamim) { - sb.append(ch) - } - } - return sb.toString() - } - - val terms = linkedSetOf() - raw.forEach { value -> - val trimmed = value.trim() - if (trimmed.isEmpty()) return@forEach - terms += trimmed - val stripped = stripHebrewDiacritics(trimmed).trim() - if (stripped.isNotEmpty()) terms += stripped - val normalized = normalizeHebrew(trimmed).trim() - if (normalized.isNotEmpty()) terms += normalized - } - - val out = terms.toList() - debugln { "[DEBUG] Hashem highlight terms from lexical DB: ${out.take(20)}..." } - return out - } -} diff --git a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/MagicDictionary.kt b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/MagicDictionary.kt deleted file mode 100644 index ce5f67eb..00000000 --- a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/MagicDictionary.kt +++ /dev/null @@ -1,323 +0,0 @@ -package io.github.kdroidfilter.seforimapp.framework.search - -import java.nio.file.Files -import java.nio.file.Path -import java.sql.Connection -import java.sql.DriverManager -import java.sql.PreparedStatement -import io.github.kdroidfilter.seforimapp.logger.debugln - -/** - * Streaming dictionary index backed by SQLite (tables: surface, variant, base). - * Previously we loaded the entire dictionary into memory; now we stream lookups - * on-demand with a small LRU cache while preserving the exact expansion shape - * used by the search ranking logic. - */ -class MagicDictionaryIndex private constructor( - private val norm: (String) -> String, - private val dbFile: Path -) { - data class Expansion( - val surface: List, - val variants: List, - val base: List - ) - - private val url = "jdbc:sqlite:${dbFile.toAbsolutePath()}" - - /** - * Prepared statement per thread to avoid re-opening connections on every token. - */ - private val stmtProvider: ThreadLocal = ThreadLocal.withInitial { - val conn = DriverManager.getConnection(url).apply { - autoCommit = false - // Enforce read-only queries without altering connection flags post-open - createStatement().use { stmt -> stmt.execute("PRAGMA query_only=ON") } - } - LookupContext( - conn = conn, - stmt = conn.prepareStatement(LOOKUP_SQL) - ) - } - - /** - * Cache expansions per normalized token to avoid repeated DB hits. - */ - private val tokenCache = object : LinkedHashMap>(TOKEN_CACHE_SIZE, 0.75f, true) { - override fun removeEldestEntry(eldest: MutableMap.MutableEntry>?): Boolean = - size > TOKEN_CACHE_SIZE - } - - /** - * Cache fully-normalized expansions per base id so repeated hits to the same base - * avoid re-normalizing rows. - */ - private val baseCache = object : LinkedHashMap(BASE_CACHE_SIZE, 0.75f, true) { - override fun removeEldestEntry(eldest: MutableMap.MutableEntry?): Boolean = - size > BASE_CACHE_SIZE - } - - fun expansionsFor(tokens: List): List = - tokens.flatMap { expansionsForToken(it) }.distinct() - - fun expansionFor(token: String): Expansion? { - val expansions = expansionsForToken(token) - if (expansions.isEmpty()) return null - - val normalized = norm(token) - // Strategy: prefer the expansion whose base matches the token - val matchingBase = expansions.firstOrNull { exp -> - exp.base.any { it == normalized } - } - if (matchingBase != null) return matchingBase - - // Otherwise, prefer the largest expansion (more terms = more complete paradigm) - return expansions.maxByOrNull { it.surface.size } - } - - private fun expansionsForToken(token: String): List { - val normalized = norm(token) - if (normalized.isEmpty()) return emptyList() - - synchronized(tokenCache) { - tokenCache[normalized]?.let { return it } - } - - // Try raw, normalized, and final-form variants to match DB values. - val candidates = buildLookupCandidates(token, normalized) - val mergedByBase = LinkedHashMap() - - for (candidate in candidates) { - val fetched = fetchExpansions(candidate, normalized) - for ((baseId, exp) in fetched) { - val existing = mergedByBase[baseId] - if (existing == null) { - mergedByBase[baseId] = exp - } else { - val surfaces = (existing.surface + exp.surface).distinct() - val variants = (existing.variants + exp.variants).distinct() - val base = (existing.base + exp.base).distinct() - mergedByBase[baseId] = Expansion(surfaces, variants, base) - } - } - } - - val expansions = mergedByBase.values.toList() - synchronized(tokenCache) { - tokenCache[normalized] = expansions - } - return expansions - } - - /** - * Fetch expansions for a token. Returns a list of (baseId, Expansion) pairs so callers can merge by base id. - */ - private fun fetchExpansions(rawToken: String, normalizedToken: String): List> { - val expansions = mutableListOf>() - val ctx = stmtProvider.get() - - runCatching { - synchronized(ctx) { - repeat(3) { idx -> ctx.stmt.setString(idx + 1, rawToken) } - val rs = ctx.stmt.executeQuery() - val accum = mutableMapOf() - while (rs.next()) { - val baseId = rs.getLong("base_id") - val bucket = accum.getOrPut(baseId) { - BaseBucket( - baseRaw = rs.getString("base") ?: "", - surfaces = linkedSetOf(), - variants = linkedSetOf() - ) - } - rs.getString("surface")?.let { bucket.surfaces += it } - rs.getString("variant")?.let { bucket.variants += it } - } - - for ((baseId, bucket) in accum) { - val cached = synchronized(baseCache) { baseCache[baseId] } - if (cached != null) { - expansions += baseId to cached - continue - } - - val surfaceN = bucket.surfaces.mapNotNull { v -> norm(v).takeIf { it.isNotEmpty() } } - val variantsN = bucket.variants.mapNotNull { v -> norm(v).takeIf { it.isNotEmpty() } } - val baseN = norm(bucket.baseRaw).takeIf { it.isNotEmpty() } - ?: surfaceN.firstOrNull() - ?: normalizedToken - - val baseTerms = listOfNotNull(baseN.takeIf { it.isNotEmpty() }) - val allTerms = (surfaceN + variantsN + baseTerms).distinct() - if (allTerms.isEmpty()) continue - - val exp = Expansion( - surface = allTerms, - variants = emptyList(), - base = baseTerms - ) - - synchronized(baseCache) { - baseCache[baseId] = exp - } - expansions += baseId to exp - } - } - }.onFailure { - debugln { "[MagicDictionary] Failed to fetch expansions for '$rawToken' : ${it.message}" } - } - - return expansions - } - - companion object { - private const val TOKEN_CACHE_SIZE = 1024 - private const val BASE_CACHE_SIZE = 512 - - /** - * Load from SQLite DB (expected tables: surface(value, base_id), variant(value, surface_id), base(value)). - * Uses streaming lookup to avoid holding the entire dictionary in memory. - */ - fun load(norm: (String) -> String, candidate: Path?): MagicDictionaryIndex? { - val file = candidate?.takeIf { Files.isRegularFile(it) && hasRequiredTables(it) } ?: run { - if (candidate != null) { - debugln { "[MagicDictionary] Ignoring candidate $candidate because required tables are missing" } - } - return null - } - return runCatching { - // Validate DB is reachable - DriverManager.getConnection("jdbc:sqlite:${file.toAbsolutePath()}").use { conn -> - conn.createStatement().use { stmt -> - stmt.execute("SELECT 1") - } - } - debugln { "[MagicDictionary] Streaming lexical db from $file (lazy on-demand)" } - MagicDictionaryIndex(norm, file) - }.onFailure { - debugln { "[MagicDictionary] Failed to load from $file : ${it.message}" } - }.getOrNull() - } - - /** - * Find the first candidate path that exists and contains the required tables. - */ - fun findValidDictionary(candidates: List): Path? { - for (candidate in candidates) { - if (!Files.isRegularFile(candidate)) continue - if (hasRequiredTables(candidate)) { - debugln { "[MagicDictionary] Using validated lexical db at $candidate" } - return candidate - } else { - debugln { - "[MagicDictionary] Candidate $candidate is present but missing required tables; skipping" - } - } - } - return null - } - - private fun hasRequiredTables(file: Path): Boolean = runCatching { - DriverManager.getConnection("jdbc:sqlite:${file.toAbsolutePath()}").use { conn -> - val sql = """ - SELECT name FROM sqlite_master - WHERE type = 'table' AND name IN ('surface', 'variant', 'base', 'surface_variant') - """.trimIndent() - conn.createStatement().use { stmt -> - val rs = stmt.executeQuery(sql) - val names = mutableSetOf() - while (rs.next()) names += rs.getString("name") ?: "" - names.containsAll(listOf("surface", "variant", "base", "surface_variant")) - } - } - }.getOrElse { false } - - private const val LOOKUP_SQL = """ - WITH matches AS ( - SELECT s.base_id AS base_id FROM surface s WHERE s.value = ? - UNION - SELECT b.id FROM base b WHERE b.value = ? - UNION - SELECT s.base_id FROM variant v - JOIN surface_variant sv ON sv.variant_id = v.id - JOIN surface s ON sv.surface_id = s.id - WHERE v.value = ? - ) - SELECT b.id as base_id, - b.value as base, - s.value as surface, - v.value as variant - FROM base b - JOIN matches m ON m.base_id = b.id - LEFT JOIN surface s ON s.base_id = b.id - LEFT JOIN surface_variant sv ON sv.surface_id = s.id - LEFT JOIN variant v ON sv.variant_id = v.id - """ - } - - private data class LookupContext( - val conn: Connection, - val stmt: PreparedStatement - ) - - private data class BaseBucket( - val baseRaw: String, - val surfaces: MutableSet, - val variants: MutableSet - ) - - private fun buildLookupCandidates(rawToken: String, normalized: String): List { - val finalsMap = mapOf( - 'כ' to 'ך', - 'מ' to 'ם', - 'נ' to 'ן', - 'פ' to 'ף', - 'צ' to 'ץ' - ) - - fun applyFinalForm(t: String): String { - if (t.isEmpty()) return t - val last = t.last() - val final = finalsMap[last] ?: last - return if (final == last) t else t.dropLast(1) + final - } - - return listOf( - rawToken, - normalized, - applyFinalForm(rawToken), - applyFinalForm(normalized) - ).filter { it.isNotBlank() }.distinct() - } - - /** - * Load all surface forms whose base lemma directly from the underlying SQLite DB. - * This is used for snippet highlighting of Hashem names, independent of token-level expansions. - */ - fun loadHashemSurfaces(): List { - val terms = linkedSetOf() - runCatching { - DriverManager.getConnection(url).use { conn -> - val sql = """ - SELECT s.value AS surface - FROM surface s - JOIN base b ON s.base_id = b.id - WHERE b.value = 'יהוה' - """.trimIndent() - conn.createStatement().use { stmt -> - val rs = stmt.executeQuery(sql) - while (rs.next()) { - val v = rs.getString("surface") ?: continue - val trimmed = v.trim() - if (trimmed.isNotEmpty()) { - terms += trimmed - } - } - } - } - }.onFailure { - debugln { "[MagicDictionary] Failed to load Hashem surfaces: ${it.message}" } - } - return terms.toList() - } -} diff --git a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/RepositorySnippetSourceProvider.kt b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/RepositorySnippetSourceProvider.kt index e0291582..d1018189 100644 --- a/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/RepositorySnippetSourceProvider.kt +++ b/SeforimApp/src/jvmMain/kotlin/io/github/kdroidfilter/seforimapp/framework/search/RepositorySnippetSourceProvider.kt @@ -1,12 +1,14 @@ package io.github.kdroidfilter.seforimapp.framework.search import io.github.kdroidfilter.seforimlibrary.dao.repository.SeforimRepository +import io.github.kdroidfilter.seforimlibrary.search.LineSnippetInfo +import io.github.kdroidfilter.seforimlibrary.search.SnippetProvider import kotlinx.coroutines.runBlocking import org.jsoup.Jsoup import org.jsoup.safety.Safelist /** - * Implementation of [SnippetSourceProvider] that fetches line content from the database + * Implementation of [SnippetProvider] that fetches line content from the database * and reproduces the exact same snippet source logic as the indexer. * * This allows removing the text_raw field from the Lucene index to reduce index size, @@ -14,7 +16,7 @@ import org.jsoup.safety.Safelist */ class RepositorySnippetSourceProvider( private val repository: SeforimRepository -) : SnippetSourceProvider { +) : SnippetProvider { companion object { // Must match the indexer constants diff --git a/SeforimLibrary b/SeforimLibrary index 3d78db99..8961af5e 160000 --- a/SeforimLibrary +++ b/SeforimLibrary @@ -1 +1 @@ -Subproject commit 3d78db99843510963b726cf925235478a2cadf1a +Subproject commit 8961af5ed87ce9a88853d3945f6f5208681c7e8a diff --git a/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/CommentsForLineOrTocPagingSource.kt b/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/CommentsForLineOrTocPagingSource.kt index 8f071935..3943e800 100644 --- a/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/CommentsForLineOrTocPagingSource.kt +++ b/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/CommentsForLineOrTocPagingSource.kt @@ -58,8 +58,7 @@ class CommentsForLineOrTocPagingSource( activeCommentatorIds = commentatorIds, connectionTypes = setOf(ConnectionType.COMMENTARY), offset = offset, - limit = limit, - distinctByTargetLine = ids.size > 1 // deduplicate when in paragraph mode + limit = limit ) val prevKey = if (page == 0) null else page - 1 diff --git a/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/LineTargumPagingSource.kt b/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/LineTargumPagingSource.kt index b528503b..9bb541ad 100644 --- a/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/LineTargumPagingSource.kt +++ b/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/LineTargumPagingSource.kt @@ -45,8 +45,7 @@ class LineTargumPagingSource( activeCommentatorIds = sourceBookIds, // reuse filtering by target book IDs connectionTypes = connectionTypes, offset = offset, - limit = limit, - distinctByTargetLine = ids.size > 1 // deduplicate when in paragraph mode + limit = limit ) val prevKey = if (page == 0) null else page - 1 diff --git a/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/MultiLineCommentsPagingSource.kt b/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/MultiLineCommentsPagingSource.kt index 08fe496c..c082241f 100644 --- a/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/MultiLineCommentsPagingSource.kt +++ b/pagination/src/commonMain/kotlin/io/github/kdroidfilter/seforimapp/pagination/MultiLineCommentsPagingSource.kt @@ -29,8 +29,7 @@ class MultiLineCommentsPagingSource( activeCommentatorIds = commentatorIds, connectionTypes = setOf(io.github.kdroidfilter.seforimlibrary.core.models.ConnectionType.COMMENTARY), offset = offset, - limit = limit, - distinctByTargetLine = lineIds.size > 1 // deduplicate when multiple source lines + limit = limit ) val prevKey = if (page == 0) null else page - 1 From 2c975f6fcf3c9568a46e08e93b0faf31df1b0499 Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 18:42:49 +0200 Subject: [PATCH 2/8] chore: update SeforimLibrary submodule - Include KDoc documentation for search interfaces - Use version catalog for jsoup dependency --- SeforimLibrary | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SeforimLibrary b/SeforimLibrary index 8961af5e..6fb3bde6 160000 --- a/SeforimLibrary +++ b/SeforimLibrary @@ -1 +1 @@ -Subproject commit 8961af5ed87ce9a88853d3945f6f5208681c7e8a +Subproject commit 6fb3bde6aef4dd3077b0c42693a8e3d2a9c1a53c From 793d891a9a48e4c5d3d82e3ea8376d4fa1ed3aec Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 18:44:22 +0200 Subject: [PATCH 3/8] ci: add test job to PR workflow - Run SeforimLibrary tests before builds - Run SeforimApp JVM tests - Upload test reports as artifacts - Update SeforimLibrary submodule with CI workflow --- .github/workflows/ci.yaml | 35 +++++++++++++++++++++++++++++++++++ SeforimLibrary | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 01d03117..b8a833c5 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -7,6 +7,41 @@ on: - main jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout source (with submodules) + uses: actions/checkout@v6 + with: + fetch-depth: 0 + submodules: recursive + + - name: Set up JDK 21 + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '21' + + - name: Setup Gradle + uses: gradle/gradle-build-action@v3 + + - name: Grant execute permission for gradlew + run: chmod +x gradlew + + - name: Run SeforimLibrary tests + run: ./gradlew :SeforimLibrary:test --no-daemon + + - name: Run SeforimApp tests + run: ./gradlew :SeforimApp:jvmTest --no-daemon + + - name: Upload test reports + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-reports + path: '**/build/reports/tests/' + retention-days: 7 + build: runs-on: ${{ matrix.os }} strategy: diff --git a/SeforimLibrary b/SeforimLibrary index 6fb3bde6..2b1a7abd 160000 --- a/SeforimLibrary +++ b/SeforimLibrary @@ -1 +1 @@ -Subproject commit 6fb3bde6aef4dd3077b0c42693a8e3d2a9c1a53c +Subproject commit 2b1a7abddd0735f64c4b7d3ce73b8561759bba80 From 4d208f8c363a15844288014cfd89fa04f8eeee15 Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 18:48:10 +0200 Subject: [PATCH 4/8] chore: update SeforimLibrary submodule (CI fix) --- SeforimLibrary | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SeforimLibrary b/SeforimLibrary index 2b1a7abd..a2c2a195 160000 --- a/SeforimLibrary +++ b/SeforimLibrary @@ -1 +1 @@ -Subproject commit 2b1a7abddd0735f64c4b7d3ce73b8561759bba80 +Subproject commit a2c2a1958caddb147a621d7d80b2e93f21055c83 From 5d342f1565aef016d6caaa2bcafddb209a8a81e8 Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 18:48:26 +0200 Subject: [PATCH 5/8] fix(ci): use allTests task for SeforimLibrary --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b8a833c5..d72738db 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -29,7 +29,7 @@ jobs: run: chmod +x gradlew - name: Run SeforimLibrary tests - run: ./gradlew :SeforimLibrary:test --no-daemon + run: ./gradlew :SeforimLibrary:allTests --no-daemon - name: Run SeforimApp tests run: ./gradlew :SeforimApp:jvmTest --no-daemon From 0d59f296e8c8d6df0955b2487d65249cef053301 Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 18:54:53 +0200 Subject: [PATCH 6/8] fix(ci): use root allTests task for composite build --- .github/workflows/ci.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d72738db..048271ba 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,11 +28,8 @@ jobs: - name: Grant execute permission for gradlew run: chmod +x gradlew - - name: Run SeforimLibrary tests - run: ./gradlew :SeforimLibrary:allTests --no-daemon - - - name: Run SeforimApp tests - run: ./gradlew :SeforimApp:jvmTest --no-daemon + - name: Run tests + run: ./gradlew allTests --no-daemon - name: Upload test reports if: always() From 34ac6e68dfc9910f88da13b051099d58bc2ef126 Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 18:59:12 +0200 Subject: [PATCH 7/8] fix(ci): use JBR 25 instead of JDK 21 for tests --- .github/workflows/ci.yaml | 13 ++++++++----- SeforimLibrary | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 048271ba..cc117e77 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,11 +16,14 @@ jobs: fetch-depth: 0 submodules: recursive - - name: Set up JDK 21 - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: '21' + - name: Install JBR 25 + run: | + curl -L -o jbr.tar.gz "https://cache-redirector.jetbrains.com/intellij-jbr/jbrsdk-25.0.1-linux-x64-b268.52.tar.gz" + mkdir -p "$RUNNER_TEMP/jbr" + tar -xzf jbr.tar.gz -C "$RUNNER_TEMP/jbr" + JBR_DIR=$(find "$RUNNER_TEMP/jbr" -mindepth 1 -maxdepth 1 -type d -name "jbr*" -o -name "jbrsdk*" | head -n 1) + echo "JAVA_HOME=$JBR_DIR" >> "$GITHUB_ENV" + echo "$JBR_DIR/bin" >> "$GITHUB_PATH" - name: Setup Gradle uses: gradle/gradle-build-action@v3 diff --git a/SeforimLibrary b/SeforimLibrary index a2c2a195..1f31f468 160000 --- a/SeforimLibrary +++ b/SeforimLibrary @@ -1 +1 @@ -Subproject commit a2c2a1958caddb147a621d7d80b2e93f21055c83 +Subproject commit 1f31f46815062a9e1e23ff27508066995c7f7273 From 41346b8b2b8dbc72bddd644383098ebd0601b7da Mon Sep 17 00:00:00 2001 From: Elie Gambache Date: Sun, 18 Jan 2026 19:09:15 +0200 Subject: [PATCH 8/8] fix(ci): run only SeforimApp tests (htmlparser has pre-existing failures) --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index cc117e77..c27775a9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -31,8 +31,8 @@ jobs: - name: Grant execute permission for gradlew run: chmod +x gradlew - - name: Run tests - run: ./gradlew allTests --no-daemon + - name: Run SeforimApp tests + run: ./gradlew :SeforimApp:jvmTest --no-daemon - name: Upload test reports if: always()