@@ -16,19 +16,25 @@ import (
1616 "github.com/AvengeMedia/danksearch/internal/errdefs"
1717 "github.com/AvengeMedia/danksearch/internal/log"
1818 bleve "github.com/blevesearch/bleve/v2"
19+ _ "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
20+ _ "github.com/blevesearch/bleve/v2/analysis/token/edgengram"
21+ _ "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
22+ _ "github.com/blevesearch/bleve/v2/analysis/token/ngram"
23+ _ "github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
1924 "github.com/blevesearch/bleve/v2/mapping"
2025 query "github.com/blevesearch/bleve/v2/search/query"
2126)
2227
// Document is the record handed to the search index for a single file.
//
// The JSON tag of each field is the name under which that field is mapped
// in the index.
type Document struct {
	// Path is the file path the document was read from.
	Path string `json:"path"`
	// Filename is the file's name; it is indexed verbatim with the "keyword"
	// analyzer and stored.
	Filename string `json:"filename"`
	// FilenameSub carries the same file name, indexed with the
	// "filename_ngram" analyzer for substring ("contains") matching.
	// It is not stored.
	FilenameSub string `json:"filename_sub"`
	// FilenamePrefix carries the same file name, indexed with the
	// "filename_edge" analyzer for prefix matching. It is not stored.
	FilenamePrefix string `json:"filename_prefix"`
	// Body is the text content to be searched, when there is any.
	Body string `json:"body"`
	// ContentType is the detected content type of the file.
	ContentType string `json:"content_type"`
	// ModTime is the file's modification time.
	ModTime time.Time `json:"mtime"`
	// Size is the file size in bytes.
	Size int64 `json:"size"`
	// Hash is a content hash of the file.
	// NOTE(review): the hashing scheme is not visible here — confirm.
	Hash string `json:"hash"`
}
3339
3440type Indexer struct {
@@ -111,6 +117,49 @@ func getStoreConfig() map[string]interface{} {
111117
112118func buildIndexMapping () mapping.IndexMapping {
113119 m := bleve .NewIndexMapping ()
120+
121+ err := m .AddCustomTokenFilter ("ngram_2_15" , map [string ]interface {}{
122+ "type" : "ngram" ,
123+ "min" : float64 (2 ),
124+ "max" : float64 (15 ),
125+ })
126+ if err != nil {
127+ panic (err )
128+ }
129+
130+ err = m .AddCustomTokenFilter ("edge_ngram_2_30" , map [string ]interface {}{
131+ "type" : "edge_ngram" ,
132+ "min" : float64 (2 ),
133+ "max" : float64 (30 ),
134+ })
135+ if err != nil {
136+ panic (err )
137+ }
138+
139+ err = m .AddCustomAnalyzer ("filename_ngram" , map [string ]interface {}{
140+ "type" : "custom" ,
141+ "tokenizer" : "single" ,
142+ "token_filters" : []string {
143+ "to_lower" ,
144+ "ngram_2_15" ,
145+ },
146+ })
147+ if err != nil {
148+ panic (err )
149+ }
150+
151+ err = m .AddCustomAnalyzer ("filename_edge" , map [string ]interface {}{
152+ "type" : "custom" ,
153+ "tokenizer" : "single" ,
154+ "token_filters" : []string {
155+ "to_lower" ,
156+ "edge_ngram_2_30" ,
157+ },
158+ })
159+ if err != nil {
160+ panic (err )
161+ }
162+
114163 docMapping := bleve .NewDocumentMapping ()
115164
116165 pathField := bleve .NewTextFieldMapping ()
@@ -120,14 +169,18 @@ func buildIndexMapping() mapping.IndexMapping {
120169
121170 filenameField := bleve .NewTextFieldMapping ()
122171 filenameField .Store = true
123- filenameField .IncludeTermVectors = true
124172 filenameField .Analyzer = "keyword"
125173 docMapping .AddFieldMappingsAt ("filename" , filenameField )
126174
127- titleField := bleve .NewTextFieldMapping ()
128- titleField .Store = true
129- titleField .IncludeTermVectors = true
130- docMapping .AddFieldMappingsAt ("title" , titleField )
175+ filenameSubField := bleve .NewTextFieldMapping ()
176+ filenameSubField .Store = false
177+ filenameSubField .Analyzer = "filename_ngram"
178+ docMapping .AddFieldMappingsAt ("filename_sub" , filenameSubField )
179+
180+ filenamePrefixField := bleve .NewTextFieldMapping ()
181+ filenamePrefixField .Store = false
182+ filenamePrefixField .Analyzer = "filename_edge"
183+ docMapping .AddFieldMappingsAt ("filename_prefix" , filenamePrefixField )
131184
132185 bodyField := bleve .NewTextFieldMapping ()
133186 bodyField .Store = false
@@ -203,12 +256,13 @@ func (i *Indexer) readDocument(path string, info os.FileInfo) (*Document, error)
203256 }
204257
205258 doc := & Document {
206- Path : path ,
207- Filename : filename ,
208- Title : filename ,
209- ContentType : contentType ,
210- ModTime : info .ModTime (),
211- Size : info .Size (),
259+ Path : path ,
260+ Filename : filename ,
261+ FilenameSub : filename ,
262+ FilenamePrefix : filename ,
263+ ContentType : contentType ,
264+ ModTime : info .ModTime (),
265+ Size : info .Size (),
212266 }
213267
214268 if i .config .IsTextFile (path ) {
@@ -266,37 +320,17 @@ func (i *Indexer) SearchWithOptions(opts *SearchOptions) (*bleve.SearchResult, e
266320 // Build the main query
267321 var mainQuery query.Query
268322
269- // Special case: match all
270323 if opts .Query == "*" {
271324 mainQuery = bleve .NewMatchAllQuery ()
272325 } else if opts .Field != "" {
273- // Field-specific search
274326 mainQuery = i .buildFieldQuery (opts .Query , opts .Field , opts .Fuzzy )
275327 } else {
276- // Search across all fields with boosting
277- queryLower := strings .ToLower (opts .Query )
278- filenamePattern := "*" + queryLower + "*"
279-
280- filenameQuery := bleve .NewWildcardQuery (filenamePattern )
281- filenameQuery .SetField ("filename" )
282- filenameQuery .SetBoost (10.0 )
283-
284- titleQuery := bleve .NewWildcardQuery (filenamePattern )
285- titleQuery .SetField ("title" )
286- titleQuery .SetBoost (5.0 )
287-
328+ filenameQuery := i .buildFilenameQuery (opts .Query , 20.0 , 10.0 )
288329 bodyQuery := bleve .NewMatchQuery (opts .Query )
289330 bodyQuery .SetField ("body" )
290331 bodyQuery .SetBoost (1.0 )
291332
292- if opts .Fuzzy {
293- fuzzyBodyQuery := bleve .NewFuzzyQuery (opts .Query )
294- fuzzyBodyQuery .SetField ("body" )
295- fuzzyBodyQuery .SetBoost (0.5 )
296- mainQuery = bleve .NewDisjunctionQuery (filenameQuery , titleQuery , bodyQuery , fuzzyBodyQuery )
297- } else {
298- mainQuery = bleve .NewDisjunctionQuery (filenameQuery , titleQuery , bodyQuery )
299- }
333+ mainQuery = bleve .NewDisjunctionQuery (filenameQuery , bodyQuery )
300334 }
301335
302336 // Build filters
@@ -393,32 +427,51 @@ func (i *Indexer) SearchWithOptions(opts *SearchOptions) (*bleve.SearchResult, e
393427 return result , nil
394428}
395429
430+ func (i * Indexer ) buildFilenameQuery (queryStr string , boostPrefix , boostContains float64 ) query.Query {
431+ q := strings .TrimSpace (queryStr )
432+ if q == "" {
433+ return bleve .NewMatchNoneQuery ()
434+ }
435+
436+ disj := bleve .NewDisjunctionQuery ()
437+
438+ prefixQuery := bleve .NewPrefixQuery (strings .ToLower (q ))
439+ prefixQuery .SetField ("filename_prefix" )
440+ prefixQuery .SetBoost (boostPrefix )
441+ disj .AddQuery (prefixQuery )
442+
443+ if len (q ) >= 2 {
444+ matchQuery := bleve .NewMatchQuery (q )
445+ matchQuery .SetField ("filename_sub" )
446+ matchQuery .SetBoost (boostContains )
447+ disj .AddQuery (matchQuery )
448+ }
449+
450+ if len (disj .Disjuncts ) == 1 {
451+ return disj .Disjuncts [0 ]
452+ }
453+ return disj
454+ }
455+
396456func (i * Indexer ) buildFieldQuery (queryStr , field string , fuzzy bool ) query.Query {
397- queryLower := strings .ToLower (queryStr )
457+ if field == "filename" {
458+ return i .buildFilenameQuery (queryStr , 2.0 , 1.0 )
459+ }
398460
399- switch field {
400- case "filename" , "title" :
401- pattern := "*" + queryLower + "*"
402- q := bleve .NewWildcardQuery (pattern )
403- q .SetField (field )
404- return q
405- case "body" :
461+ if field == "body" {
406462 if fuzzy {
407463 q := bleve .NewFuzzyQuery (queryStr )
408464 q .SetField ("body" )
409465 return q
410466 }
411- // Use match query - searches for all words in the query
412- // Note: Special characters like //, !, etc. are normalized by the analyzer
413467 q := bleve .NewMatchQuery (queryStr )
414468 q .SetField ("body" )
415469 return q
416- default :
417- // Fallback to match query
418- q := bleve .NewMatchQuery (queryStr )
419- q .SetField (field )
420- return q
421470 }
471+
472+ q := bleve .NewMatchQuery (queryStr )
473+ q .SetField (field )
474+ return q
422475}
423476
424477func (i * Indexer ) ReindexAll () error {
0 commit comments