Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions internal/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ type Engine struct {
indexMgr IndexManager

// `library` is a "set" that stores document names to avoid adding the same document multiple times.
library map[string]struct{}
library map[string]int

// `processor` is used to process the tokens before adding them to the index and before querying the index;
// it removes stop words and applies stemming and normalization to the tokens.
Expand All @@ -34,19 +34,26 @@ func NewEngine(processor textprocessing.Processor, delimiterManager *tokenizer.D
return &Engine{
docs: make([]*internal.Document, 0),
indexMgr: idxMgr,
library: make(map[string]struct{}),
library: make(map[string]int),
processor: processor,
delimiterManager: delimiterManager,
}
}

func (e *Engine) AddDocument(doc *internal.Document) {
if _, ok := e.library[doc.Name]; !ok {
doc.ID = e.GetNextDocID()
if _, ok := e.library[doc.GetFilePath()]; !ok {
logger.Info(EnginePrefix, "Adding document %s", doc.GetFilePath())

doc.ID = e.GetNextDocID()
e.docs = append(e.docs, doc)
e.library[doc.DirectoryPath] = struct{}{}
e.parseDocument(doc)
e.library[doc.GetFilePath()] = doc.ID
} else { // document already exists, update the document by removing it from the index and re-adding it
logger.Warn(EnginePrefix, "Updating document %s", doc.GetFilePath())

doc.ID = e.library[doc.GetFilePath()]
e.indexMgr.Remove(doc.ID)
}
e.parseDocument(doc)
}

func (e *Engine) ProcessToken(token string) string {
Expand Down
14 changes: 14 additions & 0 deletions internal/engine/index_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@ func (idx *IndexManager) PutSlice(key string, values []int) {
}
}

// Remove asks every posting structure stored in the index to drop docID.
//
// Each value in idx.index is visited once, so the cost grows with the number
// of keys. A log line is written before the scan, and a second one afterwards
// reports how many keys actually contained docID.
func (idx *IndexManager) Remove(docID int) {
	logger.Info(IndexManagerPrefix, fmt.Sprintf("Removing document ID %d from index, A LINEAR SEARCH is performed.", docID))

	affected := 0
	for _, postings := range idx.index {
		// Remove reports false when docID was not present under this key.
		if !postings.Remove(docID) {
			continue
		}
		affected++
	}

	logger.Info(IndexManagerPrefix, fmt.Sprintf("Removed document ID %d, %d Keys are affected.", docID, affected))
}

func (idx *IndexManager) Get(key string) ordered.OrderedStructure[int] {
if _, ok := idx.index[key]; !ok {
logger.Warn(IndexManagerPrefix, fmt.Sprintf("Key %s not found in index", key))
Expand Down
20 changes: 18 additions & 2 deletions internal/structures/ordered/ordered_slice.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ func NewOrderedSliceWithSlice[Entry constraints.Integer](slice []Entry) OrderedS
return &OrderedSlice[Entry]{data: newSlice}
}

func (o *OrderedSlice[Entry]) InsertSorted(entry Entry) {
func (o *OrderedSlice[Entry]) InsertSorted(entry Entry) bool {
var idx = o.UpperBound(entry)

if idx-1 >= 0 && o.data[idx-1] == entry { // neglect duplicates
return
return false
}

o.data = append(o.data, entry)
Expand All @@ -45,6 +45,8 @@ func (o *OrderedSlice[Entry]) InsertSorted(entry Entry) {
for i := idx + 1; i < len(o.data); i++ {
o.data[i], swp = swp, o.data[i]
}

return true;
}

func (o *OrderedSlice[Entry]) BinarySearch(entry Entry) int {
Expand Down Expand Up @@ -194,3 +196,17 @@ func (s1 *OrderedSlice[Entry]) Union(s2 OrderedStructure[Entry]) OrderedStructur

return res
}

// Remove deletes entry from the slice and reports whether it was found.
//
// A nil receiver or an empty slice yields false. The position is looked up
// with BinarySearch, and a result of -1 is treated as "not present".
func (o *OrderedSlice[Entry]) Remove(entry Entry) bool {
	if o == nil || o.IsEmpty() {
		return false
	}

	pos := o.BinarySearch(entry)
	if pos == -1 {
		return false
	}

	// Shift everything after pos one slot to the left over the removed
	// element, then drop the now-duplicated last slot.
	copy(o.data[pos:], o.data[pos+1:])
	o.data = o.data[:len(o.data)-1]
	return true
}
3 changes: 2 additions & 1 deletion internal/structures/ordered/ordered_structure.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ type SetOperations[Entry constraints.Ordered] interface {
type OrderedStructure[Entry constraints.Ordered] interface {
SetOperations[Entry]

InsertSorted(Entry)
InsertSorted(Entry) bool
Remove(Entry) bool

GetLength() int
IsEmpty() bool
Expand Down
66 changes: 51 additions & 15 deletions internal/structures/ordered/skip_pointer_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,23 +84,21 @@ func (s *SkipPointerList[Entry]) UpdateSkipPointers() {
}
}

func (s *SkipPointerList[Entry]) InsertSorted(entry Entry) {
func (s *SkipPointerList[Entry]) InsertSorted(entry Entry) bool {
newNode := &SkipNode[Entry]{entry: entry}
if s.head == nil {
s.head = newNode
s.tail = newNode
s.size++
return
return true
}

if s.head.entry > entry {
s.pushFront(entry)
return
return s.pushFront(entry)
}

if s.tail.entry < entry {
s.pushBack(entry)
return
return s.pushBack(entry)
}

curr := s.head
Expand All @@ -114,7 +112,7 @@ func (s *SkipPointerList[Entry]) InsertSorted(entry Entry) {

// neglect duplicates
if curr.next != nil && curr.next.entry == entry {
return
return false
}

newNode.next = curr.next
Expand All @@ -129,6 +127,8 @@ func (s *SkipPointerList[Entry]) InsertSorted(entry Entry) {
if s.size >= (s.currBlockSize+1)*(s.currBlockSize+1) {
s.UpdateSkipPointers()
}

return true
}

func (s *SkipPointerList[Entry]) GetLength() int {
Expand All @@ -141,42 +141,44 @@ func (s *SkipPointerList[Entry]) IsEmpty() bool {

// Used internally for `SetOperations` functions,
// where it is guaranteed that the insertion is always sorted and added to the end
func (s *SkipPointerList[Entry]) pushBack(entry Entry) {
func (s *SkipPointerList[Entry]) pushBack(entry Entry) bool {
if s.IsEmpty() {
s.InsertSorted(entry)
return
return s.InsertSorted(entry)
}

if s.tail.entry > entry {
panic("[SkipPointer]: Cannot push back an entry that is less than the tail")
}

if s.tail.entry == entry {
return
return false
}

s.tail.next = &SkipNode[Entry]{entry: entry}
s.tail = s.tail.next
s.size++

return true
}

func (s *SkipPointerList[Entry]) pushFront(entry Entry) {
func (s *SkipPointerList[Entry]) pushFront(entry Entry) bool {
if s.IsEmpty() {
s.InsertSorted(entry)
return
return s.InsertSorted(entry)
}

if s.head.entry < entry {
panic("[SkipPointer]: Cannot push front an entry that is greater than the head")
}

if s.head.entry == entry {
return
return false
}

newNode := &SkipNode[Entry]{entry: entry, next: s.head}
s.head = newNode
s.size++

return true
}

func (s *SkipPointerList[Entry]) At(index int) Entry {
Expand Down Expand Up @@ -285,3 +287,37 @@ func (s1 *SkipPointerList[Entry]) Union(s2 OrderedStructure[Entry]) OrderedStruc

return res
}

// Remove deletes entry from the list and reports whether it was present.
//
// Besides unlinking the node, Remove keeps the list's other pointers
// consistent:
//   - tail is moved back when the last node is removed, and cleared when the
//     list becomes empty, so it never references an unlinked node;
//   - every skip pointer that targeted the removed node is redirected to that
//     node's successor, so later traversals that follow skip pointers cannot
//     land on a node that is no longer part of the list.
//
// The lookup is a plain walk over next pointers rather than a skip-accelerated
// one: every node in front of the target has to be inspected anyway to find
// skip pointers that reference it.
func (s *SkipPointerList[Entry]) Remove(entry Entry) bool {
	if s.IsEmpty() {
		return false
	}

	if s.head.entry == entry {
		s.head = s.head.next
		if s.head == nil {
			// The list is now empty; do not keep the removed node reachable
			// through tail.
			s.tail = nil
		}
		s.size--
		return true
	}

	// Find the predecessor of the node holding entry. The list is sorted, so
	// the walk can stop at the first entry that is not smaller.
	prev := s.head
	for prev.next != nil && prev.next.entry < entry {
		prev = prev.next
	}
	if prev.next == nil || prev.next.entry != entry {
		return false
	}
	target := prev.next

	// Skip pointers only point forward, so only nodes before target can
	// reference it. Retarget them before unlinking.
	for node := s.head; node != target; node = node.next {
		if node.skip == target {
			node.skip = target.next
		}
	}

	prev.next = target.next
	if prev.next == nil {
		s.tail = prev
	}
	s.size--

	return true
}
Loading