From be74fb3570cf499cc02737f0f5846fcd04ad3825 Mon Sep 17 00:00:00 2001
From: zaidbinusama <54680765+zaidbinusama@users.noreply.github.com>
Date: Wed, 17 May 2023 13:48:39 +0500
Subject: [PATCH 1/2] fix author index update when a new file is added

---
 front.py |  41 ++++++-------
 main.py  | 179 +++++++++++++++++++++++++++++--------------------------
 2 files changed, 112 insertions(+), 108 deletions(-)

diff --git a/front.py b/front.py
index 604bc4d..f074c18 100644
--- a/front.py
+++ b/front.py
@@ -1,12 +1,3 @@
-'''
-DSA PROJECT MADE BY:
-1. SAIF ALI (336561)
-2. HASEEB MAHMOOD (334718)
-3. TALHA MAJEED KHAN (336254)
-4. ALI USMAN BUTT (335520)
-
-PRESENTED TO: SIR FAISAL SHAFAIT
-'''
 
 # Importing external dependencies.
 import collections
@@ -31,7 +22,6 @@
 key = int(fp_temp.readline())
 docid = int(fp_temp.readline())
 fp_temp.close()
-
 fp_a = open("author.json", "r")
 author_dictionary = json.load(fp_a)
 fp_a.close()
@@ -288,7 +278,7 @@ def searching(search, lis):
                     if len(mwq[docid][0]) != 0:
                         proximity[docid] = mwq[docid][0]
             if len(common_title) > 0:
-                print("\n------------TITLE OCCURRENCES-----------------\n")
+                # print("\n------------TITLE OCCURRENCES-----------------\n")
                 count = 0
                 for t in common_title:
                     if t is None:
@@ -300,7 +290,7 @@ def searching(search, lis):
                             break
 
             if len(common_set) == 0:
-                print("No such combination of words exist in the database")
+                # print("No such combination of words exist in the database")
                 for wordids in wordslist:  # if there is no word from query in title or in close proximity or common in some document
                     sth = singlewordwithid(wordids)
                     counter = 0
@@ -314,14 +304,14 @@ def searching(search, lis):
                         continue
             else:  # printing words in close proximity first and the ones in common set later
                 if len(proximity) != 0:
-                    print("\n------------CLOSE PROXIMITY OCCURRENCES-----------------\n")
+                    # print("\n------------CLOSE PROXIMITY OCCURRENCES-----------------\n")
                     var = 0
                     for val in proximity.keys():
                         lis.append(url_dic[f"{val}"])
                         var +=1
                         if var == 30:
                             break
-                    print("\n------------OTHER OCCURRENCES---------------------------\n")
+                    # print("\n------------OTHER OCCURRENCES---------------------------\n")
                     var = 0
                     for value in common_set:
                         if value not in proximity.keys():
@@ -330,7 +320,7 @@ def searching(search, lis):
                             if var == 30:
                                 break
                 else:
-                    print("\n------------OTHER OCCURRENCES---------------------------\n")
+                    # print("\n------------OTHER OCCURRENCES---------------------------\n")
                     var = 0
                     for value in common_set:
                         if value not in proximity.keys():
@@ -360,14 +350,14 @@ def searching(search, lis):
         counter = 0
         if len(ts) > 0 or len(sws) > 0:
             if len(ts) > 0:
-                print("\n------------TITLE OCCURRENCES-----------------\n")
+                # print("\n------------TITLE OCCURRENCES-----------------\n")
                 for t in ts:
                     lis.append(url_dic[f"{t}"])
                     counter += 1
                     if counter == 30:
                         break
             if len(sws) > 0:
-                print("\n------------OTHER OCCURRENCES-----------------\n")
+                # print("\n------------OTHER OCCURRENCES-----------------\n")
                 counter = 0
                 for key in sws.keys():
                     lis.append(url_dic[f"{key}"])
@@ -434,7 +424,12 @@ def updateall(filetoadd, key, docid, fp_filenames):
             author_tokens = word_tokenize(y[i]["author"])   #updating authors
             author_tokens = [w.lower() for w in author_tokens]
             author = "".join(author_tokens)
-            author_dictionary[f'{author}'].append(docid)
+            if author not in author_dictionary:
+                author_dictionary[f"{author}"] = [docid]
+            elif author in author_dictionary:
+                author_docids = author_dictionary[f"{author}"]
+                author_docids.append(docid)
+                author_dictionary[f"{author}"] = author_docids
             title_tokens = word_tokenize(y[i]["title"])
             word_tokens = [w.lower() for w in word_tokens]
             table = str.maketrans('', '', string.punctuation)
@@ -511,12 +506,12 @@ def updateall(filetoadd, key, docid, fp_filenames):
 root.maxsize(800, 500)
 lis = []
 user_query = tk.StringVar()
-logo_path = tk.PhotoImage(file="BG.ppm")
-logo = Label(root, image=logo_path).pack()
-button_font = font.Font(family='Arial', size=8)
+# logo_path = tk.PhotoImage(file="BG.ppm")
+# logo = Label(root, image=logo_path).pack()
+button_font = font.Font(family='Calibri', size=8)
 text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120)
-search_button = tk.Button(root, text="search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150)
-add_file_button = tk.Button(root, text="add file", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10)
+search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150)
+add_file_button = tk.Button(root, text="Add File", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10)
 scframe = VerticalScrolledFrame(root)
 scframe.pack(side='bottom', pady=30)
 
diff --git a/main.py b/main.py
index db4cec3..702f365 100644
--- a/main.py
+++ b/main.py
@@ -74,10 +74,18 @@ def check_if_string_in_file(file_name, string_to_search):
     fi_dictionary = defaultdict(list)
     ii_dictionary = multi_dict(2, list)
     title_dictionary = defaultdict(list)
-fp_temp = open("storage.txt", "r")
-key = int(fp_temp.readline())
-docid = int(fp_temp.readline())
-fp_temp.close()
+# Resume the word-id (key) and document-id counters saved by a previous
+# run. They start from zero when storage.txt is missing or does not
+# contain two integers on its first two lines.
+key = 0
+docid = 0
+if os.path.isfile("storage.txt"):
+    with open("storage.txt", "r") as fp_temp:
+        try:
+            saved = int(fp_temp.readline()), int(fp_temp.readline())
+        except ValueError:  # blank or non-numeric line
+            saved = 0, 0
+    key, docid = saved
 
 # making of lexicon
 for fname in glob.glob("newsdata/*.json"):
@@ -90,94 +98,95 @@ def check_if_string_in_file(file_name, string_to_search):
             fp_filenames.write("\n")
             y = json.load(fp)
             for i in range(len(y)):
-                # position = 0
-                # word_tokens = word_tokenize(y[i]["content"])
-                # url_dictionary[f"{docid}"] = y[i]["url"]
+                position = 0  # index of the next kept (alphabetic, non-stop-word) content token
+                word_tokens = word_tokenize(y[i]["content"])
+                url_dictionary[f"{docid}"] = y[i]["url"]  # doc id (as a string) -> article URL
 
                 author_tokens = word_tokenize(y[i]["author"])
                 author_tokens = [w.lower() for w in author_tokens]
                 author = "".join(author_tokens)
                 author_dictionary[f'{author}'].append(docid)
 
-                # title_tokens = word_tokenize(y[i]["title"])
-                #
-                # word_tokens = [w.lower() for w in word_tokens]
-                # table = str.maketrans('', '', string.punctuation)
-                # strip = [w.translate(table) for w in word_tokens]
-                #
-                # title_tokens = [t.lower() for t in title_tokens]
-                # title_table = str.maketrans('', '', string.punctuation)
-                # title_strip = [t.translate(title_table) for t in title_tokens]
-
-#                 for w in strip:
-#                     if w.isalpha() and w not in stop_words:
-#                         x = snow_stemmer.stem(w)
-#                         if x not in lex_dictionary:
-#                             # making of lexicon
-#                             lex_dictionary[x] = key
-#                             key += 1
-#                         if x in lex_dictionary:  # making of inverted index
-#                             if str(lex_dictionary[x]) not in ii_dictionary:
-#                                 # wordid does not exist
-#                                 ii_dictionary.update({f"{lex_dictionary[x]}": {}})
-#                             if str(lex_dictionary[x]) in ii_dictionary:
-#                                 if str(docid) not in ii_dictionary[f"{lex_dictionary[x]}"]:
-#                                     # wordid exists but doc id doesnt
-#                                     ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"] = [position]
-#                                 elif str(docid) in ii_dictionary[f"{lex_dictionary[x]}"]:
-#                                     # wordid and docid exists
-#                                     temp_list = [ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"]]
-#                                     temp_list = list(flatten(temp_list))
-#                                     temp_list.append(position)
-#                                     ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"] = temp_list
-#                             if str(docid) not in fi_dictionary:
-#                                 # making of forward index
-#                                 fi_dictionary[f"{docid}"] = [lex_dictionary[x]]
-#                             if str(docid) in fi_dictionary:
-#                                 if lex_dictionary[x] not in fi_dictionary[f"{docid}"]:
-#                                     fi_dictionary[f"{docid}"].append(lex_dictionary[x])
-#                         position += 1
-#                 for t in title_strip:
-#                     if t.isalpha() and t not in stop_words:
-#                         t = snow_stemmer.stem(t)
-#                         if t not in lex_dictionary:
-#                             lex_dictionary[t] = key
-#                             key += 1
-#                         if t in lex_dictionary:
-#                             if str(lex_dictionary[t]) not in title_dictionary:
-#                                 title_dictionary[f"{lex_dictionary[t]}"] = [docid]
-#                             if str(lex_dictionary[t]) in title_dictionary:
-#                                 if docid not in title_dictionary[f"{lex_dictionary[t]}"]:
-#                                     title_dictionary[f"{lex_dictionary[t]}"].append(docid)
+                # Normalise both token streams: lower-case every token, then
+                # delete all punctuation characters from it.
+                title_tokens = [t.lower() for t in word_tokenize(y[i]["title"])]
+                word_tokens = [w.lower() for w in word_tokens]
+
+                table = str.maketrans('', '', string.punctuation)
+                title_table = str.maketrans('', '', string.punctuation)
+                strip = [w.translate(table) for w in word_tokens]
+                title_strip = [t.translate(title_table) for t in title_tokens]
+                for w in strip:
+                    if w.isalpha() and w not in stop_words:
+                        x = snow_stemmer.stem(w)
+                        if x not in lex_dictionary:
+                            # making of lexicon: a new stem gets the next word id
+                            lex_dictionary[x] = key
+                            key += 1
+                        word_key = f"{lex_dictionary[x]}"
+                        doc_key = f"{docid}"
+                        # making of inverted index: word id -> doc id -> positions
+                        if word_key not in ii_dictionary:
+                            # word id does not exist yet
+                            ii_dictionary.update({word_key: {}})
+                        if doc_key not in ii_dictionary[word_key]:
+                            # word id exists but doc id doesn't
+                            ii_dictionary[word_key][doc_key] = [position]
+                        else:
+                            # word id and doc id exist: extend the position list
+                            temp_list = list(flatten([ii_dictionary[word_key][doc_key]]))
+                            temp_list.append(position)
+                            ii_dictionary[word_key][doc_key] = temp_list
+                        # making of forward index: doc id -> distinct word ids
+                        if doc_key not in fi_dictionary:
+                            fi_dictionary[doc_key] = [lex_dictionary[x]]
+                        elif lex_dictionary[x] not in fi_dictionary[doc_key]:
+                            fi_dictionary[doc_key].append(lex_dictionary[x])
+                        position += 1
+                for t in title_strip:  # title index: word id -> ids of docs titled with it
+                    if not t.isalpha() or t in stop_words:
+                        continue
+                    t = snow_stemmer.stem(t)
+                    if t not in lex_dictionary:
+                        lex_dictionary[t] = key
+                        key += 1
+                    title_key = f"{lex_dictionary[t]}"
+                    if title_key not in title_dictionary:
+                        title_dictionary[title_key] = [docid]
+                    elif docid not in title_dictionary[title_key]:
+                        title_dictionary[title_key].append(docid)
                 docid = docid + 1
-#             print(fp.name)
-#             print(docid)
-#             write = 1
-#         else:
-#             continue
-#
-# fp_temp = open("storage.txt", "w")
-# fp_temp.write(str(key))
-# fp_temp.write("\n")
-# fp_temp.write(str(docid))
-# fp_temp.close()
-# if write == 1:
-#     fp_fi = open("forwardindex.json", "w")
-#     fp_ii = open("invertedindex.json", "w")
-#     fp_lex = open("lexicon.json", "w")
-#     fp_url = open("urls.json", "w")
-#     fp_ti = open("titleinverted.json", "w")
-#     json.dump(lex_dictionary, fp_lex)
-#     json.dump(fi_dictionary, fp_fi)
-#     json.dump(ii_dictionary, fp_ii)
-#     json.dump(url_dictionary, fp_url)
-#     json.dump(title_dictionary, fp_ti)
-#     fp_url.close()
-#     fp_ii.close()
-#     fp_lex.close()
-#     fp_ti.close()
-#
-# fp_fi.close()
+            print(fp.name)
+            print(docid)
+            write = 1
+        else:
+            continue
+
+# Persist the id counters so the next run continues numbering from here.
+with open("storage.txt", "w") as fp_temp:
+    fp_temp.write(f"{key}\n{docid}")
+
+if write == 1:
+    # Rewrite every index file; each `with` closes its handle even when a
+    # dump raises, so one failure cannot leave the other files open.
+    with open("lexicon.json", "w") as fp_lex:
+        json.dump(lex_dictionary, fp_lex)
+    with open("forwardindex.json", "w") as fp_fi:
+        json.dump(fi_dictionary, fp_fi)
+    with open("invertedindex.json", "w") as fp_ii:
+        json.dump(ii_dictionary, fp_ii)
+    with open("urls.json", "w") as fp_url:
+        json.dump(url_dictionary, fp_url)
+    with open("titleinverted.json", "w") as fp_ti:
+        json.dump(title_dictionary, fp_ti)
+
+# fp_fi may be unbound when nothing was written above. Closing a file
+# twice is harmless, so NameError is the only failure worth tolerating;
+# anything else should surface instead of being swallowed.
+try:
+    fp_fi.close()
+except NameError:
+    pass
 fp_a = open("author.json", "w")
 json.dump(author_dictionary,fp_a)
 fp_a.close()

From 3872ffe555e126278f4187be349d015f96dab1ee Mon Sep 17 00:00:00 2001
From: zaidbinusama <54680765+zaidbinusama@users.noreply.github.com>
Date: Wed, 17 May 2023 17:26:19 +0500
Subject: [PATCH 2/2] added client server architecture

---
 client_socket.py | 118 +++++++++++++++++++++++++++++++++++++++++++++++
 front.py         |  51 ++++++++++----------
 server_socket.py |  42 +++++++++++++++++
 3 files changed, 186 insertions(+), 25 deletions(-)
 create mode 100644 client_socket.py
 create mode 100644 server_socket.py

diff --git a/client_socket.py b/client_socket.py
new file mode 100644
index 0000000..185d846
--- /dev/null
+++ b/client_socket.py
@@ -0,0 +1,118 @@
+import socket
+import tkinter as tk
+from tkinter import font
+import pickle
+import webbrowser
+
+Response = []
+# Class to create vertical scrollable frame to show urls to the user.
+class VerticalScrolledFrame(tk.Frame):
+    """Tkinter frame whose contents can be scrolled vertically.
+
+    * Add child widgets to the ``interior`` attribute, not to this frame
+    * Pack, place or grid this frame like any other widget
+    * Only vertical scrolling is provided
+    """
+
+    def __init__(self, parent, *args, **kw):
+        super().__init__(parent, *args, **kw)
+
+        # A scrollbar on the right drives a canvas that fills the rest.
+        scrollbar = tk.Scrollbar(self, orient=tk.VERTICAL)
+        scrollbar.pack(fill=tk.Y, side=tk.RIGHT, expand=tk.FALSE)
+        canvas = tk.Canvas(self, bd=0, highlightthickness=0,
+                           yscrollcommand=scrollbar.set)
+        canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=tk.TRUE)
+        scrollbar.config(command=canvas.yview)
+
+        # Start with the view at the top-left corner.
+        canvas.xview_moveto(0)
+        canvas.yview_moveto(0)
+
+        # The interior frame lives inside the canvas and scrolls with it.
+        interior = tk.Frame(canvas)
+        self.interior = interior
+        window_id = canvas.create_window(0, 0, window=interior,
+                                         anchor=tk.NW)
+
+        def _configure_interior(event):
+            # Interior resized: the scroll region must cover all of it,
+            # and the canvas takes the interior's requested width.
+            wanted_w = interior.winfo_reqwidth()
+            wanted_h = interior.winfo_reqheight()
+            canvas.config(scrollregion="0 0 %s %s" % (wanted_w, wanted_h))
+            if interior.winfo_reqwidth() != canvas.winfo_width():
+                canvas.config(width=interior.winfo_reqwidth())
+
+        interior.bind('<Configure>', _configure_interior)
+
+        def _configure_canvas(event):
+            # Canvas resized: stretch the interior to the canvas width.
+            canvas_w = canvas.winfo_width()
+            if interior.winfo_reqwidth() != canvas_w:
+                canvas.itemconfigure(window_id, width=canvas_w)
+
+        canvas.bind('<Configure>', _configure_canvas)
+
+
+def frame_clear():
+    """Destroy the current result frame and create a new, empty one."""
+    scframe.destroy()
+    create()
+
+def create():
+    """Create an empty scrollable results frame at the bottom of the window."""
+    global scframe
+    scframe = VerticalScrolledFrame(root)
+    scframe.pack(side='bottom', pady=30)
+
+def openlink(x):  # x: the URL attached to the clicked result button
+    webbrowser.open_new(x)  # asks the default browser for a new window, if possible
+
+def getquery():
+    """Send the typed query to the server and list the returned URLs as link buttons."""
+    frame_clear()
+    query = user_query.get()
+    if not query:  # send(b'') transmits nothing, so recv() below would block forever
+        return
+    ClientSocket.sendall(query.encode())
+    urls = pickle.loads(ClientSocket.recv(2048))  # NOTE(review): unpickling trusts the peer; one recv may truncate
+    for url in urls:
+        btn = tk.Button(scframe.interior, height=1, width=100, relief=tk.FLAT, bg="gray99", fg="blue", font="Dosis",
+                        text=f"{url}", command=lambda url=url: openlink(url))
+        btn.pack(padx=10, pady=5, side=tk.TOP)
+    if query == 'end':
+        ClientSocket.close()
+
+
+print('** Ready to connect **')
+port = 95  # same port number as server_socket.py
+while True:
+    host = input('Enter server IP:\t')
+    ClientSocket = socket.socket()
+    try:
+        ClientSocket.connect((host, port))
+    except socket.error:
+        ClientSocket.close()  # release the failed attempt's socket before retrying
+        print("Incorrect IP or server is not running")
+    else:
+        print('\t** Connected to server **')
+        break
+
+root = tk.Tk()
+root.title("Search Engine")
+root.geometry('800x500')
+root['bg'] = '#FFFFFF'
+root.minsize(800, 500)
+root.maxsize(800, 500)
+# Minimum and maximum size are equal, so the window cannot be resized.
+user_query = tk.StringVar()
+# The BG.ppm logo is currently not displayed.
+# NOTE: .place() returns None, so text_entry and search_button hold None.
+button_font = font.Font(family='Calibri', size=8)
+text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120)
+search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150)
+scframe = VerticalScrolledFrame(root)
+scframe.pack(side='bottom', pady=30)
+
+root.mainloop()
diff --git a/front.py b/front.py
index f074c18..8fbb5e8 100644
--- a/front.py
+++ b/front.py
@@ -104,6 +104,11 @@ def clear():
     scframe.destroy()
     create()
 
+def create():
+    """Create an empty scrollable results frame at the bottom of the window."""
+    global scframe
+    scframe = VerticalScrolledFrame(root)
+    scframe.pack(side='bottom', pady=30)
 
 # Get query from the user. (The text written in search bar)
 def getquery():
@@ -117,11 +122,6 @@ def getquery():
         btn.pack(padx=10, pady=5, side=tk.TOP)
 
 
-# Create a scrollable frame to show to the user.
-def create():
-    global scframe
-    scframe = VerticalScrolledFrame(root)
-    scframe.pack(side='bottom', pady=30)
 
 
 # Make the urls that are received as strings and convert them to hyperlinks.
@@ -496,23 +496,24 @@ def updateall(filetoadd, key, docid, fp_filenames):
     print("updation successfull")
 
 
-print("Initializing GUI")
-
-root = tk.Tk()
-root.title("Search Engine")
-root.geometry('800x500')
-root['bg'] = '#FFFFFF'
-root.minsize(800, 500)
-root.maxsize(800, 500)
-lis = []
-user_query = tk.StringVar()
-# logo_path = tk.PhotoImage(file="BG.ppm")
-# logo = Label(root, image=logo_path).pack()
-button_font = font.Font(family='Calibri', size=8)
-text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120)
-search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150)
-add_file_button = tk.Button(root, text="Add File", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10)
-scframe = VerticalScrolledFrame(root)
-scframe.pack(side='bottom', pady=30)
-
-root.mainloop()
+if __name__ == '__main__':  # build the GUI only when run as a script; server_socket.py imports this module
+    print("Initializing GUI")
+
+    root = tk.Tk()
+    root.title("Search Engine")
+    root.geometry('800x500')
+    root['bg'] = '#FFFFFF'
+    root.minsize(800, 500)
+    root.maxsize(800, 500)
+    lis = []
+    user_query = tk.StringVar()
+    # The BG.ppm logo is currently not displayed.
+    # NOTE: .place() returns None, so the three widget names below hold None.
+    button_font = font.Font(family='Calibri', size=8)
+    text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120)
+    search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150)
+    add_file_button = tk.Button(root, text="Add File", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10)
+    scframe = VerticalScrolledFrame(root)
+    scframe.pack(side='bottom', pady=30)
+
+    root.mainloop()
diff --git a/server_socket.py b/server_socket.py
new file mode 100644
index 0000000..a165873
--- /dev/null
+++ b/server_socket.py
@@ -0,0 +1,42 @@
+import socket
+from _thread import *
+import front
+import pickle
+
+host = socket.gethostname()
+print(socket.gethostbyname(host))  # this machine's resolved IP (presumably what clients type in)
+port = 95  # client_socket.py connects to the same port number
+
+def client_handler(connection):
+    """Answer each query received on connection with a pickled list of result URLs."""
+    # recv() returns b'' once the peer disconnects; end the loop then,
+    # otherwise it would spin forever and never reach close().
+    for data in iter(lambda: connection.recv(2048), b''):
+        results = []
+        front.searching(data.decode('utf-8'), results)
+        payload = pickle.dumps(results)
+        connection.sendall(payload)
+        print(payload)
+    connection.close()
+
+
+def accept_connections(ServerSocket):  # wait for one client, then serve it on its own thread
+    conn, peer = ServerSocket.accept()
+    print(f'Connected to socket ---> {peer[0]}:{peer[1]}')
+    start_new_thread(client_handler, (conn,))
+
+
+def start_server(host, port):
+    """Bind to host:port and serve clients forever; exit if the port cannot be bound."""
+    ServerSocket = socket.socket()
+    try:
+        ServerSocket.bind((socket.gethostbyname(host), port))
+    except socket.error as e:
+        raise SystemExit(f'Cannot bind to port {port}: {e}') from e
+    ServerSocket.listen()
+    print(f'\n\t** Server is listening on the port {port} **')
+    while True:
+        accept_connections(ServerSocket)
+
+
+start_server(host, port)