From be74fb3570cf499cc02737f0f5846fcd04ad3825 Mon Sep 17 00:00:00 2001 From: zaidbinusama <54680765+zaidbinusama@users.noreply.github.com> Date: Wed, 17 May 2023 13:48:39 +0500 Subject: [PATCH 1/2] made fixes to author updation when new file added --- front.py | 41 ++++++------- main.py | 179 +++++++++++++++++++++++++++++-------------------------- 2 files changed, 112 insertions(+), 108 deletions(-) diff --git a/front.py b/front.py index 604bc4d..f074c18 100644 --- a/front.py +++ b/front.py @@ -1,12 +1,3 @@ -''' -DSA PROJECT MADE BY: -1. SAIF ALI (336561) -2. HASEEB MAHMOOD (334718) -3. TALHA MAJEED KHAN (336254) -4. ALI USMAN BUTT (335520) - -PRESENTED TO: SIR FAISAL SHAFAIT -''' # Importing external dependencies. import collections @@ -31,7 +22,6 @@ key = int(fp_temp.readline()) docid = int(fp_temp.readline()) fp_temp.close() - fp_a = open("author.json", "r") author_dictionary = json.load(fp_a) fp_a.close() @@ -288,7 +278,7 @@ def searching(search, lis): if len(mwq[docid][0]) != 0: proximity[docid] = mwq[docid][0] if len(common_title) > 0: - print("\n------------TITLE OCCURRENCES-----------------\n") + # print("\n------------TITLE OCCURRENCES-----------------\n") count = 0 for t in common_title: if t is None: @@ -300,7 +290,7 @@ def searching(search, lis): break if len(common_set) == 0: - print("No such combination of words exist in the database") + # print("No such combination of words exist in the database") for wordids in wordslist: # if there is no word from query in title or in close proximity or common in some document sth = singlewordwithid(wordids) counter = 0 @@ -314,14 +304,14 @@ def searching(search, lis): continue else: # printing words in close proximity first and the ones in common set later if len(proximity) != 0: - print("\n------------CLOSE PROXIMITY OCCURRENCES-----------------\n") + # print("\n------------CLOSE PROXIMITY OCCURRENCES-----------------\n") var = 0 for val in proximity.keys(): lis.append(url_dic[f"{val}"]) var +=1 if var == 30: break - print("\n------------OTHER OCCURRENCES---------------------------\n") + # print("\n------------OTHER OCCURRENCES---------------------------\n") var = 0 for value in common_set: if value not in proximity.keys(): @@ -330,7 +320,7 @@ def searching(search, lis): if var == 30: break else: - print("\n------------OTHER OCCURRENCES---------------------------\n") + # print("\n------------OTHER OCCURRENCES---------------------------\n") var = 0 for value in common_set: if value not in proximity.keys(): @@ -360,14 +350,14 @@ def searching(search, lis): counter = 0 if len(ts) > 0 or len(sws) > 0: if len(ts) > 0: - print("\n------------TITLE OCCURRENCES-----------------\n") + # print("\n------------TITLE OCCURRENCES-----------------\n") for t in ts: lis.append(url_dic[f"{t}"]) counter += 1 if counter == 30: break if len(sws) > 0: - print("\n------------OTHER OCCURRENCES-----------------\n") + # print("\n------------OTHER OCCURRENCES-----------------\n") counter = 0 for key in sws.keys(): lis.append(url_dic[f"{key}"]) @@ -434,7 +424,12 @@ def updateall(filetoadd, key, docid, fp_filenames): author_tokens = word_tokenize(y[i]["author"]) #updating authors author_tokens = [w.lower() for w in author_tokens] author = "".join(author_tokens) - author_dictionary[f'{author}'].append(docid) + if author not in author_dictionary: + author_dictionary[f"{author}"] = [docid] + elif author in author_dictionary: + author_docids = author_dictionary[f"{author}"] + author_docids.append(docid) + author_dictionary[f"{author}"] = author_docids title_tokens = word_tokenize(y[i]["title"]) word_tokens = [w.lower() for w in word_tokens] table = str.maketrans('', '', string.punctuation) @@ -511,12 +506,12 @@ def updateall(filetoadd, key, docid, fp_filenames): root.maxsize(800, 500) lis = [] user_query = tk.StringVar() -logo_path = tk.PhotoImage(file="BG.ppm") -logo = Label(root, image=logo_path).pack() -button_font = font.Font(family='Arial', size=8) +# logo_path = tk.PhotoImage(file="BG.ppm") +# logo = Label(root, image=logo_path).pack() +button_font = font.Font(family='Calibri', size=8) text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120) -search_button = tk.Button(root, text="search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150) -add_file_button = tk.Button(root, text="add file", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10) +search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150) +add_file_button = tk.Button(root, text="Add File", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10) scframe = VerticalScrolledFrame(root) scframe.pack(side='bottom', pady=30) diff --git a/main.py b/main.py index db4cec3..702f365 100644 --- a/main.py +++ b/main.py @@ -74,10 +74,18 @@ def check_if_string_in_file(file_name, string_to_search): fi_dictionary = defaultdict(list) ii_dictionary = multi_dict(2, list) title_dictionary = defaultdict(list) -fp_temp = open("storage.txt", "r") -key = int(fp_temp.readline()) -docid = int(fp_temp.readline()) -fp_temp.close() +if os.path.isfile("storage.txt"): + fp_temp = open("storage.txt", "r") + try: + key = int(fp_temp.readline()) + docid = int(fp_temp.readline()) + except: + key = 0 + docid = 0 + fp_temp.close() +else: + key = 0 + docid = 0 # making of lexicon for fname in glob.glob("newsdata/*.json"): @@ -90,94 +98,95 @@ def check_if_string_in_file(file_name, string_to_search): fp_filenames.write("\n") y = json.load(fp) for i in range(len(y)): - # position = 0 - # word_tokens = word_tokenize(y[i]["content"]) - # url_dictionary[f"{docid}"] = y[i]["url"] + position = 0 + word_tokens = word_tokenize(y[i]["content"]) + url_dictionary[f"{docid}"] = y[i]["url"] author_tokens = word_tokenize(y[i]["author"]) author_tokens = [w.lower() for w in author_tokens] author = "".join(author_tokens) author_dictionary[f'{author}'].append(docid) - # title_tokens = word_tokenize(y[i]["title"]) - # - # word_tokens = [w.lower() for w in word_tokens] - # table = str.maketrans('', '', string.punctuation) - # strip = [w.translate(table) for w in word_tokens] - # - # title_tokens = [t.lower() for t in title_tokens] - # title_table = str.maketrans('', '', string.punctuation) - # title_strip = [t.translate(title_table) for t in title_tokens] - -# for w in strip: -# if w.isalpha() and w not in stop_words: -# x = snow_stemmer.stem(w) -# if x not in lex_dictionary: -# # making of lexicon -# lex_dictionary[x] = key -# key += 1 -# if x in lex_dictionary: # making of inverted index -# if str(lex_dictionary[x]) not in ii_dictionary: -# # wordid does not exist -# ii_dictionary.update({f"{lex_dictionary[x]}": {}}) -# if str(lex_dictionary[x]) in ii_dictionary: -# if str(docid) not in ii_dictionary[f"{lex_dictionary[x]}"]: -# # wordid exists but doc id doesnt -# ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"] = [position] -# elif str(docid) in ii_dictionary[f"{lex_dictionary[x]}"]: -# # wordid and docid exists -# temp_list = [ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"]] -# temp_list = list(flatten(temp_list)) -# temp_list.append(position) -# ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"] = temp_list -# if str(docid) not in fi_dictionary: -# # making of forward index -# fi_dictionary[f"{docid}"] = [lex_dictionary[x]] -# if str(docid) in fi_dictionary: -# if lex_dictionary[x] not in fi_dictionary[f"{docid}"]: -# fi_dictionary[f"{docid}"].append(lex_dictionary[x]) -# position += 1 -# for t in title_strip: -# if t.isalpha() and t not in stop_words: -# t = snow_stemmer.stem(t) -# if t not in lex_dictionary: -# lex_dictionary[t] = key -# key += 1 -# if t in lex_dictionary: -# if str(lex_dictionary[t]) not in title_dictionary: -# title_dictionary[f"{lex_dictionary[t]}"] = [docid] -# if str(lex_dictionary[t]) in title_dictionary: -# if docid not in title_dictionary[f"{lex_dictionary[t]}"]: -# title_dictionary[f"{lex_dictionary[t]}"].append(docid) + title_tokens = word_tokenize(y[i]["title"]) + + word_tokens = [w.lower() for w in word_tokens] + table = str.maketrans('', '', string.punctuation) + strip = [w.translate(table) for w in word_tokens] + + title_tokens = [t.lower() for t in title_tokens] + title_table = str.maketrans('', '', string.punctuation) + title_strip = [t.translate(title_table) for t in title_tokens] + for w in strip: + if w.isalpha() and w not in stop_words: + x = snow_stemmer.stem(w) + if x not in lex_dictionary: + # making of lexicon + lex_dictionary[x] = key + key += 1 + if x in lex_dictionary: # making of inverted index + if str(lex_dictionary[x]) not in ii_dictionary: + ii_dictionary.update({f"{lex_dictionary[x]}": {}}) + ii_dictionary.update({f"{lex_dictionary[x]}": {}}) + if str(lex_dictionary[x]) in ii_dictionary: + # wordid does not exist + if str(docid) not in ii_dictionary[f"{lex_dictionary[x]}"]: + # wordid exists but doc id doesnt + ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"] = [position] + elif str(docid) in ii_dictionary[f"{lex_dictionary[x]}"]: + # wordid and docid exists + temp_list = [ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"]] + temp_list = list(flatten(temp_list)) + temp_list.append(position) + ii_dictionary[f"{lex_dictionary[x]}"][f"{docid}"] = temp_list + if str(docid) not in fi_dictionary: + # making of forward index fi_dictionary[f"{docid}"] = [lex_dictionary[x]] if str(docid) in fi_dictionary: + if lex_dictionary[x] not in fi_dictionary[f"{docid}"]: + fi_dictionary[f"{docid}"].append(lex_dictionary[x]) + position += 1 + for t in title_strip: + if t.isalpha() and t not in stop_words: + t = snow_stemmer.stem(t) + if t not in lex_dictionary: + lex_dictionary[t] = key + key += 1 + if t in lex_dictionary: + if str(lex_dictionary[t]) not in title_dictionary: + title_dictionary[f"{lex_dictionary[t]}"] = [docid] + if str(lex_dictionary[t]) in title_dictionary: + if docid not in title_dictionary[f"{lex_dictionary[t]}"]: + title_dictionary[f"{lex_dictionary[t]}"].append(docid) docid = docid + 1 -# print(fp.name) -# print(docid) -# write = 1 -# else: -# continue -# -# fp_temp = open("storage.txt", "w") -# fp_temp.write(str(key)) -# fp_temp.write("\n") -# fp_temp.write(str(docid)) -# fp_temp.close() -# if write == 1: -# fp_fi = open("forwardindex.json", "w") -# fp_ii = open("invertedindex.json", "w") -# fp_lex = open("lexicon.json", "w") -# fp_url = open("urls.json", "w") -# fp_ti = open("titleinverted.json", "w") -# json.dump(lex_dictionary, fp_lex) -# json.dump(fi_dictionary, fp_fi) -# json.dump(ii_dictionary, fp_ii) -# json.dump(url_dictionary, fp_url) -# json.dump(title_dictionary, fp_ti) -# fp_url.close() -# fp_ii.close() -# fp_lex.close() -# fp_ti.close() -# -# fp_fi.close() + print(fp.name) + print(docid) + write = 1 + else: + continue + +fp_temp = open("storage.txt", "w") +fp_temp.write(str(key)) +fp_temp.write("\n") +fp_temp.write(str(docid)) +fp_temp.close() +if write == 1: + fp_fi = open("forwardindex.json", "w") + fp_ii = open("invertedindex.json", "w") + fp_lex = open("lexicon.json", "w") + fp_url = open("urls.json", "w") + fp_ti = open("titleinverted.json", "w") + json.dump(lex_dictionary, fp_lex) + json.dump(fi_dictionary, fp_fi) + json.dump(ii_dictionary, fp_ii) + json.dump(url_dictionary, fp_url) + json.dump(title_dictionary, fp_ti) + fp_url.close() + fp_ii.close() + fp_lex.close() + fp_ti.close() + +try: + fp_fi.close() +except: + pass fp_a = open("author.json", "w") json.dump(author_dictionary,fp_a) fp_a.close() From 3872ffe555e126278f4187be349d015f96dab1ee Mon Sep 17 00:00:00 2001 From: zaidbinusama <54680765+zaidbinusama@users.noreply.github.com> Date: Wed, 17 May 2023 17:26:19 +0500 Subject: [PATCH 2/2] added client server architecture --- client_socket.py | 118 +++++++++++++++++++++++++++++++++++++++++++++++ front.py | 51 ++++++++++---------- server_socket.py | 42 +++++++++++++++++ 3 files changed, 186 insertions(+), 25 deletions(-) create mode 100644 client_socket.py create mode 100644 server_socket.py diff --git a/client_socket.py b/client_socket.py new file mode 100644 index 0000000..185d846 --- /dev/null +++ b/client_socket.py @@ -0,0 +1,118 @@ +import socket +import tkinter as tk +from tkinter import font +import pickle +import webbrowser + +Response = [] +# Class to create vertical scrollable frame to show urls to the user. +class VerticalScrolledFrame(tk.Frame): + """A pure Tkinter scrollable frame that actually works! + + * Use the 'interior' attribute to place widgets inside the scrollable frame + * Construct and pack/place/grid normally + * This frame only allows vertical scrolling + """ + + def __init__(self, parent, *args, **kw): + tk.Frame.__init__(self, parent, *args, **kw) + + # create a canvas object and a vertical scrollbar for scrolling it + vscrollbar = tk.Scrollbar(self, orient=tk.VERTICAL) + vscrollbar.pack(fill=tk.Y, side=tk.RIGHT, expand=tk.FALSE) + canvas = tk.Canvas(self, bd=0, highlightthickness=0, + yscrollcommand=vscrollbar.set) + # canvas.place(x=2,y=200) + canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=tk.TRUE) + vscrollbar.config(command=canvas.yview) + + # reset the view + canvas.xview_moveto(0) + canvas.yview_moveto(0) + + # create a frame inside the canvas which will be scrolled with it + self.interior = interior = tk.Frame(canvas) + interior_id = canvas.create_window(0, 0, window=interior, + anchor=tk.NW) + + # track changes to the canvas and frame width and sync them, + # also updating the scrollbar + def _configure_interior(event): + # update the scrollbars to match the size of the inner frame + size = (interior.winfo_reqwidth(), interior.winfo_reqheight()) + canvas.config(scrollregion="0 0 %s %s" % size) + if interior.winfo_reqwidth() != canvas.winfo_width(): + # update the canvas's width to fit the inner frame + canvas.config(width=interior.winfo_reqwidth()) + + interior.bind('', _configure_interior) + + def _configure_canvas(event): + if interior.winfo_reqwidth() != canvas.winfo_width(): + # update the inner frame's width to fill the canvas + canvas.itemconfigure(interior_id, width=canvas.winfo_width()) + + canvas.bind('', _configure_canvas) + + +# Destroy the current result frame and create a new frame. +def frame_clear(): + scframe.destroy() + create() + +# Create a scrollable frame to show to the user. +def create(): + global scframe + scframe = VerticalScrolledFrame(root) + scframe.pack(side='bottom', pady=30) + +def openlink(x): + webbrowser.open_new(x) + +def getquery(): + frame_clear() + e = user_query.get() + ClientSocket.send(str.encode(e)) + Response = ClientSocket.recv(2048) + Response = pickle.loads(Response) + for i, x in enumerate(Response): + + btn = tk.Button(scframe.interior, height=1, width=100, relief=tk.FLAT, bg="gray99", fg="blue", font="Dosis", + text=f"{Response[i]}", command=lambda i=i, x=x: openlink(x)) + btn.pack(padx=10, pady=5, side=tk.TOP) + Response.clear() + if e == 'end': + ClientSocket.close() + + +print('** Ready to connect **') +while True: + host = input('Enter server IP:\t') + port = 95 + + ClientSocket = socket.socket() + + try: + ClientSocket.connect((host, port)) + print('\t** Connected to server **') + break + except socket.error as e: + print("Incorrect IP or server is not running") + +root = tk.Tk() +root.title("Search Engine") +root.geometry('800x500') +root['bg'] = '#FFFFFF' +root.minsize(800, 500) +root.maxsize(800, 500) +# lis = [] +user_query = tk.StringVar() +# logo_path = tk.PhotoImage(file="BG.ppm") +# logo = Label(root, image=logo_path).pack() +button_font = font.Font(family='Calibri', size=8) +text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120) +search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150) +scframe = VerticalScrolledFrame(root) +scframe.pack(side='bottom', pady=30) + +root.mainloop() diff --git a/front.py b/front.py index f074c18..8fbb5e8 100644 --- a/front.py +++ b/front.py @@ -104,6 +104,11 @@ def clear(): scframe.destroy() create() +# Create a scrollable frame to show to the user. +def create(): + global scframe + scframe = VerticalScrolledFrame(root) + scframe.pack(side='bottom', pady=30) # Get query from the user. (The text written in search bar) def getquery(): @@ -117,11 +122,6 @@ def getquery(): btn.pack(padx=10, pady=5, side=tk.TOP) -# Create a scrollable frame to show to the user. -def create(): - global scframe - scframe = VerticalScrolledFrame(root) - scframe.pack(side='bottom', pady=30) # Make the urls that are received as strings and convert them to hyperlinks. @@ -496,23 +496,24 @@ def updateall(filetoadd, key, docid, fp_filenames): print("updation successfull") -print("Initializing GUI") - -root = tk.Tk() -root.title("Search Engine") -root.geometry('800x500') -root['bg'] = '#FFFFFF' -root.minsize(800, 500) -root.maxsize(800, 500) -lis = [] -user_query = tk.StringVar() -# logo_path = tk.PhotoImage(file="BG.ppm") -# logo = Label(root, image=logo_path).pack() -button_font = font.Font(family='Calibri', size=8) -text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120) -search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150) -add_file_button = tk.Button(root, text="Add File", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10) -scframe = VerticalScrolledFrame(root) -scframe.pack(side='bottom', pady=30) - -root.mainloop() +if __name__ == '__main__': + print("Initializing GUI") + + root = tk.Tk() + root.title("Search Engine") + root.geometry('800x500') + root['bg'] = '#FFFFFF' + root.minsize(800, 500) + root.maxsize(800, 500) + lis = [] + user_query = tk.StringVar() + # logo_path = tk.PhotoImage(file="BG.ppm") + # logo = Label(root, image=logo_path).pack() + button_font = font.Font(family='Calibri', size=8) + text_entry = tk.Entry(root, textvariable=user_query, width=55, bg='#C0C0C0').place(x=230, y=120) + search_button = tk.Button(root, text="Search", font=button_font, padx=1, pady=1, command=getquery).place(x=375, y=150) + add_file_button = tk.Button(root, text="Add File", font=button_font, padx=4, pady=2, command=getFile).place(x=740, y=10) + scframe = VerticalScrolledFrame(root) + scframe.pack(side='bottom', pady=30) + + root.mainloop() diff --git a/server_socket.py b/server_socket.py new file mode 100644 index 0000000..a165873 --- /dev/null +++ b/server_socket.py @@ -0,0 +1,42 @@ +import socket +from _thread import * +import front +import pickle + +host = socket.gethostname() +print(socket.gethostbyname(host)) +port = 95 + +def client_handler(connection): + + while True: + results = [] + data = connection.recv(2048) + query = data.decode('utf-8') + front.searching(str(query),results) + results = pickle.dumps(results) + connection.send(results) + print(results) + connection.close() + + +def accept_connections(ServerSocket): + Client, address = ServerSocket.accept() + print('Connected to socket ---> ' + address[0] + ':' + str(address[1])) + start_new_thread(client_handler, (Client, )) + + +def start_server(host, port): + ServerSocket = socket.socket() + try: + ServerSocket.bind((socket.gethostbyname(host), port)) + except socket.error as e: + print(str(e)) + print(f'\n\t** Server is listing on the port {port} **') + ServerSocket.listen() + + while True: + accept_connections(ServerSocket) + + +start_server(host, port)