diff --git a/HarithaTony/internship/Blank Knowledge Database.xlsx b/HarithaTony/internship/Blank Knowledge Database.xlsx new file mode 100644 index 0000000..0b5d097 Binary files /dev/null and b/HarithaTony/internship/Blank Knowledge Database.xlsx differ diff --git a/HarithaTony/internship/Letter.pdf b/HarithaTony/internship/Letter.pdf new file mode 100644 index 0000000..6b5ccde Binary files /dev/null and b/HarithaTony/internship/Letter.pdf differ diff --git a/HarithaTony/internship/__pycache__/pypdf.cpython-310.pyc b/HarithaTony/internship/__pycache__/pypdf.cpython-310.pyc new file mode 100644 index 0000000..83e430d Binary files /dev/null and b/HarithaTony/internship/__pycache__/pypdf.cpython-310.pyc differ diff --git a/HarithaTony/internship/assitant for visually impaired.pdf b/HarithaTony/internship/assitant for visually impaired.pdf new file mode 100644 index 0000000..e8b7162 Binary files /dev/null and b/HarithaTony/internship/assitant for visually impaired.pdf differ diff --git a/HarithaTony/internship/docs/assitant for visually impaired b/HarithaTony/internship/docs/assitant for visually impaired new file mode 100644 index 0000000..f77b5e4 --- /dev/null +++ b/HarithaTony/internship/docs/assitant for visually impaired @@ -0,0 +1,99 @@ + +Virtual assistant for the visually impaired +Vinayak Iyer +Department of Information Technology +Sardar Patel Institute of Technology +Mumbai, India +vinayak.iyer@spit.ac.in +Sahil Sheth +Department of Information Technology +Sardar Patel Institute of Technology +Mumbai, India +sahil.sheth@spit.ac.in + Kshitij Shah +Department of Information Technology +Sardar Patel Institute of Technology +Mumbai, India +kshitij.shah@spit.ac.in +Kailas Devadkar +Department of Information Technology +Sardar Patel Institute of Technology +Mumbai, India +kailas_devadkar@spit.ac.in + +Abstract—Research shows that people with visual +impairments are 31% less likely to access the internet than +individuals without disabilities. 
This paper illustrates the +implementation of software that provides assistance to the +visually impaired for accessing the internet. The software shall +prove instrumental in the way the internet is accessed and will +increase the ease of use drastically. Although technology has +grown by leaps and bounds, the internet - especially websites - is +still inaccessible to the visually impaired. The software provides +a way to interact with these websites with much ease. With the +use of voice commands instead of the traditional keyboard and +mouse, our software provides a new dimension to access and +provide commands to any website. The software will read out the +content of the website and then using speech to text and text to +speech modules along with selenium, our software can automate +any website. The user is free from remembering complex braille +keyboard commands or the hassle of typing; he/she can simply +voice out his/her command and the software will execute it. The +system also has the functionality of providing a summary of the +content on the website and answering questions asked by the user +with reference to the summary using a BERT model trained on +the Stanford Question Answering Dataset. This software will +revolutionize the internet and pave the way for Web3.0. +Keywords — Visually impaired; Voice control; automate +website; blind people +I. INTRODUCTION +Today there are nearly 285 million people in the world that +are visually impaired [12]. Although technology has grown +by leaps and bounds, the accessibility, especially that of the +internet for differently-abled people is still far-fetched. In this +modern world, more and more things can be performed online. +From shopping, ordering food, to booking train tickets +everything can be done online. For almost all of these online +facilities a person has to use a website. Using a website can be +a trivial task for most people but it is very difficult for visually +impaired people. 
The internet is a highly visual form of +communication; different "accessibility blockers" can hinder +different types of websites, unlike brick and mortar businesses where accessibility can be provided by including a ramp for +wheelchairs or braille interfaces. For example, researchers +found that 80% of news sites “had significant accessibility +issues,” while 70% of respondents said they were “unable to +access information and services through government +websites.” Thus, we wanted to come up with a unique way of +allowing visually impaired people to access the internet. +Although the W3C has a set of recommendations that stipulate +the rules to be followed when designing a website for the +visually impaired, not all websites necessarily stick to the high +standards in terms of accessibility. +The major challenge in developing stable software is to +include as few keystrokes as possible and to provide an end to +end experience with the help of voice alone. The inclusion of +multiple languages and setting the right pace of the speech +when played back to the user are important factors to consider. +To support the widespread usage of the software, a crucial +parameter is the dependency of the software on the local +environment and operating systems. While the tech has +evolved greatly, the accessibility, especially that of the internet, for +the differently abled is still stagnant. +Assistive technologies such as a screen reader or +magnifiers can enable visually impaired individuals to access +the internet. Unfortunately, these screen readers need to keep +the functionality of the website in mind, otherwise it becomes +difficult to read data from the website. Some of the screen +readers work only with a particular kind of browser and some +require the user to remember complex commands; thus screen +readers are not an effective solution to the problem at hand +and cannot be used to access the internet. + There are the following two common themes visible in +most websites: +1. 
Web pages are partially accessible. Some parts are
+usable for the visually impaired, while others are not.
+2. The accessibility of some web pages regressed due to
+updates on the web site. Proceedings of the Fifth International Conference on Communication and Electronics Systems (ICCES 2020)
+IEEE Conference Record # 48766; IEEE Xplore ISBN: 978-1-7281-5371-1
+978-1-7281-5371-1/20/$31.00 ©2020 IEEE 1057
+Authorized licensed use limited to: Western Sydney University. Downloaded on July 26,2020 at 05:06:57 UTC from IEEE Xplore. Restrictions apply.
\ No newline at end of file
diff --git a/HarithaTony/internship/docs/assitant for visually impaired.pdf b/HarithaTony/internship/docs/assitant for visually impaired.pdf
new file mode 100644
index 0000000..e8b7162
Binary files /dev/null and b/HarithaTony/internship/docs/assitant for visually impaired.pdf differ
diff --git a/HarithaTony/internship/interface.py b/HarithaTony/internship/interface.py
new file mode 100644
index 0000000..7dd52cd
--- /dev/null
+++ b/HarithaTony/internship/interface.py
@@ -0,0 +1,85 @@
+"""Streamlit page: upload a PDF, review the extracted fields, save them to Excel."""
+import streamlit as st
+# NOTE: "pypdf" here is the sibling pypdf.py module, not the PyPI package of that name.
+from pypdf import pdf_author, pdf_date, pdf_journal, pdf_summary, pdf_title
+import os
+from datetime import datetime
+
+# Folder that receives uploaded PDFs.
+DOCS_DIR = "docs"
+# Workbook that collects one row per processed paper.
+WORKBOOK_PATH = "Blank Knowledge Database.xlsx"
+
+st.title("PDF Reader")
+st.subheader("Upload your files to extract data!!!")
+
+
+def save_uploaded_file(uploaded_file, save_path):
+    """Write the uploaded file's bytes to save_path and return that path.
+
+    The parent folder is created when it does not exist yet.
+    """
+    parent = os.path.dirname(save_path)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    with open(save_path, "wb") as file:
+        file.write(uploaded_file.getbuffer())
+    return save_path
+
+
+def entry_db(title,year, journal, author,types, summary):
+    """Append one record as a new row at the bottom of the workbook.
+
+    Columns are filled left to right with: running id, title, year,
+    journal, author, summary, types.
+    """
+    import openpyxl
+
+    wb = openpyxl.load_workbook(WORKBOOK_PATH)
+    sheet = wb.active
+
+    # First row below the existing data.
+    last_row = sheet.max_row + 1
+
+    # The id is the row number minus 2 (presumably two header rows; confirm
+    # against the workbook layout).
+    record = (last_row - 2, title, year, journal, author, summary, types)
+    for column, value in enumerate(record, start=1):
+        sheet.cell(row=last_row, column=column).value = value
+
+    wb.save(WORKBOOK_PATH)
+
+
+uploaded_file = st.file_uploader("Upload your file as .pdf",type=["pdf"])
+
+if uploaded_file is not None:
+    # basename() drops any folder part a client may have put in the name.
+    save_path = os.path.join(DOCS_DIR, os.path.basename(uploaded_file.name))
+    save_uploaded_file(uploaded_file, save_path)
+    st.success("File saved")
+
+    # Read every field from the file that was just uploaded.
+    title = pdf_title(save_path) or ""
+    journal = pdf_journal(save_path) or ""
+    created = pdf_date(save_path)
+    # A PDF without a creation date yields None; leave the field empty then.
+    year = created.strftime("%Y-%m-%d") if isinstance(created, datetime) else ""
+    # pdf_author() ends every name with one marker character ("," or ".");
+    # strip it and keep all names rather than only the last one.
+    author = ", ".join(name[:-1] for name in pdf_author(save_path))
+    summary = pdf_summary(save_path)
+    types = ""
+
+    # Keep the widgets' return values so manual corrections are what gets saved.
+    if title != "":
+        title = st.text_input("Title", title)
+    if year != "":
+        year = st.text_input("Year", year)
+    if journal != "":
+        journal = st.text_input("Journal", journal)
+    if author != "":
+        author = st.text_input("Author", author)
+    if summary != "":
+        summary = st.text_area("Summary", summary)
+    if st.button("submit"):
+        entry_db(title, year, journal, author, types, summary)
diff --git a/HarithaTony/internship/pypdf.py b/HarithaTony/internship/pypdf.py
new file mode 100644
index 0000000..09c5ff2
--- /dev/null
+++ b/HarithaTony/internship/pypdf.py
@@ -0,0 +1,105 @@
+"""Helpers that pull metadata, author names and a summary out of a PDF."""
+from PyPDF2 import PdfReader
+from transformers import pipeline
+import dotenv
+import torch
+from functools import lru_cache
+
+
+dotenv.load_dotenv()
+
+# Sample document used when a caller does not name a PDF.
+DEFAULT_PDF = "assitant for visually impaired.pdf"
+# pdf_author() leaves a copy of the first-page text in this file.
+TEXT_DUMP = "sample.txt"
+
+
+def _load_pdf(pdf_path):
+    """Return (metadata, first-page text) of pdf_path; text is "" without pages."""
+    reader = PdfReader(pdf_path)
+    text = reader.pages[0].extract_text() if len(reader.pages) else ""
+    return reader.metadata, text or ""
+
+
+@lru_cache(maxsize=None)
+def _ner_pipeline():
+    """Build the NER pipeline once; loading the model on every call is slow."""
+    return pipeline("ner", grouped_entities=True)
+
+
+@lru_cache(maxsize=None)
+def _summarizer():
+    """Build the summarization pipeline once and reuse it."""
+    return pipeline("summarization", model="facebook/bart-large-cnn")
+
+
+def pdf_title(pdf_path=DEFAULT_PDF):
+    """Return the title stored in the PDF metadata, or None when absent."""
+    meta, _ = _load_pdf(pdf_path)
+    return meta.title if meta is not None else None
+
+
+def pdf_date(pdf_path=DEFAULT_PDF):
+    """Return the creation date stored in the PDF metadata, or None when absent."""
+    meta, _ = _load_pdf(pdf_path)
+    return meta.creation_date if meta is not None else None
+
+
+def pdf_journal(pdf_path=DEFAULT_PDF):
+    """Return the metadata subject, which this project uses as the journal name."""
+    meta, _ = _load_pdf(pdf_path)
+    return meta.subject if meta is not None else None
+
+
+def pdf_author(pdf_path=DEFAULT_PDF):
+    """Return the person names the NER model finds on the first page.
+
+    Each list entry carries one trailing marker: "." after a single-character
+    entity, "," after any longer name. The first-page text is also written to
+    TEXT_DUMP on a best-effort basis.
+    """
+    _, text = _load_pdf(pdf_path)
+    try:
+        with open(TEXT_DUMP, "w", encoding='utf-8') as file:
+            file.write(text)
+        print("File created successfully.")
+    except (OSError, UnicodeError) as e:
+        print("An error occurred:", str(e))
+
+    if not text:
+        return []
+
+    auth = []
+    for entity in _ner_pipeline()(text):
+        if entity['entity_group'] != 'PER':
+            continue
+        word = entity['word']
+        if len(word) <= 1:
+            # Strip punctuation and spaces from a lone character.
+            word = word.replace(",", "").replace("'", "").replace(" ", "")
+            auth.append(word + ".")
+        else:
+            auth.append(word + ",")
+    return auth
+
+
+def pdf_summary(pdf_path=DEFAULT_PDF):
+    """Return a summary of the first page of pdf_path ("" when it has no text).
+
+    The text is summarized in fixed-size pieces and the partial summaries are
+    joined with spaces.
+    """
+    _, text = _load_pdf(pdf_path)
+    if not text:
+        return ""
+
+    # Characters per piece; this is a character count, not a token count.
+    max_chunk_length = 512
+    chunks = [text[i:i + max_chunk_length] for i in range(0, len(text), max_chunk_length)]
+
+    summarizer = _summarizer()
+    summaries = [
+        summarizer(chunk, min_length=10, max_length=30)[0]['summary_text']
+        for chunk in chunks
+    ]
+    return ' '.join(summaries)
\ No newline at end of file