-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTextExtract.py
More file actions
44 lines (37 loc) · 1.57 KB
/
TextExtract.py
File metadata and controls
44 lines (37 loc) · 1.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
import time
from selenium import webdriver
from PIL import Image
import cv2
import pytesseract
def crop():
    """Screenshot the results site and save a cropped region of the page.

    Opens http://results.drait.in/ in Firefox, clicks the element whose text
    contains 'Student', saves a full screenshot to the desktop, crops the
    fixed box (600, 235, 1050, 280) out of it (presumably the captcha, going
    by the output file name), shows the crop in the default image viewer and
    writes it to ``CaptchaCropped.png``.

    Returns:
        str: "Success" when the cropped image was written, "Failed" when any
        step raised an exception.
    """
    screenshot_path = r"C:\Users\V Sangarya\Desktop\ResultsHomePage.png"
    cropped_path = r"C:\Users\V Sangarya\Desktop\CaptchaCropped.png"

    firefox_browser = None
    try:
        # NOTE(review): `executable_path` and `find_element_by_xpath` are
        # legacy Selenium APIs; kept so behaviour matches the Selenium
        # version this script was written against.
        firefox_browser = webdriver.Firefox(executable_path=r'C:\Program Files\gecko\geckodriver.exe')
        time.sleep(2)
        firefox_browser.get("http://results.drait.in/")
        time.sleep(2)
        # click() returns None, so there is nothing worth keeping from it.
        firefox_browser.find_element_by_xpath("//*[contains(text(), 'Student')]").click()
        # Fixed wait to let the page settle before taking the screenshot.
        time.sleep(4)
        firefox_browser.get_screenshot_as_file(screenshot_path)

        # Context manager closes the screenshot file handle once cropped.
        with Image.open(screenshot_path) as im:
            im1 = im.crop((600, 235, 1050, 280))
        im1.show()
        im1.save(cropped_path)
        # Must be a plain string: callers compare the result to "Success".
        # (A trailing comma here would turn it into a 1-tuple.)
        return "Success"
    except Exception as exc:
        # Best-effort contract: report the failure instead of raising, but
        # do not hide the reason and do not trap KeyboardInterrupt/SystemExit.
        print(f"crop failed: {exc!r}")
        return "Failed"
    finally:
        # Always shut the browser down so repeated calls do not leak
        # Firefox/geckodriver processes.
        if firefox_browser is not None:
            try:
                firefox_browser.quit()
            except Exception as exc:
                print(f"could not close browser: {exc!r}")
def textExtract():
    """Run OCR on the cropped image produced by ``crop()``.

    Calls ``crop()``; when it reports "Success", loads
    ``CaptchaCropped.png`` from the desktop, converts it to grayscale,
    applies dilate/erode and a Gaussian adaptive threshold, and passes the
    result to Tesseract.

    Returns:
        str | None: the text returned by ``pytesseract.image_to_string``, or
        ``None`` (after printing a retry message) when the crop step did not
        report "Success" or the cropped image could not be loaded.
    """
    cropCode = crop()
    if cropCode != "Success":
        print("Retry, image crop failed")
        return None

    pytesseract.pytesseract.tesseract_cmd = r'C:\Users\V Sangarya\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'

    img = cv2.imread(r"C:\Users\V Sangarya\Desktop\CaptchaCropped.png")
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None
        # rather than raising; bail out before cvtColor fails on it.
        print("Retry, cropped image could not be read")
        return None

    config = '-l eng --oem 1 --psm 3'

    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): with a 1x1 kernel the dilate/erode pair leaves the image
    # effectively unchanged; a larger kernel may have been intended.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # Binarise with a 31-pixel Gaussian neighbourhood and offset 2.
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)

    text = pytesseract.image_to_string(img, config=config)
    return text