-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest2.py
More file actions
91 lines (67 loc) · 4.11 KB
/
test2.py
File metadata and controls
91 lines (67 loc) · 4.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
"""
Created on Fri May 17 11:56:31 2019
@author: ashrith
"""
import io
import os
import re
# Imports the Google Cloud client library
from google.cloud import vision
from google.cloud.vision import types
# Tell the Google client library which service-account key file to use.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "VisionApiTest-b5708e0fda63.json"

# Create the Vision API client used for the requests further down.
client = vision.ImageAnnotatorClient()

# Path of the image to annotate, resolved relative to this script's folder.
_script_dir = os.path.dirname(__file__)
file_name = os.path.join(_script_dir, 'deg/degree/rr.jpg')
# Month names, spelled out or as three-letter abbreviations.
_MONTH = (
    r"(?:January|February|March|April|May|June|July|August|September"
    r"|October|November|December"
    r"|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
)

# Day of month with an optional ordinal suffix ("5", "21st", "22nd", "13th").
_DAY = r"[0-9]{1,2}(?:st|nd|rd|th)?"

# Separator between the parts of a numeric date: one of - . | /
_SEP = r"\s?[-.|/]\s?"

# Date formats, tried in this order at every position of the text.
_DATE_RE = re.compile(
    "|".join((
        # "May 2019", "June 5, 2018", "January 23rd , 2020"
        _MONTH + r"(?:\s" + _DAY + r")?\s?,?\s\d{4}",
        # "12/05/2017", "12 - 5 - 2017"
        r"[0-9]{1,2}" + _SEP + r"[0-9]{1,2}" + _SEP + r"[0-9]{4}",
        # "21st May 2019", "5 Jan 2018"
        _DAY + r"\s" + _MONTH + r"\s[0-9]{4}",
        # Year range such as "2015-2019"
        r"[0-9]{4}-[0-9]{4}",
    ))
)

_DEGREE_RE = re.compile(
    # Spelled-out bachelor degrees.
    r"Bachelor\s+of\s+(?:Science|Engineering|Commerce"
    r"|Computer\s+Application|Hotel\s+Management"
    r"|Business\s+(?:Management|Administration)|Law"
    r"|Fashion\s+Technology|Technology|Medicine|Arts)"
    # Spelled-out master degrees.  The "degree"/"program" word is optional
    # together with its own trailing whitespace, so a plain
    # "Master of Science" matches.
    r"|Masters?\s+(?:(?:degree|program)\s+)?"
    r"(?:(?:of|in)\s+(?:Science|Engineering|Commerce"
    r"|Business\s+(?:Management|Administration)|Fine\s+Arts"
    r"|Computer\s+Application|Technology|Arts)"
    r"|in\s+Management)"
    # Abbreviations.  Word boundaries stop "MS" from matching inside words
    # such as "ITEMS"; the dot is a literal, optional dot.
    r"|\b(?:B\s?\.?\s?Com|BCA|BHM|BBA|LLB|BFT|B\s?\.?\s?Tech|MBBS|MS"
    r"|M\s?\.?\s?Com|MBA|MFA|MCA|MIM|M\s?\.?\s?Tech)\b"
)


def parameterPrint(str):
    """Extract the first date and the first degree name found in a text.

    Matching is case-sensitive.  When a value is not found, a notice
    ("date not found" / "degree not found") is printed and an empty string
    is returned in its place.

    Args:
        str: The text to search.  The parameter name shadows the builtin
            and is kept only so existing callers keep working.

    Returns:
        A two-element list ``[date, degree]`` holding the matched substrings
        exactly as they appear in the text, or ``""`` for a missing value.
    """
    text = str  # alias so the shadowed builtin name is not used below

    date_match = _DATE_RE.search(text)
    if date_match is None:
        print("date not found")

    degree_match = _DEGREE_RE.search(text)
    if degree_match is None:
        print("degree not found")

    match_date = date_match.group() if date_match is not None else ""
    match_degree = degree_match.group() if degree_match is not None else ""
    return [match_date, match_degree]
# Load the image bytes and wrap them in a Vision API image message.
with io.open(file_name, 'rb') as image_file:
    content = image_file.read()
image = types.Image(content=content)

# --- Step 1: OCR the document and rebuild its text word by word. ---
ocr_response = client.document_text_detection(image=image)
if ocr_response.error.message:
    # Without this check a failed request would be reported as
    # "Not a Degree Certificate." instead of as an error.
    raise RuntimeError(
        "Vision API text detection failed: {}".format(ocr_response.error.message)
    )

ocr_words = []
for page in ocr_response.full_text_annotation.pages:
    for block in page.blocks:
        for paragraph in block.paragraphs:
            for word in paragraph.words:
                ocr_words.append(''.join(symbol.text for symbol in word.symbols))

# Every word is followed by a single space (including the last one).
ocr_text = ''.join(word + " " for word in ocr_words)
print(ocr_text)

# The text must mention at least one degree-related keyword.
# ("masters"/"bachelors" are covered by "master"/"bachelor".)
ocr_lower = ocr_text.lower()
text_keywords = ("degree", "master", "bachelor", "diploma", "doctor")
text_looks_like_degree = any(keyword in ocr_lower for keyword in text_keywords)

# --- Step 2: ask the API for image labels and look for certificate labels. ---
label_response = client.label_detection(image=image)
if label_response.error.message:
    raise RuntimeError(
        "Vision API label detection failed: {}".format(label_response.error.message)
    )

label_text = " ".join(
    label.description for label in label_response.label_annotations
).lower()
label_keywords = ("academic certificate", "diploma", "degree")
labels_look_like_degree = any(keyword in label_text for keyword in label_keywords)

# --- Step 3: report the verdict; extract date and degree when both agree. ---
if text_looks_like_degree and labels_look_like_degree:
    print("It is a Degree Certificate.")
    lis = parameterPrint(ocr_text)
    print("DATE : ",lis[0],"\n","DEGREE : ",lis[1])
else:
    print("Not a Degree Certificate.")
    # Same effect as the interactive helper exit(), without relying on the
    # `site` module having injected it.
    raise SystemExit