-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPreProcessing.py
More file actions
91 lines (75 loc) · 3.19 KB
/
PreProcessing.py
File metadata and controls
91 lines (75 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from PIL import Image
from scipy import misc, ndimage
from skimage import data, io, filters, exposure, img_as_uint
from skimage.exposure import equalize_hist
from skimage.color import rgb2gray
from skimage.filters import threshold_mean, median, rank
from skimage.morphology import disk
import matplotlib.pyplot as plt
import numpy as np
import threading, os, glob
# Side length, in pixels, of the square canvas created by prepareForMNIST.
SIZE = 28
# Folder that receives the cleaned character images (written by saveImage in
# the script section) and that prepareForMNIST later scans for .png files.
destinationPath = "/Users/daniellages/Documents/Computer Science/Year 3/OCR/OCR-Project/processed/"
def noiseReduction(image):
    """Smooth *image* with two median-filter passes and return the result.

    The first pass uses a full 3x3 neighbourhood; the second uses a radius-1
    disk footprint.

    Earlier revisions overwrote random pixels with 0 and 255 between the two
    passes. That step added noise instead of removing it, made the output
    differ from run to run, and wrote values outside the range of the image
    the caller passes in, so it has been removed.

    Args:
        image: 2-D greyscale image array.

    Returns:
        The median-filtered image array.
    """
    image = filters.median(image, np.ones((3, 3)))
    image = median(image, disk(1))
    return image
def thresholding(image):
    """Binarise *image* around the value returned by ``threshold_mean``.

    Args:
        image: greyscale image array.

    Returns:
        A boolean array that is True wherever a pixel is strictly greater
        than the computed threshold.
    """
    cutoff = threshold_mean(image)
    return image > cutoff
def modify(path):
    """Load, clean and binarise every JPEG image found directly in *path*.

    Each ``.jpeg``/``.jpg`` file is read, converted to greyscale, passed
    through noiseReduction and thresholding, shown on screen, and collected.

    Args:
        path: directory containing the input images.

    Returns:
        A list with one thresholded image per JPEG file, in sorted file-name
        order. The list is empty when the directory holds no JPEG files.
    """
    characterArray = []
    # os.listdir returns entries in arbitrary order; sorting keeps the list
    # order (and the numbers saveImage derives from it) reproducible.
    for fileName in sorted(os.listdir(path)):
        if not fileName.endswith((".jpeg", ".jpg")):
            continue
        # Build the location from the *path* argument. The previous code used
        # the charactersPath global, which only exists when the file is run
        # as a script and silently ignored the directory the caller asked for.
        image = io.imread(os.path.join(path, fileName))
        image = rgb2gray(image)
        image = noiseReduction(image)
        image = thresholding(image)
        characterArray.append(image)
        # Preview the processed character.
        io.imshow(image)
        io.show()
    return characterArray
def saveImage(characterArray, path):
    """Write every image in *characterArray* to disk as a PNG file.

    Files are named ``file<index>.png`` where the index is the image's
    position in the sequence, starting at 0. *path* is used as a plain string
    prefix, so it has to end with a path separator. Each image is converted
    with ``img_as_uint`` before being written.

    Args:
        characterArray: sequence of images to save.
        path: directory prefix for the output files.
    """
    index = 0
    for picture in characterArray:
        target = "".join((path, "file", str(index), ".png"))
        io.imsave(target, img_as_uint(picture))
        index += 1
def prepareForMNIST(sourcePath=None):
    """Fit every PNG in a folder into a SIZE x SIZE greyscale canvas.

    Each image is converted to 8-bit greyscale, scaled so that its longer
    side becomes 20 pixels while the aspect ratio is kept, and pasted in the
    centre of a white SIZE x SIZE canvas.

    Args:
        sourcePath: directory to scan for ``.png`` files. Defaults to the
            module-level ``destinationPath`` when omitted.

    Returns:
        A list of PIL images, one per PNG file, in sorted file-name order.
    """
    if sourcePath is None:
        sourcePath = destinationPath
    boxSize = 20  # longest side of the scaled glyph inside the canvas
    arrayOfOutputs = []
    # Sorted so the output order is reproducible; os.listdir order is arbitrary.
    for fileName in sorted(os.listdir(sourcePath)):
        if not fileName.endswith(".png"):
            continue
        # The context manager closes the underlying file; convert() returns an
        # independent, fully loaded copy that stays valid afterwards.
        with Image.open(os.path.join(sourcePath, fileName)) as opened:
            glyph = opened.convert('L')
        width, height = glyph.size
        # ">=" makes square images take this branch; previously they matched
        # neither branch and a blank canvas was appended.
        if height >= width:
            newHeight = boxSize
            # max(1, ...) keeps very thin images from collapsing to a
            # zero-pixel side, which resize() cannot handle.
            newWidth = max(1, int(round(boxSize * width / float(height))))
        else:
            # Wide image: the width is the side pinned to boxSize. The earlier
            # code passed (scaledHeight, 20) here, swapping the two axes.
            newWidth = boxSize
            newHeight = max(1, int(round(boxSize * height / float(width))))
        scaled = glyph.resize((newWidth, newHeight), Image.LANCZOS)
        left = int(round((SIZE - newWidth) / 2.0))
        top = int(round((SIZE - newHeight) / 2.0))
        canvas = Image.new('L', (SIZE, SIZE), 255)
        canvas.paste(scaled, (left, top))
        arrayOfOutputs.append(canvas)
    return arrayOfOutputs
if __name__ == "__main__":
    # Folder holding the raw input images to process.
    charactersPath = "/Users/daniellages/Documents/Computer Science/Year 3/OCR/OCR-Project/inputImages/"
    # Folder that receives the final canvases produced by prepareForMNIST.
    finalPath = "/Users/daniellages/Documents/Computer Science/Year 3/OCR/OCR-Project/imagesForOCR/"

    # Stage 1: clean and binarise the inputs, then save them for preparation.
    arrayOfCharacters = modify(charactersPath)
    saveImage(arrayOfCharacters, destinationPath)

    # Stage 2: build the fixed-size canvases from the saved images and store them.
    arrayOfMNIST = prepareForMNIST()
    saveImage(arrayOfMNIST, finalPath)