diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..c5d7e85 Binary files /dev/null and b/.DS_Store differ diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b879910..c7e6962 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -38,7 +38,8 @@ // Add the IDs of extensions you want installed when the container is created. "extensions": [ "ms-python.python", - "ms-python.vscode-pylance" + "ms-python.vscode-pylance", + "stevensona.character-count" ] } }, diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index ad7f05d..cb7b74a 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -9,9 +9,9 @@ jobs: matrix: python-version: ["3.8", "3.9", "3.10"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -24,4 +24,4 @@ jobs: echo "PYTHONPATH=./" >> $GITHUB_ENV - name: Analysing the code with pylint run: | - pylint --disable=missing-docstring --indent-string=' ' $(git ls-files "*.py") + pylint --disable=missing-docstring --disable=no-member --indent-string=' ' $(git ls-files "*.py") diff --git a/.gitignore b/.gitignore index 2955a67..daead37 100644 --- a/.gitignore +++ b/.gitignore @@ -130,5 +130,14 @@ dmypy.json # fake data generated by the tests .fake + # upload directory data/ + +# JPEG +*.jpg +*.jpeg +*.jpe +*.jif +*.jfif +*.jfi diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000..f7f2287 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "stevensona.character-count" + ] +} \ No newline at end of file diff --git a/application/code_generator.py b/application/code_generator.py index 15c1b81..ff5f61f 100644 --- a/application/code_generator.py +++ b/application/code_generator.py @@ -1,95 +1,60 @@ -from model import code_blocks -class CodeGenerator: - def __init__(self, template_mapping, parse_template): - self.blocks = code_blocks.AllBlocks() - self.function_mapping = template_mapping - self.parse_template = parse_template - self.data = {} - +from application.code_parser import CodeParser +class CodeGenerator(CodeParser): def get_data(self): return self.data['dataframe'] def load_data(self, csv_file): - dataframe = self._parse_and_execute('read_csv', [csv_file]) - self._save('dataframe', dataframe) + dataframe = self.parse_and_execute('read_csv', [csv_file]) + self.save('dataframe', dataframe) return self.data['dataframe'].shape def describe_data(self): - output = self._parse_and_execute('describe_data', ['dataframe']) + output = self.parse_and_execute('describe_data', ['dataframe']) return output def get_labels(self): - keys = self._parse_and_execute('get_keys', ['dataframe']) + keys = self.parse_and_execute('get_keys', ['dataframe']) return keys.values.tolist() def select_y(self, output_label): - x_values, y_values = self._parse_and_execute('select_y', ['dataframe', output_label]) - self._save('x_values', x_values) - self._save('y_values', y_values) + x_values, y_values = self.parse_and_execute('select_y', ['dataframe', output_label]) + self.save('x_values', x_values) + self.save('y_values', y_values) def drop_x(self, input_labels): - dataframe = self._parse_and_execute('drop_x', ['dataframe', input_labels]) - self._save('dataframe', dataframe) + dataframe = self.parse_and_execute('drop_x', ['dataframe', input_labels]) + self.save('dataframe', dataframe) def clean_data(self): - dataframe = self._parse_and_execute('clean_data', ['dataframe']) - self._save('dataframe', dataframe) + dataframe = self.parse_and_execute('clean_data', ['dataframe']) + self.save('dataframe', dataframe) return self.data['dataframe'].shape def split_data(self, train_ratio = 0.8, seed = 200): # the ordering of x/y train/test is different here but I don't know why - (x_train, y_train, x_test, y_test)=self._parse_and_execute( + (x_train, y_train, x_test, y_test)=self.parse_and_execute( 'split',['x_values','y_values',1-train_ratio,seed] ) - self._save('x_train', x_train) - self._save('x_test', x_test) - self._save('y_train', y_train) - self._save('y_test', y_test) + self.save('x_train', x_train) + self.save('x_test', x_test) + self.save('y_train', y_train) + self.save('y_test', y_test) return self.data['x_train'].shape def train_lin_reg(self): - model = self._parse_and_execute('train_lin_reg', ['x_train', 'y_train']) - self._save('model', model) + model = self.parse_and_execute('train_lin_reg', ['x_train', 'y_train']) + self.save('model', model) return model def lin_reg_predict(self): - (train_preds, test_preds) = self._parse_and_execute( + (train_preds, test_preds) = self.parse_and_execute( 'lin_reg_predict', ['model', 'x_train', 'x_test'] ) - self._save('train_preds', train_preds) - self._save('test_preds', test_preds) + self.save('train_preds', train_preds) + self.save('test_preds', test_preds) def eval_lin_reg(self): - error_change = self._parse_and_execute( + error_change = self.parse_and_execute( 'eval_lin_reg', ['y_train', 'y_test', 'train_preds', 'test_preds'] ) - self._save('error_change', error_change) - - def download_code(self): - return self.blocks.to_text() - - def _create_new_block(self, comment, statements): - block = code_blocks.CodeBlock(comment, statements) - self.blocks.add_next_block(block) - - def _parse_and_execute(self, template, args): - replaced_args = [] - string_args = [] - for arg in args: - if isinstance(arg, str) and arg in self.data: - replaced_args.append(self.data[arg]) - string_args.append(arg) - else: - replaced_args.append(arg) - if isinstance(arg, str): - string_args.append('\"'+arg+'\"') - else: - string_args.append(str(arg)) - - (comments, code) = self.parse_template(template, string_args) - self._create_new_block(comments[0], code) - output = self.function_mapping[template](replaced_args) # where the code is executed - return output - - def _save(self, key, value): - self.data[key] = value + self.save('error_change', error_change) diff --git a/application/code_parser.py b/application/code_parser.py new file mode 100644 index 0000000..305172a --- /dev/null +++ b/application/code_parser.py @@ -0,0 +1,36 @@ +from model import code_blocks +class CodeParser: + def __init__(self, template_mapping, parse_template): + self.blocks = code_blocks.AllBlocks() + self.function_mapping = template_mapping + self.parse_template = parse_template + self.data = {} + + def download_code(self): + return self.blocks.to_text() + + def create_new_block(self, comment, statements): + block = code_blocks.CodeBlock(comment, statements) + self.blocks.add_next_block(block) + + def parse_and_execute(self, template, args): + replaced_args = [] + string_args = [] + for arg in args: + if isinstance(arg, str) and arg in self.data: + replaced_args.append(self.data[arg]) + string_args.append(arg) + else: + replaced_args.append(arg) + if isinstance(arg, str): + string_args.append('\"'+arg+'\"') + else: + string_args.append(str(arg)) + + (comments, code) = self.parse_template(template, string_args) + self.create_new_block(comments[0], code) + output = self.function_mapping[template](replaced_args) + return output + + def save(self, key, value): + self.data[key] = value diff --git a/application/image_generator.py b/application/image_generator.py new file mode 100644 index 0000000..0669604 --- /dev/null +++ b/application/image_generator.py @@ -0,0 +1,62 @@ +import random +import matplotlib.pyplot as plt +from application.code_parser import CodeParser +class ImageGenerator(CodeParser): + def load_images(self, images_directory): + #Dividing the data in different batches and showing labels + data = self.\ + parse_and_execute('load_images', [images_directory]) + print(data) + + def validate_images(self, images_directory): + self.\ + parse_and_execute('validate_images', [images_directory]) + + def preprocess_images(self, images_directory): #directory after validating the images + (data, normalized_data) = self.\ + parse_and_execute('preprocess_images', [images_directory]) + class_mapping = {} + print("Train classes:: ", data.class_names) + print("Number of training files:", len(data.file_paths)) + self.save('data', data) + self.save('normalized_data', normalized_data) + class_names = self.data['data'].class_names + plt.figure(figsize=(10, 10)) + for images, labels in self.data['data'].take(random.randint(1,len(data))): + for i in range(9): + plt.subplot(3, 3, i + 1) + # plt.imshow(cv2.cvtColor(images[i].numpy().astype('uint8'), cv2.COLOR_BGR2RGB)) + plt.imshow(images[i].numpy().astype("uint8")) + class_mapping[int(labels[i].numpy())]= class_names[labels[i]] + plt.title(class_names[labels[i]] + ' => ' + str(labels[i].numpy())) + plt.axis("off") + self.save('class_mapping', class_mapping) + plt.show() + return self.data['data'] + + def split_images(self, train_ratio, val_ratio, test_ratio): + (train, val, test) = self.\ + parse_and_execute('split_images', ['normalized_data', train_ratio, val_ratio, test_ratio]) + self.save('train', train) + self.save('val', val) + self.save('test', test) + return self.data['train'], self.data['val'], self.data['test'] + + def train_model(self, epochs): + (history, model) = self.\ + parse_and_execute('images_model', ['normalized_data', 'train', 'val', epochs]) + self.save('history', history) + self.save('model', model) + return self.data['history'] + + def model_metrics(self): + (precision, recall, accuracy) = self.parse_and_execute('metrics', ['test', 'model']) + return precision, recall, accuracy + + def visualize(self): + self.\ + parse_and_execute('visualize', ['history']) + + def test_image(self, location): + self.\ + parse_and_execute('test_image', [location, 'model', 'class_mapping']) diff --git a/data/.DS_Store b/data/.DS_Store new file mode 100644 index 0000000..59cdcee Binary files /dev/null and b/data/.DS_Store differ diff --git a/data/possum.csv b/data/possum.csv deleted file mode 100644 index 0f7cc0f..0000000 --- a/data/possum.csv +++ /dev/null @@ -1,105 +0,0 @@ -case,site,Pop,sex,age,hdlngth,skullw,totlngth,taill,footlgth,earconch,eye,chest,belly -1,1,Vic,m,8,94.1,60.4,89,36,74.5,54.5,15.2,28,36 -2,1,Vic,f,6,92.5,57.6,91.5,36.5,72.5,51.2,16,28.5,33 -3,1,Vic,f,6,94,60,95.5,39,75.4,51.9,15.5,30,34 -4,1,Vic,f,6,93.2,57.1,92,38,76.1,52.2,15.2,28,34 -5,1,Vic,f,2,91.5,56.3,85.5,36,71,53.2,15.1,28.5,33 -6,1,Vic,f,1,93.1,54.8,90.5,35.5,73.2,53.6,14.2,30,32 -7,1,Vic,m,2,95.3,58.2,89.5,36,71.5,52,14.2,30,34.5 -8,1,Vic,f,6,94.8,57.6,91,37,72.7,53.9,14.5,29,34 -9,1,Vic,f,9,93.4,56.3,91.5,37,72.4,52.9,15.5,28,33 -10,1,Vic,f,6,91.8,58,89.5,37.5,70.9,53.4,14.4,27.5,32 -11,1,Vic,f,9,93.3,57.2,89.5,39,77.2,51.3,14.9,31,34 -12,1,Vic,f,5,94.9,55.6,92,35.5,71.7,51,15.3,28,33 -13,1,Vic,m,5,95.1,59.9,89.5,36,71,49.8,15.8,27,32 -14,1,Vic,m,3,95.4,57.6,91.5,36,74.3,53.7,15.1,28,31.5 -15,1,Vic,m,5,92.9,57.6,85.5,34,69.7,51.8,15.7,28,35 -16,1,Vic,m,4,91.6,56,86,34.5,73,51.4,14.4,28,32 -17,1,Vic,f,1,94.7,67.7,89.5,36.5,73.2,53.2,14.7,29,31 -18,1,Vic,m,2,93.5,55.7,90,36,73.7,55.4,15.3,28,32 -19,1,Vic,f,5,94.4,55.4,90.5,35,73.4,53.9,15.2,28,32 -20,1,Vic,f,4,94.8,56.3,89,38,73.8,52.4,15.5,27,36 -21,1,Vic,f,3,95.9,58.1,96.5,39.5,77.9,52.9,14.2,30,40 -22,1,Vic,m,3,96.3,58.5,91,39.5,73.5,52.1,16.2,28,36 -23,1,Vic,f,4,92.5,56.1,89,36,72.8,53.3,15.4,28,35 -24,1,Vic,m,2,94.4,54.9,84,34,75,53.5,16.2,27,32 -25,1,Vic,m,3,95.8,58.5,91.5,35.5,72.3,51.6,14.9,31,35 -26,1,Vic,m,7,96,59,90,36,73.6,56.2,15,29,38 -27,1,Vic,f,2,90.5,54.5,85,35,70.3,50.8,14.2,23,28 -28,1,Vic,m,4,93.8,56.8,87,34.5,73.2,53,15.3,27,30 -29,1,Vic,f,3,92.8,56,88,35,74.9,51.8,14,24,32 -30,1,Vic,f,2,92.1,54.4,84,33.5,70.6,50.8,14.5,24.5,33 -31,1,Vic,m,3,92.8,54.1,93,37,68,52.5,14.5,27,31 -32,1,Vic,f,4,94.3,56.7,94,39,74.8,52,14.9,28,34 -33,1,Vic,m,3,91.4,54.6,89,37,70.8,51.8,14.8,24,30 -34,2,Vic,m,2,90.6,55.7,85.5,36.5,73.1,53.1,14.4,26,28.5 -35,2,Vic,m,4,94.4,57.9,85,35.5,71.2,55.5,16.4,28,35.5 -36,2,Vic,m,7,93.3,59.3,88,35,74.3,52,14.9,25.5,36 -37,2,Vic,f,2,89.3,54.8,82.5,35,71.2,52,13.6,28,31.5 -38,2,Vic,m,7,92.4,56,80.5,35.5,68.4,49.5,15.9,27,30 -39,2,Vic,f,1,84.7,51.5,75,34,68.7,53.4,13,25,25 -40,2,Vic,f,3,91,55,84.5,36,72.8,51.4,13.6,27,30 -41,2,Vic,f,5,88.4,57,83,36.5,NA,40.3,15.9,27,30.5 -42,2,Vic,m,3,85.3,54.1,77,32,62.7,51.2,13.8,25.5,33 -43,2,Vic,f,2,90,55.5,81,32,72,49.4,13.4,29,31 -44,2,Vic,m,NA,85.1,51.5,76,35.5,70.3,52.6,14.4,23,27 -45,2,Vic,m,3,90.7,55.9,81,34,71.5,54,14.6,27,31.5 -46,2,Vic,m,NA,91.4,54.4,84,35,72.8,51.2,14.4,24.5,35 -47,3,other,m,2,90.1,54.8,89,37.5,66,45.5,15,25,33 -48,3,other,m,5,98.6,63.2,85,34,66.9,44.9,17,28,35 -49,3,other,m,4,95.4,59.2,85,37,69,45,15.9,29.5,35.5 -50,3,other,f,5,91.6,56.4,88,38,65,47.2,14.9,28,36 -51,3,other,f,5,95.6,59.6,85,36,64,43.9,17.4,28,38.5 -52,3,other,m,6,97.6,61,93.5,40,67.9,44.3,15.8,28.5,32.5 -53,3,other,f,3,93.1,58.1,91,38,67.4,46,16.5,26,33.5 -54,4,other,m,7,96.9,63,91.5,43,71.3,46,17.5,30,36.5 -55,4,other,m,2,103.1,63.2,92.5,38,72.5,44.9,16.4,30.5,36 -56,4,other,m,3,99.9,61.5,93.7,38,68.7,46.8,16.4,27.5,31.5 -57,4,other,f,4,95.1,59.4,93,41,67.2,45.3,14.5,31,39 -58,4,other,m,3,94.5,64.2,91,39,66.5,46.4,14.4,30.5,33 -59,4,other,m,2,102.5,62.8,96,40,73.2,44.5,14.7,32,36 -60,4,other,f,2,91.3,57.7,88,39,63.1,47,14.4,26,30 -61,5,other,m,7,95.7,59,86,38,63.1,44.9,15,26.5,31 -62,5,other,f,3,91.3,58,90.5,39,65.5,41.3,16,27,32 -63,5,other,f,6,92,56.4,88.5,38,64.1,46.3,15.2,25.5,28.5 -64,5,other,f,3,96.9,56.5,89.5,38.5,63,45.1,17.1,25.5,33 -65,5,other,f,5,93.5,57.4,88.5,38,68.2,41.7,14,29,38.5 -66,5,other,f,3,90.4,55.8,86,36.5,63.2,44.2,15.7,26.5,34 -67,5,other,m,4,93.3,57.6,85,36.5,64.7,44.1,16.5,27.5,29.5 -68,5,other,m,5,94.1,56,88.5,38,65.9,43.1,17.4,27,30 -69,5,other,m,5,98,55.6,88,37.5,65,45.6,15,28.5,34 -70,5,other,f,7,91.9,56.4,87,38,65.4,44.1,13,27,34 -71,5,other,m,6,92.8,57.6,90,40,65.7,42.8,15,27.5,34 -72,5,other,m,1,85.9,52.4,80.5,35,62,42.4,14.1,25.5,30 -73,5,other,m,1,82.5,52.3,82,36.5,65.7,44.7,16,23.5,28 -74,6,other,f,4,88.7,52,83,38,61.5,45.9,14.7,26,34 -75,6,other,m,6,93.8,58.1,89,38,66.2,45.6,16.9,26,33.5 -76,6,other,m,5,92.4,56.8,89,41,64.5,46.4,17.8,26,33 -77,6,other,m,6,93.6,56.2,84,36,62.8,42.9,16.2,25,35 -78,6,other,m,1,86.5,51,81,36.5,63,44.3,13.2,23,28 -79,6,other,m,1,85.8,50,81,36.5,62.8,43,14.8,22,28.5 -80,6,other,m,1,86.7,52.6,84,38,62.3,44.8,15,23.5,30.5 -81,6,other,m,3,90.6,56,85.5,38,65.6,41.7,17,27.5,35 -82,6,other,f,4,86,54,82,36.5,60.7,42.9,15.4,26,32 -83,6,other,f,3,90,53.8,81.5,36,62,43.3,14,25,29 -84,6,other,m,3,88.4,54.6,80.5,36,62.6,43.6,16.3,25,28.5 -85,6,other,m,3,89.5,56.2,92,40.5,65.6,43.5,14.5,27,31.5 -86,6,other,f,3,88.2,53.2,86.5,38.5,60.3,43.7,13.6,26,31 -87,7,other,m,2,98.5,60.7,93,41.5,71.7,46.8,15,26,36 -88,7,other,f,2,89.6,58,87.5,38,66.7,43.5,16,25.5,31.5 -89,7,other,m,6,97.7,58.4,84.5,35,64.4,46.2,14.4,29,30.5 -90,7,other,m,3,92.6,54.6,85,38.5,69.8,44.8,14.5,25.5,32.5 -91,7,other,m,3,97.8,59.6,89,38,65.5,48,15,26,32 -92,7,other,m,2,90.7,56.3,85,37,67.6,46.8,14.5,25.5,31 -93,7,other,m,3,89.2,54,82,38,63.8,44.9,12.8,24,31 -94,7,other,m,7,91.8,57.6,84,35.5,64.2,45.1,14.4,29,35 -95,7,other,m,4,91.6,56.6,88.5,37.5,64.5,45.4,14.9,27,31 -96,7,other,m,4,94.8,55.7,83,38,66.5,47.7,14,25,33 -97,7,other,m,3,91,53.1,86,38,63.8,46,14.5,25,31.5 -98,7,other,m,5,93.2,68.6,84,35,65.6,44.3,14.5,28.5,32 -99,7,other,f,3,93.3,56.2,86.5,38.5,64.8,43.8,14,28,35 -100,7,other,m,1,89.5,56,81.5,36.5,66,46.8,14.8,23,27 -101,7,other,m,1,88.6,54.7,82.5,39,64.4,48,14,25,33 -102,7,other,f,6,92.4,55,89,38,63.5,45.4,13,25,30 -103,7,other,m,4,91.5,55.2,82.5,36.5,62.9,45.9,15.4,25,29 -104,7,other,f,3,93.6,59.9,89,40,67.6,46,14.8,28.5,33.5 diff --git a/pandas_code/code_templates/images_model.py b/pandas_code/code_templates/images_model.py new file mode 100644 index 0000000..c26c129 --- /dev/null +++ b/pandas_code/code_templates/images_model.py @@ -0,0 +1,30 @@ +import tensorflow as tf +#Deep learning images model +def get_code(args): + + model = tf.keras.models.Sequential([ + tf.keras.layers.Conv2D(16, 3, padding='same', input_shape=(256, 256, 3), activation='relu'), + tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'), + tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'), + tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(256, activation='relu'), + tf.keras.layers.Dense(1, activation='sigmoid') + ]) + early_stopping = tf.keras.callbacks.EarlyStopping\ + (monitor='val_loss', mode='min', verbose=1, patience=10) + model.compile('adam', + # SparseCategoricalCrossentropy(from_logits=True) + loss=tf.losses.BinaryCrossentropy(),metrics=['accuracy']) + model.summary() + + history = model.fit( + args[1], #train batch + validation_data = args[2], #validation batch + epochs = args[3], #change number of epochs as per your requirement + callbacks=[early_stopping] + ) + # model.save(os.path.join('models', 'simplemodel.h5')) + return history, model diff --git a/pandas_code/code_templates/load_images.py b/pandas_code/code_templates/load_images.py new file mode 100644 index 0000000..97d4bcf --- /dev/null +++ b/pandas_code/code_templates/load_images.py @@ -0,0 +1,9 @@ +import tensorflow as tf +# Identifying number of classes and labels +def get_code(args): + #For the configuration of using GPU + gpus = tf.config.experimental.list_physical_devices('GPU') + for gpu in gpus: + tf.config.experimental.set_memory_growth(gpu, True) + data = tf.keras.utils.image_dataset_from_directory(args[0], batch_size=22) + return data diff --git a/pandas_code/code_templates/metrics.py b/pandas_code/code_templates/metrics.py new file mode 100644 index 0000000..3fbb538 --- /dev/null +++ b/pandas_code/code_templates/metrics.py @@ -0,0 +1,17 @@ +import tensorflow as tf +#Finding the accuracy of the model +# pylint: disable=not-callable +def get_code(args): + precision = tf.keras.metrics.Precision() + recall = tf.keras.metrics.Recall() + accuracy = tf.keras.metrics.BinaryAccuracy() + # test = args[0].map(lambda x, y: (x/255, y)) + for batch in args[0].as_numpy_iterator(): + images_batch, labels_batch = batch + yhat = args[1].predict(images_batch) + precision.update_state(labels_batch, yhat) + recall.update_state(labels_batch, yhat) + accuracy.update_state(labels_batch, yhat) + print(f'Precision: {precision.result().numpy()}, \ + Recall: {recall.result().numpy()}, Accuracy: {accuracy.result().numpy()}') + return precision, recall, accuracy diff --git a/pandas_code/code_templates/plot_images.py b/pandas_code/code_templates/plot_images.py new file mode 100644 index 0000000..3ca80c3 --- /dev/null +++ b/pandas_code/code_templates/plot_images.py @@ -0,0 +1,16 @@ +# data_dir = '../data' +# print(os.listdir(os.path.join(data_dir))) +#Dividing the data in different batches and showing labels +# data = tf.keras.utils.image_dataset_from_directory(data_dir, batch_size=12) +# # data_iterator = data.as_numpy_iterator() # Iterating thru each batch of images +# # batch = data_iterator.next() # Getting images of each batch +# # Plotting batch of images +# class_names = data.class_names +# plt.figure(figsize=(10, 10)) +# for images, labels in data.take(1): +# for i in range(9): +# ax = plt.subplot(3, 3, i + 1) +# plt.imshow(images[i].numpy().astype("uint8")) +# plt.title(class_names[labels[i]] + ' => ' + str(labels[i].numpy())) +# plt.axis("off") +# plt.show() diff --git a/pandas_code/code_templates/preprocess_images.py b/pandas_code/code_templates/preprocess_images.py new file mode 100644 index 0000000..c6171bc --- /dev/null +++ b/pandas_code/code_templates/preprocess_images.py @@ -0,0 +1,14 @@ +import tensorflow as tf +#Preprocessing images +def get_code(args): + # Preprocess all images + data = tf.keras.utils.image_dataset_from_directory( + args[0], + # validation_split=0.2, + # subset='training', + image_size = (256, 256), + batch_size = 22, + seed = 123 + ) + normalized_data = data.map(lambda x, y: (x/255, y)) + return data, normalized_data diff --git a/pandas_code/code_templates/split_images.py b/pandas_code/code_templates/split_images.py new file mode 100644 index 0000000..29d1a0b --- /dev/null +++ b/pandas_code/code_templates/split_images.py @@ -0,0 +1,19 @@ +#splitting images +def get_code(args): + train_size = int(len(args[0]) * args[1]) + val_size = int(len(args[0]) * args[2]) + 1 + test_size = int(len(args[0])* args[3]) + 1 + train = args[0].take(train_size) + val = args[0].skip(train_size).take(val_size) + test = args[0].skip(train_size + val_size).take(test_size) + return train, val, test + +# def split_images(self, train_ration, val_ratio, test_ratio): +# # Splitting images +# train_size = int(len(data) * 0.7) +# val_size = int(len(data) * .2) + 1 +# test_size = int(len(data)* .1) + 1 + +# train = data.take((train_size)) +# val = data.skip(train_size).take(val_size) +# test = data.skip(train_size + val_size).take(test_size) diff --git a/pandas_code/code_templates/test_image.py b/pandas_code/code_templates/test_image.py new file mode 100644 index 0000000..87c7ddf --- /dev/null +++ b/pandas_code/code_templates/test_image.py @@ -0,0 +1,19 @@ +import cv2 +import numpy as np +import tensorflow as tf +# Testing the model on images which it havn't seen before +def get_code(args): + img = cv2.imread(args[0]) + # plt.imshow(img) + # plt.show() + resize = tf.image.resize(img, (256, 256)) + # plt.imshow(resize.numpy().astype('uint8')) + # plt.show() + # print(resize.shape) + # We're expanding dimensions of the images because + # our model expects batch of images + yhat = args[1].predict(np.expand_dims(resize/255, 0)) + if yhat > 0.5: + print(f"Predicted class is {args[2][0]}") + else: + print(f"Predicted class is {args[2][1]}") diff --git a/pandas_code/code_templates/validate_images.py b/pandas_code/code_templates/validate_images.py new file mode 100644 index 0000000..8afd08e --- /dev/null +++ b/pandas_code/code_templates/validate_images.py @@ -0,0 +1,23 @@ +import os +import imghdr +import cv2 +#Validating Images in the specified directory +def get_code(args): + image_ext = ['jpeg', 'jpg', 'bmp', 'png'] + for image_class in os.listdir(args[0]): + if image_class[0] != '.': + for image in os.listdir(os.path.join(args[0], image_class)): + if image[0] != '.': + image_path = os.path.join(args[0], image_class, image) + try: + cv2.imread(image_path) + tip = imghdr.what(image_path) + if tip not in image_ext: + print(f"Bad image with invalid ext {image_path}") + os.remove(image_path) + # Uncomment the following two line if you've problem validating the files + # img_bytes = tf.io.read_file(image_path) + # tf.io.decode_image(img_bytes) + except IOError: + print(f"Found bad path {image_path}") + os.remove(image_path) diff --git a/pandas_code/code_templates/visualize.py b/pandas_code/code_templates/visualize.py new file mode 100644 index 0000000..b8b0f9e --- /dev/null +++ b/pandas_code/code_templates/visualize.py @@ -0,0 +1,18 @@ +import pandas as pd +import matplotlib.pyplot as plt +#visualize accuracy +def get_code(args): +#Accuracy Graph + history_df = pd.DataFrame(args[0].history) + plt.figure(figsize=(12,4)) + plt.subplot(1,2,1) + plt.plot(history_df['loss'], label='training loss') + plt.plot(history_df['val_loss'], label='validation loss') + plt.title('Model Loss Function') + plt.legend() + plt.subplot(1,2,2) + plt.plot(history_df['accuracy'], label='training accuracy') + plt.plot(history_df['val_accuracy'], label='validation accuracy') + plt.title('Model Accuracy') + plt.legend() + plt.show() diff --git a/pandas_code/mapping.py b/pandas_code/mapping.py index 87a5f88..070e632 100644 --- a/pandas_code/mapping.py +++ b/pandas_code/mapping.py @@ -9,6 +9,14 @@ from pandas_code.code_templates import lin_reg_predict from pandas_code.code_templates import eval_lin_reg +from pandas_code.code_templates import load_images +from pandas_code.code_templates import validate_images +from pandas_code.code_templates import preprocess_images +from pandas_code.code_templates import split_images +from pandas_code.code_templates import images_model +from pandas_code.code_templates import visualize +from pandas_code.code_templates import metrics +from pandas_code.code_templates import test_image template_mapping = { 'read_csv': read_csv.get_code, 'describe_data': describe_data.get_code, @@ -19,5 +27,13 @@ 'drop_x': drop_x.get_code, 'train_lin_reg': train_lin_reg.get_code, 'lin_reg_predict': lin_reg_predict.get_code, + 'load_images':load_images.get_code, + 'validate_images':validate_images.get_code, + 'preprocess_images':preprocess_images.get_code, + 'split_images':split_images.get_code, + 'images_model':images_model.get_code, + 'visualize':visualize.get_code, + 'metrics':metrics.get_code, + 'test_image':test_image.get_code, 'eval_lin_reg': eval_lin_reg.get_code } diff --git a/requirements.txt b/requirements.txt index 16ed80d..6352719 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,8 @@ Authlib pylint scikit-learn numpy -google-api-python-client \ No newline at end of file +tensorflow +tensorflow-gpu +opencv-python +matplotlib +google-api-python-client diff --git a/tests/pandas_code/parse_template_test.py b/tests/pandas_code/parse_template_test.py index 8e85760..4c0235f 100644 --- a/tests/pandas_code/parse_template_test.py +++ b/tests/pandas_code/parse_template_test.py @@ -54,6 +54,7 @@ def test_parse_train(): template_name = 'train_lin_reg' args = ['x_values', 'y_values'] parse_template(template_name, args) + def test_parse_split(): template_name = 'split' args = ['X', 'Y', '0.8', '200'] diff --git a/visual.py b/visual.py new file mode 100644 index 0000000..f884a0b --- /dev/null +++ b/visual.py @@ -0,0 +1,34 @@ +import sys +from application import image_generator +from pandas_code.mapping import template_mapping +from pandas_code.parse_template import parse_template + +def run_generator(args): + input_file = args[1] + image_gen = image_generator.ImageGenerator(template_mapping, parse_template) + image_gen.load_images(input_file) + image_gen.validate_images(input_file) + data_generator = image_gen.preprocess_images(input_file) + print("Train classes:: ", data_generator.class_names) + print("Number of training files:", len(data_generator.file_paths)) + classes= list(data_generator.class_names) + print(f"There are {len(classes)} number of classes") + (train, val, test) = image_gen.split_images(0.7, 0.1, 0.1) + print(len(train)) + print(len(val)) + print(len(test)) + history = image_gen.train_model(epochs=20) + print(history) + image_gen.visualize() + image_gen.model_metrics() + #Create any image for testing purpose + image_gen.test_image('/Users/harisalam/ml_code_generator/test/happyFace.jpg') + print("\n\n\n\nPrinting Starts from here.\n") + print(image_gen.download_code()) + +#process the arguments +if __name__ == '__main__': + run_generator(sys.argv) +# code can be run using the following command +# python visual.py data/ +# data/ => is the directory where we have images of different classes