diff --git a/application/code_generator.py b/application/code_generator.py index 7eab41f..fff4aeb 100644 --- a/application/code_generator.py +++ b/application/code_generator.py @@ -1,72 +1,78 @@ from model import code_blocks class CodeGenerator: - def __init__(self, template_mapping, parse_template): - self.blocks = code_blocks.AllBlocks() - self.function_mapping = template_mapping - self.parse_template = parse_template - self.data = {} + def __init__(self, template_mapping, parse_template): + self.blocks = code_blocks.AllBlocks() + self.function_mapping = template_mapping + self.parse_template = parse_template + self.data = {} - def get_data(self): - return self.data['dataframe'] + #clear all generated code blocks (AllBlocks.reset_all() in code_blocks.py) and the stored data + def resetone(self): + self.blocks.reset_all() + self.data = {} - def load_data(self, csv_file): - dataframe = self._parse_and_execute('read_csv', [csv_file]) - self._save('dataframe', dataframe) - self._save('X', dataframe) - return self.data['dataframe'].shape + def get_data(self): + return self.data['dataframe'] - def describe_data(self): - output = self._parse_and_execute('describe_data', ['dataframe']) - return output + def load_data(self, csv_file): + dataframe = self._parse_and_execute('read_csv', [csv_file]) + self._save('dataframe', dataframe) + self._save('X', dataframe) + return self.data['dataframe'].shape - def clean_data(self): - self._parse_and_execute('clean_data', ['dataframe']) - return self.data['dataframe'].shape + def describe_data(self): + output = self._parse_and_execute('describe_data', ['dataframe']) + return output - def get_labels(self): - keys = self._parse_and_execute('get_keys', ['X']) - return keys.values.tolist() + def clean_data(self): + self._parse_and_execute('clean_data', ['dataframe']) + return self.data['dataframe'].shape - def drop_x(self, input_labels): - x_values = self._parse_and_execute('drop_x', ['X', input_labels]) - self._save('X', x_values) + def get_labels(self): + keys = 
self._parse_and_execute('get_keys', ['X']) + return keys.values.tolist() - def select_y(self, output_label): - x_values, y_values = self._parse_and_execute('select_y', ['X', output_label]) - self._save('X', x_values) - self._save('Y', y_values) + def drop_x(self, input_labels): + x_values = self._parse_and_execute('drop_x', ['X', input_labels]) + self._save('X', x_values) - def split_data(self, train_ratio = 0.8, seed = 200): - (train, test) = self._parse_and_execute('split',['X',train_ratio,seed]) - self.data['train'] = train - self.data['test'] = test - return self.data['train'].shape + def select_y(self, output_label): + x_values, y_values = self._parse_and_execute( + 'select_y', ['X', output_label]) + self._save('X', x_values) + self._save('Y', y_values) - def download_code(self): - return self.blocks.to_text() + def split_data(self, train_ratio=1, seed=200): + (train, test) = self._parse_and_execute( + 'split', ['X', train_ratio, seed]) + self.data['train'] = train + self.data['test'] = test + return self.data['train'].shape - def _create_new_block(self, comment, statements): - block = code_blocks.CodeBlock(comment, statements) - self.blocks.add_next_block(block) + def download_code(self): + return self.blocks.to_text() - def _parse_and_execute(self, template, args): - replaced_args = [] - string_args = [] - for arg in args: - if isinstance(arg, str) and arg in self.data: - replaced_args.append(self.data[arg]) - string_args.append(arg) - else: - replaced_args.append(arg) - if isinstance(arg, str): - string_args.append('\"'+arg+'\"') - else: - string_args.append(str(arg)) + def _create_new_block(self, comment, statements): + block = code_blocks.CodeBlock(comment, statements) + self.blocks.add_next_block(block) - (comments, code) = self.parse_template(template, string_args) - self._create_new_block(comments[0], code) - output = self.function_mapping[template](replaced_args) - return output + def _parse_and_execute(self, template, args): + replaced_args = [] + 
string_args = [] + for arg in args: + if isinstance(arg, str) and arg in self.data: + replaced_args.append(self.data[arg]) + string_args.append(arg) + else: + replaced_args.append(arg) + if isinstance(arg, str): + string_args.append('\"'+arg+'\"') + else: + string_args.append(str(arg)) + (comments, code) = self.parse_template(template, string_args) + self._create_new_block(comments[0], code) + output = self.function_mapping[template](replaced_args) + return output - def _save(self, key, value): - self.data[key] = value + def _save(self, key, value): + self.data[key] = value diff --git a/flask_app/flask_main.py b/flask_app/flask_main.py index 0ab9c87..17a7917 100644 --- a/flask_app/flask_main.py +++ b/flask_app/flask_main.py @@ -1,16 +1,13 @@ import os - from flask import g from flask import Flask from flask import render_template from flask import request, redirect, flash from werkzeug.utils import secure_filename - from application import code_generator from pandas_code.mapping import template_mapping from pandas_code.parse_template import parse_template - ALLOWED_EXTENSIONS = {'csv'} app = Flask(__name__, template_folder='templates') @@ -20,85 +17,91 @@ @app.route('/') def welcome(): - return render_template('home.html') + return render_template('home.html') def allowed_file(filename): - return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + return '.' 
in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS @app.route('/download', methods=['GET']) def download_code(): - code = generator.download_code() - return render_template('info/code.html', text=code) + code = generator.download_code() + return render_template('info/code.html', text=code) @app.route('/describe', methods=['GET']) def describe_data(): - description = generator.describe_data() - return render_template('info/description.html',table=description.to_html()) + description = generator.describe_data() + return render_template('info/description.html',table=description.to_html()) + +#added start over option in base.html to clear the previous block of code +@app.route('/start_over', methods=['GET']) +def start_over(): + generator.resetone() + return render_template('home.html') @app.route('/clean', methods=['GET']) def clean_data(): - original_data_size = generator.get_data().shape - cleaned_data_size = generator.clean_data() - num_rows_removed = original_data_size[0]-cleaned_data_size[0] - return render_template('info/cleaning_summary.html', removed_rows=num_rows_removed) + original_data_size = generator.get_data().shape + cleaned_data_size = generator.clean_data() + num_rows_removed = original_data_size[0]-cleaned_data_size[0] + return render_template('info/cleaning_summary.html', removed_rows=num_rows_removed) @app.route('/split', methods=['GET']) def split_data(): - train_data_size = generator.split_data() - return render_template('info/splitting_summary.html', num_rows_train=train_data_size[0]) + train_data_size = generator.split_data() + return render_template('info/splitting_summary.html', num_rows_train=train_data_size[0]) @app.route('/input_labels', methods=['GET', 'POST']) def get_input_labels(): - if request.method == 'POST': - request_dict = request.form.to_dict(flat=False) - generator.drop_x(request_dict['drop_labels']) - return render_template('actions/actions.html') + if request.method == 'POST': + request_dict = 
request.form.to_dict() + generator.drop_x(request_dict['drop_labels']) + return render_template('actions/actions.html') - keys = generator.get_labels() - return render_template('actions/select_input_values.html', labels=keys) + keys = generator.get_labels() + return render_template('actions/select_input_values.html', labels=keys) @app.route('/labels', methods=['GET', 'POST']) def get_data_labels(): - if request.method == 'POST': - request_dict = request.form.to_dict() - generator.select_y(request_dict['label']) - return redirect('/input_labels') - - keys = generator.get_labels() - return render_template('actions/select_output_value.html', labels=keys) + if request.method == 'POST': + request_dict = request.form.to_dict() + generator.select_y(request_dict['label']) + return redirect('/input_labels') + keys = generator.get_labels() + return render_template('actions/select_output_value.html', labels=keys) # return render_template('labels.html', labels=keys) @app.route('/data', methods=['GET', 'POST']) def upload_file(): - if request.method == 'POST': - # check if the post request has the file part - if 'file' not in request.files: - flash('No file part') - return redirect(request.url) - - file = request.files['file'] - # If the user does not select a file, the browser submits an - # empty file without a filename. 
- if file.filename == '': - flash('No selected file') - return redirect(request.url) - if file and allowed_file(file.filename): - filename = secure_filename(file.filename) - file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) - # return redirect(url_for('download_file', name=filename)) - print(g) - with app.app_context(): - generator.load_data(app.config['UPLOAD_FOLDER']+'/'+filename) - return render_template('actions/actions.html') - - return render_template('actions/upload_data.html') + if request.method == 'POST': + # check if the post request has the file part + if 'file' not in request.files: + flash('No file part') + return redirect(request.url) + file = request.files['file'] + # If the user does not select a file, the browser submits an + # empty file without a filename. + if file.filename == '': + flash('No selected file') + return redirect(request.url) + if file and allowed_file(file.filename): + filename = secure_filename(file.filename) + file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) + # return redirect(url_for('download_file', name=filename)) + print(g) + #call to resetone() in class CodeGenerator in code_generator.py + generator.resetone() + with app.app_context(): + generator.load_data(app.config['UPLOAD_FOLDER']+'/'+filename) + return render_template('actions/actions.html') + return render_template('actions/upload_data.html') @app.route('/actions') def next_actions(): - return render_template('actions/actions.html') + return render_template('actions/actions.html') # main driver function if __name__ == '__main__': - # run() method of Flask class runs the application - # on the local development server. - app.run() + # run() method of Flask class runs the application + # on the local development server. 
+ app.run() + \ No newline at end of file diff --git a/flask_app/templates/actions/select_input_values.html b/flask_app/templates/actions/select_input_values.html index 8f940b6..2df7e69 100644 --- a/flask_app/templates/actions/select_input_values.html +++ b/flask_app/templates/actions/select_input_values.html @@ -11,7 +11,7 @@

Select your input values

{% for label in labels %} - {{label}}" + {{label}}" {% endfor %}
diff --git a/flask_app/templates/actions/split_data.html b/flask_app/templates/actions/split_data.html index c9d8d15..478c302 100644 --- a/flask_app/templates/actions/split_data.html +++ b/flask_app/templates/actions/split_data.html @@ -2,9 +2,9 @@

Split Data

If this is the only dataset you have with these values, you need to split it into - a training and testing datasets. + a training dataset and a testing dataset.

- + \ No newline at end of file diff --git a/flask_app/templates/base.html b/flask_app/templates/base.html index 6dcad26..4a9bb29 100644 --- a/flask_app/templates/base.html +++ b/flask_app/templates/base.html @@ -7,6 +7,7 @@
  • Show Code
  • Upload Data
  • Next Actions +
  • Start Over
    diff --git a/flask_app/templates/info/code.html b/flask_app/templates/info/code.html index 53fd993..ad1927b 100644 --- a/flask_app/templates/info/code.html +++ b/flask_app/templates/info/code.html @@ -7,26 +7,26 @@ {% endfor %}
    - +
    {% endblock %} diff --git a/flask_app/templates/info/splitting_summary.html b/flask_app/templates/info/splitting_summary.html index ba60f00..481381f 100644 --- a/flask_app/templates/info/splitting_summary.html +++ b/flask_app/templates/info/splitting_summary.html @@ -4,4 +4,4 @@

    Training data contains {{num_rows_train}} rows

    -{% endblock %} +{% endblock %} \ No newline at end of file diff --git a/main.py b/main.py index a2d6514..76f098e 100644 --- a/main.py +++ b/main.py @@ -4,18 +4,20 @@ from pandas_code.parse_template import parse_template def run_generator(args): - print(args) - input_file = args[1] - generator = code_generator.CodeGenerator(template_mapping, parse_template) - generator.load_data(input_file) - data_summary = generator.describe_data() - print(data_summary.to_csv()) - clean_data = generator.clean_data() - print(clean_data) - print(generator.get_labels()) - print(generator.split_data()) - code = generator.download_code() - print(code) -#process the arguments + print(args) + input_file = args[1] + generator = code_generator.CodeGenerator(template_mapping, parse_template) + generator.load_data(input_file) + data_summary = generator.describe_data() + print(data_summary.to_csv()) + clean_data = generator.clean_data() + print(clean_data) + print(generator.get_labels()) + print(generator.split_data()) + code = generator.download_code() + print(code) + + +# process the arguments if __name__ == '__main__': - run_generator(sys.argv) + run_generator(sys.argv) diff --git a/model/code_blocks.py b/model/code_blocks.py index 049cc38..35cdf02 100644 --- a/model/code_blocks.py +++ b/model/code_blocks.py @@ -1,34 +1,38 @@ from collections import deque class CodeBlock: - def __init__(self, comment, statements): - self.comment = comment - self.statements = statements + def __init__(self, comment, statements): + self.comment = comment + self.statements = statements - def to_text(self): - text_value = "#"+self.comment+"\n" - for statement in self.statements: - text_value += statement+"\n" - return text_value + def to_text(self): + text_value = "#"+self.comment+"\n" + for statement in self.statements: + text_value += statement+"\n" + return text_value - def from_text(self, text): - pass + def from_text(self, text): + pass class AllBlocks: - def __init__(self): - self.blocks = deque() + 
def __init__(self): + self.blocks = deque() - def from_file(self, file_name): - pass + #clear previously inserted blocks of code + def reset_all(self): + self.blocks = deque() - def to_text(self): - code = "" - for block in self.blocks: - code+=block.to_text() - return code + def from_file(self, file_name): + pass - def to_file(self, file_name): - pass + def to_text(self): + code = "" + for block in self.blocks: + code+=block.to_text() + return code - def add_next_block(self, next_block): - self.blocks.append(next_block) + def to_file(self, file_name): + pass + + def add_next_block(self, next_block): + self.blocks.append(next_block) diff --git a/pandas_code/code_templates/drop_x.py b/pandas_code/code_templates/drop_x.py index 904b428..08edb1c 100644 --- a/pandas_code/code_templates/drop_x.py +++ b/pandas_code/code_templates/drop_x.py @@ -1,4 +1,4 @@ -# drop dataset features passed to args param +# drop the selected feature columns from the data set def get_code(args): x_values = args[0].drop(args[1], axis=1) return x_values diff --git a/pandas_code/parse_template.py b/pandas_code/parse_template.py index 10f27f7..a0b3d2c 100644 --- a/pandas_code/parse_template.py +++ b/pandas_code/parse_template.py @@ -1,39 +1,41 @@ import re def parse_template(template_name, args): - template = 'pandas_code/code_templates/'+template_name+".py" - generated_comments = [] - generated_code = [] - with open(template, encoding='ascii') as source_code: - lines = source_code.readlines() - for line in lines: - trimmed_line = line.strip() - if trimmed_line.startswith("def get_code"): - continue - if trimmed_line.startswith("return"): - continue - if trimmed_line.startswith("#"): - generated_comments.append(trimmed_line) - else: - trimmed_line = replace_args_with_values(trimmed_line, args) - generated_code.append(trimmed_line) + template = 'pandas_code/code_templates/'+template_name+".py" + generated_comments = [] + generated_code = [] + with open(template, encoding='ascii') as source_code: + 
lines = source_code.readlines() + for line in lines: + trimmed_line = line.strip() + if trimmed_line.startswith("def get_code"): + continue + if trimmed_line.startswith("return"): + continue + if trimmed_line.startswith("#"): + generated_comments.append(trimmed_line) + else: + trimmed_line = replace_args_with_values(trimmed_line, args) + generated_code.append(trimmed_line) + + return (generated_comments, generated_code) - return (generated_comments, generated_code) def replace_args_with_values(line, args): - match = re.search(r"args\[[0-9]\]", line) - while match: - args_span = match.span() - args_string = line[args_span[0]:args_span[1]] - args_index = get_args_index(args_string) + match = re.search(r"args\[[0-9]\]", line) + while match: + args_span = match.span() + args_string = line[args_span[0]:args_span[1]] + args_index = get_args_index(args_string) + + line = line[0:args_span[0]]+args[args_index]+line[args_span[1]:] + match = re.search(r"args\[[0-9]\]", line) - line = line[0:args_span[0]]+args[args_index]+line[args_span[1]:] - match = re.search(r"args\[[0-9]\]", line) + return line - return line def get_args_index(args_string): - index_match = re.search('[0-9]+', args_string) - index_span = index_match.span() - index_string = args_string[index_span[0]:index_span[1]] - index_value = int(index_string) - return index_value + index_match = re.search('[0-9]+', args_string) + index_span = index_match.span() + index_string = args_string[index_span[0]:index_span[1]] + index_value = int(index_string) + return index_value