From 0348dbe7213192c142e42a157f78ffe5a86a5987 Mon Sep 17 00:00:00 2001 From: Ambar Date: Sun, 2 Feb 2020 23:00:11 +0100 Subject: [PATCH 01/20] Update Readme, generator for random numbers written --- .gitignore | 4 ++ README.md | 40 ++++++++++- .../create_check_random_number_list.py | 67 +++++++++++++++++++ requirements.txt | 0 4 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 app_scripts/create_check_random_number_list.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index b6e4761..c0d9e83 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ dmypy.json # Pyre type checker .pyre/ + +# Pycharm files +.idea +.venv \ No newline at end of file diff --git a/README.md b/README.md index 77ece2e..ea2857d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,38 @@ -# sorting_performance -Try sorting algorithms and evaluate performance by time and number of steps + +## Why does this exist? + +- trying different sorting techniques to see difference in performance +- understand (if possible) trade-off between heavy computations and number of computations +(a-la would you fight 1 horse sized duck or 100 duck size horses) + +## How to run it? + +Please run in python 3 for best results. No special packages required. +Some testing/coverage based tests will be written. Those might need a +look see in the `requirements.txt` file + +### Sorting performance +Try sorting algorithms and evaluate performance by +- total time to solve +- number of steps taken to solve + +### Inspiration + +- local supermarket is giving out free football player cards +- each card has a number so that unique orders can be established +- this helps in keeping track of what cards we have and which ones we want to trade +- card numbers range from 1 to 250 +- arranging all cards in to packs is a daily chore + - **Pack 1** : unique cards (sorted) + - **Pack 2**: extra copies of some cards in pack 1 + +Doing this on the dining table, +I realized that both me and my son are using various methods +of sorting! Constantly trying new sorting methods to either +- *speed up the process* +- OR *slow it down and dumb it down so that we can do it while chatting or watching cartoons* + +## Credits + +- [reddit thread](https://www.reddit.com/r/learnpython/comments/exese6/what_are_some_of_the_projects_i_can_start_working/fg7skxp/) +- [P vs NP problem explanation video in youtube uses a sorting based example](https://youtu.be/EHp4FPyajKQ?t=515) \ No newline at end of file diff --git a/app_scripts/create_check_random_number_list.py b/app_scripts/create_check_random_number_list.py new file mode 100644 index 0000000..e55e387 --- /dev/null +++ b/app_scripts/create_check_random_number_list.py @@ -0,0 +1,67 @@ +import random + +def generate_list(min_number:int=1, max_number:int=1000000, count:int=1000, uniqued_list:bool=True) -> list: + """ + This function will create a list of random numbers. + It accepts min number in list, max number in list and count of numbers in list. + + + :param min_number: mininum value of single number in list of random numbers + :param max_number: maximum value of single number in list of random numbers + :param count: number of random numbers expected + :param uniqued_list: boolean of whether or not the returned random list is allowed to have duplicate values or not + :return: list of size `count` of random numbers in random order + """ + + random_numbers_list = [] + + while not len(random_numbers_list) > count: + + temp = random.randint(min_number, max_number) + + if uniqued_list: + if temp in random_numbers_list: + continue + + random_numbers_list.append(temp) + + if not check_order(random_numbers_list)["random_bool"]: + # If the generated list is somehow ordered then run the generator + # until randomness found. Useful in test scenarios. + generate_list(min_number, max_number, count, uniqued_list) + + return random_numbers_list + + +def check_order(list_of_numbers: list)-> bool: + """ + Take a list of numbers and returns whether the list + was ordered in ascending manner or not + + :param list_of_numbers: + :return: dictionary with 1 key `random_bool`. + - Value True means the list is random. + - Value False means the list is ordered in an ascending manner. + + Doctest + + >>> check_order([1,2,3]) + {'random_bool': False} + + >>> check_order([2,2,3]) + {'random_bool': False} + + >>> check_order([3,2,3]) + {'random_bool': True} + + """ + + state_of_randomness = {"random_bool": False} + + for index, num in enumerate(list_of_numbers[:-1]): + if num>list_of_numbers[index + 1]: + state_of_randomness["random_bool"] = True + + return state_of_randomness + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e69de29 From 6202cf2008b3a9c2df15fad3bc722f36e0fd3572 Mon Sep 17 00:00:00 2001 From: Ambar Date: Tue, 4 Feb 2020 17:28:40 +0100 Subject: [PATCH 02/20] fix count of random numbers in generator, a few sorting algorithms executed --- .../create_check_random_number_list.py | 9 +- sort_algorithms.py | 138 ++++++++++++++++++ 2 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 sort_algorithms.py diff --git a/app_scripts/create_check_random_number_list.py b/app_scripts/create_check_random_number_list.py index e55e387..3e6680f 100644 --- a/app_scripts/create_check_random_number_list.py +++ b/app_scripts/create_check_random_number_list.py @@ -4,7 +4,7 @@ def generate_list(min_number:int=1, max_number:int=1000000, count:int=1000, uniq """ This function will create a list of random numbers. It accepts min number in list, max number in list and count of numbers in list. - + Note; if `count` is None then it defaults to 1000 :param min_number: mininum value of single number in list of random numbers :param max_number: maximum value of single number in list of random numbers @@ -13,9 +13,12 @@ def generate_list(min_number:int=1, max_number:int=1000000, count:int=1000, uniq :return: list of size `count` of random numbers in random order """ + if count == None: + count = 1000 + random_numbers_list = [] - while not len(random_numbers_list) > count: + while not len(random_numbers_list) >= count: temp = random.randint(min_number, max_number) @@ -33,7 +36,7 @@ def generate_list(min_number:int=1, max_number:int=1000000, count:int=1000, uniq return random_numbers_list -def check_order(list_of_numbers: list)-> bool: +def check_order(list_of_numbers: list) -> dict: """ Take a list of numbers and returns whether the list was ordered in ascending manner or not diff --git a/sort_algorithms.py b/sort_algorithms.py new file mode 100644 index 0000000..15bf814 --- /dev/null +++ b/sort_algorithms.py @@ -0,0 +1,138 @@ +import copy +from datetime import datetime + +from app_scripts.create_check_random_number_list import generate_list, check_order + +debug = False +# count = None +count = 500 + +if debug: + unique_random_list = generate_list(count=5, uniqued_list=True) + duplicate_allowed_random_list = generate_list(count=5, uniqued_list=False) +else: + unique_random_list = generate_list(count=count, uniqued_list=True) + duplicate_allowed_random_list = generate_list(count=count, uniqued_list=False) + +known_solution_unique_random_list = [] +known_solution_duplicate_allowed_random_list = [] + + +def print_sort_progress(lowest_number: int, step_count: int): + if debug: + print(f"Lowest number in this round = {lowest_number}. Step_count = {step_count}") + + +def print_sort_results(method_name: str, time_taken_to_sort: float, step_count: int, sort_state: bool, + matches_known_solution: bool = None): + result_f_string = f"Sort {method_name} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." + + if matches_known_solution is not None: + print(f"{result_f_string} Accurate (against known solution: {matches_known_solution})") + else: + print(result_f_string) + + +# Method 1: Selection sort. The method that is easiest. I used it to sort the cards. +# Settings: only unique numbers + +method_name = "Selection sort 1.0" +random_list = copy.deepcopy(unique_random_list) +step_count = 0 +start_time = datetime.now() + +ordered_list = [] + +cache_random_list = copy.deepcopy(random_list) + +while len(ordered_list) != len(random_list): + step_count += 1 + lowest_number = cache_random_list[0] + for i in cache_random_list: + step_count += 1 + if i < lowest_number: + step_count += 1 + lowest_number = i + print_sort_progress(lowest_number, step_count) + + ordered_list.append(lowest_number) + cache_random_list.remove(lowest_number) + +time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) +sort_state = check_order(ordered_list)["random_bool"] is False +known_solution_unique_random_list = copy.deepcopy(ordered_list) +print_sort_results(method_name, time_taken_to_sort, step_count, sort_state) + +# Method 1.1: Selection sort. +# Settings: duplicate numbers allowed + +method_name = "Selection sort 1.1" +random_list = copy.deepcopy(duplicate_allowed_random_list) +step_count = 0 +start_time = datetime.now() + +ordered_list = [] + +cache_random_list = copy.deepcopy(random_list) + +while len(ordered_list) != len(random_list): + step_count += 1 + lowest_number = cache_random_list[0] + for i in cache_random_list: + step_count += 1 + if i < lowest_number: + step_count += 1 + lowest_number = i + # print_sort_progress(lowest_number, step_count) + + lowest_number_list = [] + for i in cache_random_list: + step_count += 1 + if i == lowest_number: + step_count += 1 + lowest_number_list.append(i) + + ordered_list.extend(lowest_number_list) + + for i in lowest_number_list: + step_count += 1 + cache_random_list.remove(i) + +time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) +sort_state = check_order(ordered_list)["random_bool"] is False +known_solution_duplicate_allowed_random_list = copy.deepcopy(ordered_list) +print_sort_results(method_name, time_taken_to_sort, step_count, sort_state) + +# Method 2: Bubble sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Bubble_sort +# Settings: duplicate/unique agnostic (I think) + +method_name = "Bubble sort" +random_list = copy.deepcopy(unique_random_list) +step_count = 0 +start_time = datetime.now() + +ordered_list = copy.deepcopy(random_list) + +cache_random_list = copy.deepcopy(random_list) +found_pairs_to_swap = True + +while check_order(ordered_list)["random_bool"]: + step_count += 1 + cache_random_list = copy.deepcopy(ordered_list) + for num_index, num in enumerate(cache_random_list[:-1]): + step_count += 1 + if num > ordered_list[num_index + 1]: + step_count += 1 + ordered_list[num_index] = copy.deepcopy(ordered_list[num_index + 1]) + ordered_list[num_index + 1] = num + print_sort_progress(ordered_list[num_index + 1], step_count) + break + +time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) +sort_state = check_order(ordered_list)["random_bool"] is False +matches_known_solution = (ordered_list == known_solution_unique_random_list) +print_sort_results(method_name, time_taken_to_sort, step_count, sort_state, matches_known_solution) + +# TODO: Merge Sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Merge_sort +# TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort +# TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort From c528008c958789dce70b2b58057eecbe51235b68 Mon Sep 17 00:00:00 2001 From: Ambar Date: Wed, 5 Feb 2020 13:48:57 +0100 Subject: [PATCH 03/20] modularize the sort algorithms. properly handle `debug` and `help_text` --- app_scripts/print_scripts.py | 13 ++++ sort_algorithm_runner.py | 28 +++++++ sort_algorithms.py | 138 ----------------------------------- sorters/bubble_sort.py | 40 ++++++++++ sorters/selection_sort_1.py | 43 +++++++++++ sorters/selection_sort_2.py | 53 ++++++++++++++ 6 files changed, 177 insertions(+), 138 deletions(-) create mode 100644 app_scripts/print_scripts.py create mode 100644 sort_algorithm_runner.py delete mode 100644 sort_algorithms.py create mode 100644 sorters/bubble_sort.py create mode 100644 sorters/selection_sort_1.py create mode 100644 sorters/selection_sort_2.py diff --git a/app_scripts/print_scripts.py b/app_scripts/print_scripts.py new file mode 100644 index 0000000..ad56d38 --- /dev/null +++ b/app_scripts/print_scripts.py @@ -0,0 +1,13 @@ +def print_sort_progress(lowest_number: int, step_count: int, debug: bool): + if debug: + print(f"Lowest number in this round = {lowest_number}. Step_count = {step_count}") + + +def print_sort_results(method_name: str, time_taken_to_sort: float, step_count: int, sort_state: bool, + matches_known_solution: bool = None, help_text: str = ""): + result_f_string = f"Sort {method_name} {help_text} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." + + if matches_known_solution is not None: + print(f"{result_f_string} Accurate (against known solution: {matches_known_solution})") + else: + print(result_f_string) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py new file mode 100644 index 0000000..fd5c1fa --- /dev/null +++ b/sort_algorithm_runner.py @@ -0,0 +1,28 @@ +from app_scripts.create_check_random_number_list import generate_list +from sorters.bubble_sort import bubble_sort as bu_s +from sorters.selection_sort_1 import selection_sort as ss1 +from sorters.selection_sort_2 import selection_sort as ss2 + +debug = False +count = None +# count = 500 + +if debug: + unique_random_list = generate_list(count=5, uniqued_list=True) + duplicate_allowed_random_list = generate_list(count=5, uniqued_list=False) +else: + unique_random_list = generate_list(count=count, uniqued_list=True) + duplicate_allowed_random_list = generate_list(count=count, uniqued_list=False) + +known_solution_unique_random_list = ss1(unique_random_list, debug=debug, help_text="for unique numbers") +known_solution_duplicate_allowed_random_list = ss2(duplicate_allowed_random_list, debug=debug, + help_text="for non-unique numbers") + +bu_s(unique_random_list, known_solution_unique_random_list, debug=debug, help_text="for unique numbers") +bu_s(duplicate_allowed_random_list, known_solution_duplicate_allowed_random_list, debug=debug, + help_text="for non-unique numbers") + +# TODO: Merge Sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Merge_sort +# TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort +# TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort +# TODO: Bucket Sort (this is the method that I used to sort cards when I need to sort under distraction. Takes longer because more steps required.) https://en.wikipedia.org/wiki/Bucket_sort diff --git a/sort_algorithms.py b/sort_algorithms.py deleted file mode 100644 index 15bf814..0000000 --- a/sort_algorithms.py +++ /dev/null @@ -1,138 +0,0 @@ -import copy -from datetime import datetime - -from app_scripts.create_check_random_number_list import generate_list, check_order - -debug = False -# count = None -count = 500 - -if debug: - unique_random_list = generate_list(count=5, uniqued_list=True) - duplicate_allowed_random_list = generate_list(count=5, uniqued_list=False) -else: - unique_random_list = generate_list(count=count, uniqued_list=True) - duplicate_allowed_random_list = generate_list(count=count, uniqued_list=False) - -known_solution_unique_random_list = [] -known_solution_duplicate_allowed_random_list = [] - - -def print_sort_progress(lowest_number: int, step_count: int): - if debug: - print(f"Lowest number in this round = {lowest_number}. Step_count = {step_count}") - - -def print_sort_results(method_name: str, time_taken_to_sort: float, step_count: int, sort_state: bool, - matches_known_solution: bool = None): - result_f_string = f"Sort {method_name} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." - - if matches_known_solution is not None: - print(f"{result_f_string} Accurate (against known solution: {matches_known_solution})") - else: - print(result_f_string) - - -# Method 1: Selection sort. The method that is easiest. I used it to sort the cards. -# Settings: only unique numbers - -method_name = "Selection sort 1.0" -random_list = copy.deepcopy(unique_random_list) -step_count = 0 -start_time = datetime.now() - -ordered_list = [] - -cache_random_list = copy.deepcopy(random_list) - -while len(ordered_list) != len(random_list): - step_count += 1 - lowest_number = cache_random_list[0] - for i in cache_random_list: - step_count += 1 - if i < lowest_number: - step_count += 1 - lowest_number = i - print_sort_progress(lowest_number, step_count) - - ordered_list.append(lowest_number) - cache_random_list.remove(lowest_number) - -time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) -sort_state = check_order(ordered_list)["random_bool"] is False -known_solution_unique_random_list = copy.deepcopy(ordered_list) -print_sort_results(method_name, time_taken_to_sort, step_count, sort_state) - -# Method 1.1: Selection sort. -# Settings: duplicate numbers allowed - -method_name = "Selection sort 1.1" -random_list = copy.deepcopy(duplicate_allowed_random_list) -step_count = 0 -start_time = datetime.now() - -ordered_list = [] - -cache_random_list = copy.deepcopy(random_list) - -while len(ordered_list) != len(random_list): - step_count += 1 - lowest_number = cache_random_list[0] - for i in cache_random_list: - step_count += 1 - if i < lowest_number: - step_count += 1 - lowest_number = i - # print_sort_progress(lowest_number, step_count) - - lowest_number_list = [] - for i in cache_random_list: - step_count += 1 - if i == lowest_number: - step_count += 1 - lowest_number_list.append(i) - - ordered_list.extend(lowest_number_list) - - for i in lowest_number_list: - step_count += 1 - cache_random_list.remove(i) - -time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) -sort_state = check_order(ordered_list)["random_bool"] is False -known_solution_duplicate_allowed_random_list = copy.deepcopy(ordered_list) -print_sort_results(method_name, time_taken_to_sort, step_count, sort_state) - -# Method 2: Bubble sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Bubble_sort -# Settings: duplicate/unique agnostic (I think) - -method_name = "Bubble sort" -random_list = copy.deepcopy(unique_random_list) -step_count = 0 -start_time = datetime.now() - -ordered_list = copy.deepcopy(random_list) - -cache_random_list = copy.deepcopy(random_list) -found_pairs_to_swap = True - -while check_order(ordered_list)["random_bool"]: - step_count += 1 - cache_random_list = copy.deepcopy(ordered_list) - for num_index, num in enumerate(cache_random_list[:-1]): - step_count += 1 - if num > ordered_list[num_index + 1]: - step_count += 1 - ordered_list[num_index] = copy.deepcopy(ordered_list[num_index + 1]) - ordered_list[num_index + 1] = num - print_sort_progress(ordered_list[num_index + 1], step_count) - break - -time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) -sort_state = check_order(ordered_list)["random_bool"] is False -matches_known_solution = (ordered_list == known_solution_unique_random_list) -print_sort_results(method_name, time_taken_to_sort, step_count, sort_state, matches_known_solution) - -# TODO: Merge Sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Merge_sort -# TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort -# TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort diff --git a/sorters/bubble_sort.py b/sorters/bubble_sort.py new file mode 100644 index 0000000..47109a0 --- /dev/null +++ b/sorters/bubble_sort.py @@ -0,0 +1,40 @@ +import copy +from datetime import datetime + +from app_scripts.create_check_random_number_list import check_order +from app_scripts.print_scripts import print_sort_progress, print_sort_results + + +# Method 2: Bubble sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Bubble_sort +# Settings: duplicate/unique agnostic (I think) + + +def bubble_sort(random_list: list, known_solution_unique_random_list: list, debug: bool, help_text: str = ""): + method_name = "Bubble sort" + step_count = 0 + start_time = datetime.now() + + ordered_list = copy.deepcopy(random_list) + + while check_order(ordered_list)["random_bool"]: + step_count += 1 + cache_random_list = copy.deepcopy(ordered_list) + for num_index, num in enumerate(cache_random_list[:-1]): + step_count += 1 + if num > ordered_list[num_index + 1]: + step_count += 1 + ordered_list[num_index] = copy.deepcopy(ordered_list[num_index + 1]) + ordered_list[num_index + 1] = num + print_sort_progress(ordered_list[num_index + 1], step_count, debug=debug) + break + + time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) + sort_state = check_order(ordered_list)["random_bool"] is False + matches_known_solution = (ordered_list == known_solution_unique_random_list) + print_sort_results(method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + matches_known_solution=matches_known_solution, + help_text=help_text + ) diff --git a/sorters/selection_sort_1.py b/sorters/selection_sort_1.py new file mode 100644 index 0000000..0fd76ad --- /dev/null +++ b/sorters/selection_sort_1.py @@ -0,0 +1,43 @@ +import copy +from datetime import datetime + +from app_scripts.create_check_random_number_list import check_order +from app_scripts.print_scripts import print_sort_progress, print_sort_results + + +# Method 1: Selection sort. The method that is easiest. I used it to sort the cards. this was the quickest for me. +# Settings: only unique numbers + +def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: + method_name = "Selection sort 1.0" + step_count = 0 + start_time = datetime.now() + + ordered_list = [] + + cache_random_list = copy.deepcopy(random_list) + + while len(ordered_list) != len(random_list): + step_count += 1 + lowest_number = cache_random_list[0] + for i in cache_random_list: + step_count += 1 + if i < lowest_number: + step_count += 1 + lowest_number = i + print_sort_progress(lowest_number, step_count, debug=debug) + + ordered_list.append(lowest_number) + cache_random_list.remove(lowest_number) + + time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) + sort_state = check_order(ordered_list)["random_bool"] is False + known_solution_unique_random_list = copy.deepcopy(ordered_list) + print_sort_results(method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + help_text=help_text, + ) + + return known_solution_unique_random_list diff --git a/sorters/selection_sort_2.py b/sorters/selection_sort_2.py new file mode 100644 index 0000000..5cb6d6c --- /dev/null +++ b/sorters/selection_sort_2.py @@ -0,0 +1,53 @@ +import copy +from datetime import datetime + +from app_scripts.create_check_random_number_list import check_order +from app_scripts.print_scripts import print_sort_progress, print_sort_results + + +# Method 1.1: Selection sort. +# Settings: duplicate numbers allowed + +def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: + method_name = "Selection sort 1.1" + step_count = 0 + start_time = datetime.now() + + ordered_list = [] + + cache_random_list = copy.deepcopy(random_list) + + while len(ordered_list) != len(random_list): + step_count += 1 + lowest_number = cache_random_list[0] + for i in cache_random_list: + step_count += 1 + if i < lowest_number: + step_count += 1 + lowest_number = i + print_sort_progress(lowest_number, step_count, debug=debug) + + lowest_number_list = [] + for i in cache_random_list: + step_count += 1 + if i == lowest_number: + step_count += 1 + lowest_number_list.append(i) + + ordered_list.extend(lowest_number_list) + + for i in lowest_number_list: + step_count += 1 + cache_random_list.remove(i) + + time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) + sort_state = check_order(ordered_list)["random_bool"] is False + known_solution_duplicate_allowed_random_list = copy.deepcopy(ordered_list) + print_sort_results(method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + help_text=help_text, + ) + + return known_solution_duplicate_allowed_random_list From 3295fbad066e1584ff1bd1320e84aeb65ebb0751 Mon Sep 17 00:00:00 2001 From: Ambar Date: Thu, 6 Feb 2020 00:22:50 +0100 Subject: [PATCH 04/20] reformatting code for easy readability using black --- .../create_check_random_number_list.py | 12 ++++-- app_scripts/print_scripts.py | 18 +++++++-- sort_algorithm_runner.py | 40 ++++++++++++++----- sorters/bubble_sort.py | 30 +++++++++----- sorters/selection_sort_1.py | 14 ++++--- sorters/selection_sort_2.py | 14 ++++--- 6 files changed, 88 insertions(+), 40 deletions(-) diff --git a/app_scripts/create_check_random_number_list.py b/app_scripts/create_check_random_number_list.py index 3e6680f..b258bfe 100644 --- a/app_scripts/create_check_random_number_list.py +++ b/app_scripts/create_check_random_number_list.py @@ -1,6 +1,12 @@ import random -def generate_list(min_number:int=1, max_number:int=1000000, count:int=1000, uniqued_list:bool=True) -> list: + +def generate_list( + min_number: int = 1, + max_number: int = 1000000, + count: int = 1000, + uniqued_list: bool = True, +) -> list: """ This function will create a list of random numbers. It accepts min number in list, max number in list and count of numbers in list. @@ -62,9 +68,7 @@ def check_order(list_of_numbers: list) -> dict: state_of_randomness = {"random_bool": False} for index, num in enumerate(list_of_numbers[:-1]): - if num>list_of_numbers[index + 1]: + if num > list_of_numbers[index + 1]: state_of_randomness["random_bool"] = True return state_of_randomness - - diff --git a/app_scripts/print_scripts.py b/app_scripts/print_scripts.py index ad56d38..8313d38 100644 --- a/app_scripts/print_scripts.py +++ b/app_scripts/print_scripts.py @@ -1,13 +1,23 @@ def print_sort_progress(lowest_number: int, step_count: int, debug: bool): if debug: - print(f"Lowest number in this round = {lowest_number}. Step_count = {step_count}") + print( + f"Lowest number in this round = {lowest_number}. Step_count = {step_count}" + ) -def print_sort_results(method_name: str, time_taken_to_sort: float, step_count: int, sort_state: bool, - matches_known_solution: bool = None, help_text: str = ""): +def print_sort_results( + method_name: str, + time_taken_to_sort: float, + step_count: int, + sort_state: bool, + matches_known_solution: bool = None, + help_text: str = "", +): result_f_string = f"Sort {method_name} {help_text} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." if matches_known_solution is not None: - print(f"{result_f_string} Accurate (against known solution: {matches_known_solution})") + print( + f"{result_f_string} Accurate (against known solution: {matches_known_solution})" + ) else: print(result_f_string) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index fd5c1fa..6671655 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -1,26 +1,48 @@ from app_scripts.create_check_random_number_list import generate_list from sorters.bubble_sort import bubble_sort as bu_s +from sorters.merge_sort import merge_sort as ms from sorters.selection_sort_1 import selection_sort as ss1 from sorters.selection_sort_2 import selection_sort as ss2 -debug = False +debug = True count = None # count = 500 if debug: - unique_random_list = generate_list(count=5, uniqued_list=True) - duplicate_allowed_random_list = generate_list(count=5, uniqued_list=False) + unique_random_list = generate_list(max_number=20, count=6, uniqued_list=True) + duplicate_allowed_random_list = generate_list( + max_number=20, count=6, uniqued_list=False + ) else: unique_random_list = generate_list(count=count, uniqued_list=True) duplicate_allowed_random_list = generate_list(count=count, uniqued_list=False) -known_solution_unique_random_list = ss1(unique_random_list, debug=debug, help_text="for unique numbers") -known_solution_duplicate_allowed_random_list = ss2(duplicate_allowed_random_list, debug=debug, - help_text="for non-unique numbers") +known_solution_unique_random_list = ss1( + unique_random_list, debug=debug, help_text="for unique numbers" +) +known_solution_duplicate_allowed_random_list = ss2( + duplicate_allowed_random_list, debug=debug, help_text="for non-unique numbers" +) -bu_s(unique_random_list, known_solution_unique_random_list, debug=debug, help_text="for unique numbers") -bu_s(duplicate_allowed_random_list, known_solution_duplicate_allowed_random_list, debug=debug, - help_text="for non-unique numbers") +bu_s( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", +) +bu_s( + duplicate_allowed_random_list, + known_solution_duplicate_allowed_random_list, + debug=debug, + help_text="for non-unique numbers", +) + +ms( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", +) # TODO: Merge Sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Merge_sort # TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort diff --git a/sorters/bubble_sort.py b/sorters/bubble_sort.py index 47109a0..6d5a220 100644 --- a/sorters/bubble_sort.py +++ b/sorters/bubble_sort.py @@ -6,10 +6,15 @@ # Method 2: Bubble sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Bubble_sort -# Settings: duplicate/unique agnostic (I think) +# Settings: duplicate/unique agnostic -def bubble_sort(random_list: list, known_solution_unique_random_list: list, debug: bool, help_text: str = ""): +def bubble_sort( + random_list: list, + known_solution_unique_random_list: list, + debug: bool, + help_text: str = "", +): method_name = "Bubble sort" step_count = 0 start_time = datetime.now() @@ -25,16 +30,19 @@ def bubble_sort(random_list: list, known_solution_unique_random_list: list, debu step_count += 1 ordered_list[num_index] = copy.deepcopy(ordered_list[num_index + 1]) ordered_list[num_index + 1] = num - print_sort_progress(ordered_list[num_index + 1], step_count, debug=debug) + print_sort_progress( + ordered_list[num_index + 1], step_count, debug=debug + ) break time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) sort_state = check_order(ordered_list)["random_bool"] is False - matches_known_solution = (ordered_list == known_solution_unique_random_list) - print_sort_results(method_name=method_name, - time_taken_to_sort=time_taken_to_sort, - step_count=step_count, - sort_state=sort_state, - matches_known_solution=matches_known_solution, - help_text=help_text - ) + matches_known_solution = ordered_list == known_solution_unique_random_list + print_sort_results( + method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + matches_known_solution=matches_known_solution, + help_text=help_text, + ) diff --git a/sorters/selection_sort_1.py b/sorters/selection_sort_1.py index 0fd76ad..f61edc9 100644 --- a/sorters/selection_sort_1.py +++ b/sorters/selection_sort_1.py @@ -8,6 +8,7 @@ # Method 1: Selection sort. The method that is easiest. I used it to sort the cards. this was the quickest for me. # Settings: only unique numbers + def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: method_name = "Selection sort 1.0" step_count = 0 @@ -33,11 +34,12 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) sort_state = check_order(ordered_list)["random_bool"] is False known_solution_unique_random_list = copy.deepcopy(ordered_list) - print_sort_results(method_name=method_name, - time_taken_to_sort=time_taken_to_sort, - step_count=step_count, - sort_state=sort_state, - help_text=help_text, - ) + print_sort_results( + method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + help_text=help_text, + ) return known_solution_unique_random_list diff --git a/sorters/selection_sort_2.py b/sorters/selection_sort_2.py index 5cb6d6c..37bcf66 100644 --- a/sorters/selection_sort_2.py +++ b/sorters/selection_sort_2.py @@ -8,6 +8,7 @@ # Method 1.1: Selection sort. # Settings: duplicate numbers allowed + def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: method_name = "Selection sort 1.1" step_count = 0 @@ -43,11 +44,12 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) sort_state = check_order(ordered_list)["random_bool"] is False known_solution_duplicate_allowed_random_list = copy.deepcopy(ordered_list) - print_sort_results(method_name=method_name, - time_taken_to_sort=time_taken_to_sort, - step_count=step_count, - sort_state=sort_state, - help_text=help_text, - ) + print_sort_results( + method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + help_text=help_text, + ) return known_solution_duplicate_allowed_random_list From 4b0827e69282e05b7b03067201e9dc85308c75d5 Mon Sep 17 00:00:00 2001 From: Ambar Date: Thu, 6 Feb 2020 02:09:05 +0100 Subject: [PATCH 05/20] fix bubble sort for algorithm efficiency --- sorters/bubble_sort.py | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/sorters/bubble_sort.py b/sorters/bubble_sort.py index 6d5a220..6fcc98f 100644 --- a/sorters/bubble_sort.py +++ b/sorters/bubble_sort.py @@ -2,7 +2,7 @@ from datetime import datetime from app_scripts.create_check_random_number_list import check_order -from app_scripts.print_scripts import print_sort_progress, print_sort_results +from app_scripts.print_scripts import print_sort_results # Method 2: Bubble sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Bubble_sort @@ -19,21 +19,34 @@ def bubble_sort( step_count = 0 start_time = datetime.now() - ordered_list = copy.deepcopy(random_list) + cache_random_list = copy.deepcopy(random_list) + ordered_list = [] + count_cache_list = len(cache_random_list) - while check_order(ordered_list)["random_bool"]: - step_count += 1 - cache_random_list = copy.deepcopy(ordered_list) - for num_index, num in enumerate(cache_random_list[:-1]): - step_count += 1 - if num > ordered_list[num_index + 1]: + while count_cache_list > 1: + for num_index in range(count_cache_list - 1): + current_num = cache_random_list[num_index] + next_num = cache_random_list[num_index + 1] + + if current_num > next_num: + cache_random_list[num_index] = next_num + cache_random_list[num_index + 1] = current_num + step_count += 1 + else: + cache_random_list[num_index] = current_num + cache_random_list[num_index + 1] = next_num step_count += 1 - ordered_list[num_index] = copy.deepcopy(ordered_list[num_index + 1]) - ordered_list[num_index + 1] = num - print_sort_progress( - ordered_list[num_index + 1], step_count, debug=debug - ) - break + + ordered_list.insert(0, cache_random_list[-1]) + cache_random_list = copy.deepcopy(cache_random_list[:-1]) + count_cache_list = len(cache_random_list) + + if debug: + print("\t\t\tgrowing ordered_list", ordered_list) + + ordered_list = cache_random_list + ordered_list + if debug: + print("\t\t\t\tordered_list", ordered_list) time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) sort_state = check_order(ordered_list)["random_bool"] is False From 575d967b8ed78affc7f22b685ebebd4cc646f9d9 Mon Sep 17 00:00:00 2001 From: Ambar Date: Thu, 6 Feb 2020 02:10:54 +0100 Subject: [PATCH 06/20] reformatting code for easy readability using black --- app_scripts/print_scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app_scripts/print_scripts.py b/app_scripts/print_scripts.py index 8313d38..42b705d 100644 --- a/app_scripts/print_scripts.py +++ b/app_scripts/print_scripts.py @@ -13,7 +13,7 @@ def print_sort_results( matches_known_solution: bool = None, help_text: str = "", ): - result_f_string = f"Sort {method_name} {help_text} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." + result_f_string = f"{method_name} {help_text} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." if matches_known_solution is not None: print( From a0b2a7bf2f047b6e52a8da571c2da5743c553d9a Mon Sep 17 00:00:00 2001 From: Ambar Date: Thu, 6 Feb 2020 02:11:55 +0100 Subject: [PATCH 07/20] implement clearer step count mechanism. Also fix the mechanism to avoid double counting --- sorters/selection_sort_1.py | 4 ++-- sorters/selection_sort_2.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sorters/selection_sort_1.py b/sorters/selection_sort_1.py index f61edc9..a1858ab 100644 --- a/sorters/selection_sort_1.py +++ b/sorters/selection_sort_1.py @@ -19,13 +19,13 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: cache_random_list = copy.deepcopy(random_list) while len(ordered_list) != len(random_list): - step_count += 1 lowest_number = cache_random_list[0] for i in cache_random_list: - step_count += 1 if i < lowest_number: step_count += 1 lowest_number = i + else: + step_count += 1 print_sort_progress(lowest_number, step_count, debug=debug) ordered_list.append(lowest_number) diff --git a/sorters/selection_sort_2.py b/sorters/selection_sort_2.py index 37bcf66..f00bbb1 100644 --- a/sorters/selection_sort_2.py +++ b/sorters/selection_sort_2.py @@ -19,13 +19,13 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: cache_random_list = copy.deepcopy(random_list) while len(ordered_list) != len(random_list): - step_count += 1 lowest_number = cache_random_list[0] for i in cache_random_list: - step_count += 1 if i < lowest_number: step_count += 1 lowest_number = i + else: + step_count += 1 print_sort_progress(lowest_number, step_count, debug=debug) lowest_number_list = [] @@ -34,6 +34,8 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: if i == lowest_number: step_count += 1 lowest_number_list.append(i) + else: + step_count += 1 ordered_list.extend(lowest_number_list) From a0286906492d347d961fbb17dd4877b902a955d5 Mon Sep 17 00:00:00 2001 From: Ambar Date: Thu, 6 Feb 2020 02:12:58 +0100 Subject: [PATCH 08/20] implement merge sort --- sort_algorithm_runner.py | 11 ++++- sorters/merge_sort.py | 91 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 sorters/merge_sort.py diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index 6671655..d0f2a5d 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -4,7 +4,7 @@ from sorters.selection_sort_1 import selection_sort as ss1 from sorters.selection_sort_2 import selection_sort as ss2 -debug = True +debug = False count = None # count = 500 @@ -30,6 +30,7 @@ debug=debug, help_text="for unique numbers", ) + bu_s( duplicate_allowed_random_list, known_solution_duplicate_allowed_random_list, @@ -44,7 +45,13 @@ help_text="for unique numbers", ) -# TODO: Merge Sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Merge_sort +ms( + duplicate_allowed_random_list, + known_solution_duplicate_allowed_random_list, + debug=debug, + help_text="for non-unique numbers", +) + # TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort # TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort # TODO: Bucket Sort (this is the method that I used to sort cards when I need to sort under distraction. Takes longer because more steps required.) https://en.wikipedia.org/wiki/Bucket_sort diff --git a/sorters/merge_sort.py b/sorters/merge_sort.py new file mode 100644 index 0000000..aa6805c --- /dev/null +++ b/sorters/merge_sort.py @@ -0,0 +1,91 @@ +import copy +from datetime import datetime + +from app_scripts.create_check_random_number_list import check_order +from app_scripts.print_scripts import print_sort_results + + +# Method 3: Merge sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Merge_sort +# Settings: duplicate/unique agnostic (I think) + + +def get_combined_chunk(chunk_a: list, chunk_b: list) -> dict: + chunk_ordered_list = [] + step_count = 0 + + while chunk_a and chunk_b: + + if chunk_a[0] > chunk_b[0]: + chunk_ordered_list.append(chunk_b[0]) + chunk_b = chunk_b[1:] + step_count += 1 + else: + chunk_ordered_list.append(chunk_a[0]) + chunk_a = chunk_a[1:] + step_count += 1 + + chunk_ordered_list.extend(chunk_a) + chunk_ordered_list.extend(chunk_b) + + return { + "chunk_ordered_list": chunk_ordered_list, + "step_count": step_count, + } + + +def merge_sort( + random_list: list, + known_solution_unique_random_list: list, + debug: bool, + help_text: str = "", +): + method_name = "Merge sort" + step_count = 0 + start_time = datetime.now() + ordered_list = copy.deepcopy(random_list) + chunk_size = 1 + + random_count = len(random_list) + + while chunk_size <= random_count: + cache_random_list = copy.deepcopy(ordered_list) + ordered_list = [] + + chunk_indexes = [i for i in range(0, random_count, chunk_size)] + [random_count] + step_count += len(chunk_indexes) + chunk_pos = list(zip(chunk_indexes[:-1], chunk_indexes[1:])) + chunks = [cache_random_list[i[0] : i[1]] for i in chunk_pos] + step_count += len(chunks) + + for chunk_index, a_chunk in enumerate(chunks[:-1]): + if chunk_index % 2 == 0: + temp = get_combined_chunk(a_chunk, chunks[chunk_index + 1]) + ordered_list.extend(temp["chunk_ordered_list"]) + step_count += temp["step_count"] + + leftover_random_numbers = cache_random_list[ + len(ordered_list) : len(cache_random_list) + ] + ordered_list.extend(leftover_random_numbers) + + chunk_size = chunk_size * 2 + + if debug: + print("current_chunk_size =", chunk_size) + print("\tcache_random_list", cache_random_list) + print("\tchunk_indexes", chunk_indexes) + print("\tchunk_pos", chunk_pos) + print("\tchunks", chunks) + print("\t\t`ordered_list`, next `cache_random_list`", ordered_list) + + time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) + sort_state = check_order(ordered_list)["random_bool"] is False + matches_known_solution = ordered_list == known_solution_unique_random_list + print_sort_results( + method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + matches_known_solution=matches_known_solution, + help_text=help_text, + ) From 361b97ba3c626557089f0e80ceaa05365eb9033d Mon Sep 17 00:00:00 2001 From: Ambar Date: Sat, 8 Feb 2020 10:23:46 +0100 Subject: [PATCH 09/20] change print to logging statements --- app_scripts/print_scripts.py | 8 +++++--- sort_algorithm_runner.py | 8 ++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/app_scripts/print_scripts.py b/app_scripts/print_scripts.py index 42b705d..3db4e4d 100644 --- a/app_scripts/print_scripts.py +++ b/app_scripts/print_scripts.py @@ -1,6 +1,8 @@ +import logging + def print_sort_progress(lowest_number: int, step_count: int, debug: bool): if debug: - print( + logging.info( f"Lowest number in this round = {lowest_number}. Step_count = {step_count}" ) @@ -16,8 +18,8 @@ def print_sort_results( result_f_string = f"{method_name} {help_text} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." if matches_known_solution is not None: - print( + logging.info( f"{result_f_string} Accurate (against known solution: {matches_known_solution})" ) else: - print(result_f_string) + logging.info(result_f_string) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index d0f2a5d..ed544ee 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -1,3 +1,5 @@ +import logging + from app_scripts.create_check_random_number_list import generate_list from sorters.bubble_sort import bubble_sort as bu_s from sorters.merge_sort import merge_sort as ms @@ -8,6 +10,12 @@ count = None # count = 500 +logging.basicConfig( + # filename=f"{my_dir}/sort_runner.log", + level=logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", +) + if debug: unique_random_list = generate_list(max_number=20, count=6, uniqued_list=True) duplicate_allowed_random_list = generate_list( From 0e2a336ecf98f1e82701a5176a90079770c4638b Mon Sep 17 00:00:00 2001 From: Ambar Date: Sat, 8 Feb 2020 10:24:23 +0100 Subject: [PATCH 10/20] doctest for all sorters --- sorters/bubble_sort.py | 19 +++++++++++++++++++ sorters/merge_sort.py | 36 +++++++++++++++++++++++++++++++++++- sorters/selection_sort_1.py | 16 ++++++++++++++++ sorters/selection_sort_2.py | 15 +++++++++++++++ 4 files changed, 85 insertions(+), 1 deletion(-) diff --git a/sorters/bubble_sort.py b/sorters/bubble_sort.py index 6fcc98f..83c7931 100644 --- a/sorters/bubble_sort.py +++ b/sorters/bubble_sort.py @@ -15,6 +15,23 @@ def bubble_sort( debug: bool, help_text: str = "", ): + """ + + :param random_list: + :param known_solution_unique_random_list: + :param debug: + :param help_text: + :return: + + Doctest + + >>> bubble_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) + [1, 2, 3, 4, 5] + + >>> bubble_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) + [1, 2, 2, 3, 3, 3] + + """ method_name = "Bubble sort" step_count = 0 start_time = datetime.now() @@ -59,3 +76,5 @@ def bubble_sort( matches_known_solution=matches_known_solution, help_text=help_text, ) + + return ordered_list diff --git a/sorters/merge_sort.py b/sorters/merge_sort.py index aa6805c..c56bd56 100644 --- a/sorters/merge_sort.py +++ b/sorters/merge_sort.py @@ -6,10 +6,25 @@ # Method 3: Merge sort (as done in youtube linked video in Readme) https://en.wikipedia.org/wiki/Merge_sort -# Settings: duplicate/unique agnostic (I think) +# Settings: duplicate/unique agnostic def get_combined_chunk(chunk_a: list, chunk_b: list) -> dict: + """ + + :param chunk_a: + :param chunk_b: + :return: + + Doctest + + >>> get_combined_chunk(chunk_a = [2], chunk_b = [1]) + {'chunk_ordered_list': [1, 2], 'step_count': 1} + + >>> get_combined_chunk(chunk_a = [2,4,5,8], chunk_b = [3,4,7]) + {'chunk_ordered_list': [2, 3, 4, 4, 5, 7, 8], 'step_count': 6} + + """ chunk_ordered_list = [] step_count = 0 @@ -39,6 +54,23 @@ def merge_sort( debug: bool, help_text: str = "", ): + """ + + :param random_list: + :param known_solution_unique_random_list: + :param debug: + :param help_text: + :return: + + Doctest + + >>> merge_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) + [1, 2, 3, 4, 5] + + >>> merge_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) + [1, 2, 2, 3, 3, 3] + + """ method_name = "Merge sort" step_count = 0 start_time = datetime.now() @@ -89,3 +121,5 @@ def merge_sort( matches_known_solution=matches_known_solution, help_text=help_text, ) + + return ordered_list diff --git a/sorters/selection_sort_1.py b/sorters/selection_sort_1.py index a1858ab..9e96582 100644 --- a/sorters/selection_sort_1.py +++ b/sorters/selection_sort_1.py @@ -10,6 +10,22 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: + """ + + :param random_list: + :param debug: + :param help_text: + :return: + + Doctest + + >>> selection_sort([5,4,3,2,1], debug=False) + [1, 2, 3, 4, 5] + + >>> selection_sort([3,3,2,1,2,3], debug=False) + [1, 2, 2, 3, 3, 3] + + """ method_name = "Selection sort 1.0" step_count = 0 start_time = datetime.now() diff --git a/sorters/selection_sort_2.py b/sorters/selection_sort_2.py index f00bbb1..c9003ac 100644 --- a/sorters/selection_sort_2.py +++ b/sorters/selection_sort_2.py @@ -10,6 +10,21 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: + """ + + :param random_list: + :param debug: + :param help_text: + :return: + + Doctest + + >>> selection_sort([5,4,3,2,1], debug=False) + [1, 2, 3, 4, 5] + + >>> selection_sort([3,3,2,1,2,3], debug=False) + [1, 2, 2, 3, 3, 3] + """ method_name = "Selection sort 1.1" step_count = 0 start_time = datetime.now() From 6470f997589508645801e601ac83a0d1dc998124 Mon Sep 17 00:00:00 2001 From: Ambar Date: Sat, 8 Feb 2020 10:31:29 +0100 Subject: [PATCH 11/20] github actions to run doctest on each commit --- .github/workflows/pythonapp.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/pythonapp.yml diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml new file mode 100644 index 0000000..a10fa16 --- /dev/null +++ b/.github/workflows/pythonapp.yml @@ -0,0 +1,29 @@ +name: Python application + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with doctest + run: | + python -m doctest -v app_scripts/*.py sorters/*.py \ No newline at end of file From 360babf1535b58f5f9bccbec2f019b3621e8cfdd Mon Sep 17 00:00:00 2001 From: Ambar Das Date: Sun, 23 Feb 2020 23:12:17 +0100 Subject: [PATCH 12/20] make multiple runs, create csv, query native sorter --- .gitignore | 4 +- .../create_check_random_number_list.py | 10 ++-- app_scripts/print_scripts.py | 40 ++++++++++++-- sort_algorithm_runner.py | 52 ++++++++++++++++++ sorters/bubble_sort.py | 10 ++-- sorters/merge_sort.py | 7 +-- sorters/native_sort.py | 55 +++++++++++++++++++ sorters/selection_sort_1.py | 5 +- sorters/selection_sort_2.py | 5 +- 9 files changed, 166 insertions(+), 22 deletions(-) create mode 100644 sorters/native_sort.py diff --git a/.gitignore b/.gitignore index c0d9e83..6682489 100644 --- a/.gitignore +++ b/.gitignore @@ -130,4 +130,6 @@ dmypy.json # Pycharm files .idea -.venv \ No newline at end of file +.venv + +*.csv \ No newline at end of file diff --git a/app_scripts/create_check_random_number_list.py b/app_scripts/create_check_random_number_list.py index b258bfe..3f4981d 100644 --- a/app_scripts/create_check_random_number_list.py +++ b/app_scripts/create_check_random_number_list.py @@ -2,10 +2,10 @@ def generate_list( - min_number: int = 1, - max_number: int = 1000000, - count: int = 1000, - uniqued_list: bool = True, + min_number: int = 1, + max_number: int = 1000000, + count: int = 1000, + uniqued_list: bool = True, ) -> list: """ This function will create a list of random numbers. @@ -19,7 +19,7 @@ def generate_list( :return: list of size `count` of random numbers in random order """ - if count == None: + if count is None: count = 1000 random_numbers_list = [] diff --git a/app_scripts/print_scripts.py b/app_scripts/print_scripts.py index 3db4e4d..5c574cc 100644 --- a/app_scripts/print_scripts.py +++ b/app_scripts/print_scripts.py @@ -1,4 +1,8 @@ +import csv import logging +import uuid +from pathlib import Path + def print_sort_progress(lowest_number: int, step_count: int, debug: bool): if debug: @@ -8,12 +12,13 @@ def print_sort_progress(lowest_number: int, step_count: int, debug: bool): def print_sort_results( - method_name: str, - time_taken_to_sort: float, - step_count: int, - sort_state: bool, - matches_known_solution: bool = None, - help_text: str = "", + method_name: str, + time_taken_to_sort: float, + step_count: int, + sort_state: bool, + matches_known_solution: bool = None, + help_text: str = "", + create_csv: bool = False, ): result_f_string = f"{method_name} {help_text} took {time_taken_to_sort} seconds to order in {step_count} steps. Check: Sort status = {sort_state}." @@ -23,3 +28,26 @@ def print_sort_results( ) else: logging.info(result_f_string) + + if create_csv: + + temp = { + "run_id": uuid.uuid4(), + "method_name": method_name, + "time_taken": time_taken_to_sort, + "step_count": step_count, + "sort_state": sort_state, + } + + if Path().joinpath("perf.csv").exists(): + + with open("perf.csv", "a") as csv_file: + csv_writer = csv.writer(csv_file) + csv_writer.writerow(list(temp.values())) + + else: + + with open("perf.csv", "w") as csv_file: + csv_writer = csv.writer(csv_file) + csv_writer.writerow(list(temp.keys())) + csv_writer.writerow(list(temp.values())) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index ed544ee..13279d3 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -5,6 +5,7 @@ from sorters.merge_sort import merge_sort as ms from sorters.selection_sort_1 import selection_sort as ss1 from sorters.selection_sort_2 import selection_sort as ss2 +from sorters.native_sort import native_sort as ns debug = False count = None @@ -60,6 +61,57 @@ help_text="for non-unique numbers", ) +ns( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", +) + +ns( + duplicate_allowed_random_list, + known_solution_duplicate_allowed_random_list, + debug=debug, + help_text="for non-unique numbers", +) + + +def create_graph(): + + for i in range(100): + + known_solution_unique_random_list = ss1( + unique_random_list, debug=debug, help_text="for unique numbers" + ) + + bu_s( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", + create_csv=True, + ) + + ms( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", + create_csv=True, + ) + + ns( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", + create_csv=True, + ) + + +create_graph() + + # TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort # TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort # TODO: Bucket Sort (this is the method that I used to sort cards when I need to sort under distraction. Takes longer because more steps required.) https://en.wikipedia.org/wiki/Bucket_sort diff --git a/sorters/bubble_sort.py b/sorters/bubble_sort.py index 83c7931..8a3ec76 100644 --- a/sorters/bubble_sort.py +++ b/sorters/bubble_sort.py @@ -10,10 +10,11 @@ def bubble_sort( - random_list: list, - known_solution_unique_random_list: list, - debug: bool, - help_text: str = "", + random_list: list, + known_solution_unique_random_list: list, + debug: bool, + help_text: str = "", + create_csv: bool = False, ): """ @@ -75,6 +76,7 @@ def bubble_sort( sort_state=sort_state, matches_known_solution=matches_known_solution, help_text=help_text, + create_csv=create_csv, ) return ordered_list diff --git a/sorters/merge_sort.py b/sorters/merge_sort.py index c56bd56..2bf367f 100644 --- a/sorters/merge_sort.py +++ b/sorters/merge_sort.py @@ -42,10 +42,7 @@ def get_combined_chunk(chunk_a: list, chunk_b: list) -> dict: chunk_ordered_list.extend(chunk_a) chunk_ordered_list.extend(chunk_b) - return { - "chunk_ordered_list": chunk_ordered_list, - "step_count": step_count, - } + return {"chunk_ordered_list": chunk_ordered_list, "step_count": step_count} def merge_sort( @@ -53,6 +50,7 @@ def merge_sort( known_solution_unique_random_list: list, debug: bool, help_text: str = "", + create_csv: bool = False, ): """ @@ -120,6 +118,7 @@ def merge_sort( sort_state=sort_state, matches_known_solution=matches_known_solution, help_text=help_text, + create_csv=create_csv, ) return ordered_list diff --git a/sorters/native_sort.py b/sorters/native_sort.py new file mode 100644 index 0000000..c125baa --- /dev/null +++ b/sorters/native_sort.py @@ -0,0 +1,55 @@ +from datetime import datetime + +from app_scripts.create_check_random_number_list import check_order +from app_scripts.print_scripts import print_sort_results + + +# Method 4: Native sort (as done by pythons sorted method) +# Settings: duplicate/unique agnostic + + +def native_sort( + random_list: list, + known_solution_unique_random_list: list, + debug: bool, + help_text: str = "", + create_csv: bool = False, +): + """ + + :param random_list: + :param known_solution_unique_random_list: + :param debug: + :param help_text: + :return: + + Doctest + + >>> bubble_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) + [1, 2, 3, 4, 5] + + >>> bubble_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) + [1, 2, 2, 3, 3, 3] + + """ + method_name = "Native sort" + step_count = 1 + start_time = datetime.now() + ordered_list = sorted(random_list) + if debug: + print("\t\t\t\tordered_list", ordered_list) + + time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) + sort_state = check_order(ordered_list)["random_bool"] is False + matches_known_solution = ordered_list == known_solution_unique_random_list + print_sort_results( + method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + matches_known_solution=matches_known_solution, + help_text=help_text, + create_csv=create_csv, + ) + + return ordered_list diff --git a/sorters/selection_sort_1.py b/sorters/selection_sort_1.py index 9e96582..b770104 100644 --- a/sorters/selection_sort_1.py +++ b/sorters/selection_sort_1.py @@ -9,7 +9,9 @@ # Settings: only unique numbers -def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: +def selection_sort( + random_list: list, debug: bool, help_text: str = "", create_csv: bool = False +) -> list: """ :param random_list: @@ -56,6 +58,7 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: step_count=step_count, sort_state=sort_state, help_text=help_text, + create_csv=create_csv, ) return known_solution_unique_random_list diff --git a/sorters/selection_sort_2.py b/sorters/selection_sort_2.py index c9003ac..891f76f 100644 --- a/sorters/selection_sort_2.py +++ b/sorters/selection_sort_2.py @@ -9,7 +9,9 @@ # Settings: duplicate numbers allowed -def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: +def selection_sort( + random_list: list, debug: bool, help_text: str = "", create_csv: bool = False +) -> list: """ :param random_list: @@ -67,6 +69,7 @@ def selection_sort(random_list: list, debug: bool, help_text: str = "") -> list: step_count=step_count, sort_state=sort_state, help_text=help_text, + create_csv=create_csv, ) return known_solution_duplicate_allowed_random_list From 0eb0282eddbc700161c387cc481825dd40583fba Mon Sep 17 00:00:00 2001 From: Ambar Das Date: Sun, 23 Feb 2020 23:16:10 +0100 Subject: [PATCH 13/20] fix doctest for native sorting --- sorters/native_sort.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sorters/native_sort.py b/sorters/native_sort.py index c125baa..5cab60b 100644 --- a/sorters/native_sort.py +++ b/sorters/native_sort.py @@ -25,10 +25,10 @@ def native_sort( Doctest - >>> bubble_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) + >>> native_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) [1, 2, 3, 4, 5] - >>> bubble_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) + >>> native_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) [1, 2, 2, 3, 3, 3] """ From 0511e7107e976da8a9eb43986331bc198446f5d4 Mon Sep 17 00:00:00 2001 From: Ambar Date: Tue, 25 Feb 2020 18:01:50 +0100 Subject: [PATCH 14/20] commented out methods for visualizations bug fix for updating random numbers between runs --- sort_algorithm_runner.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index 13279d3..3814320 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -3,9 +3,9 @@ from app_scripts.create_check_random_number_list import generate_list from sorters.bubble_sort import bubble_sort as bu_s from sorters.merge_sort import merge_sort as ms +from sorters.native_sort import native_sort as ns from sorters.selection_sort_1 import selection_sort as ss1 from sorters.selection_sort_2 import selection_sort as ss2 -from sorters.native_sort import native_sort as ns debug = False count = None @@ -79,11 +79,10 @@ def create_graph(): for i in range(100): - + unique_random_list = generate_list(count=count, uniqued_list=True) known_solution_unique_random_list = ss1( - unique_random_list, debug=debug, help_text="for unique numbers" + unique_random_list, debug=debug, help_text="for unique numbers", create_csv=True ) - bu_s( unique_random_list, known_solution_unique_random_list, @@ -91,7 +90,6 @@ def create_graph(): help_text="for unique numbers", create_csv=True, ) - ms( unique_random_list, known_solution_unique_random_list, @@ -99,7 +97,6 @@ def create_graph(): help_text="for unique numbers", create_csv=True, ) - ns( unique_random_list, known_solution_unique_random_list, @@ -108,9 +105,34 @@ def create_graph(): create_csv=True, ) - create_graph() +## Experimental +# import numpy as np +# import pandas as pd +# import matplotlib.pyplot as plt +# all_data_df = pd.read_csv("perf.csv") +# perf_df = all_data_df.loc[:, ["method_name", "time_taken"]] +# steps_df = all_data_df.loc[:, ["method_name", "step_count"]] +# +# perf_mer = perf_df[perf_df["method_name"] == "Merge sort"].loc[:,"time_taken"].to_numpy() +# perf_bub = perf_df[perf_df["method_name"] == "Bubble sort"].loc[:,"time_taken"].to_numpy() +# perf_nat = perf_df[perf_df["method_name"] == "Native sort"].loc[:,"time_taken"].to_numpy() +# perf_sel = perf_df[perf_df["method_name"] == "Selection sort 1.0"].loc[:,"time_taken"].to_numpy() +# plt.title("Box plot of performance") +# plt.boxplot((perf_mer, perf_bub, perf_nat, perf_sel)) +# plt.show() +# +# steps_mer = steps_df[steps_df["method_name"] == "Merge sort"].loc[:,"step_count"].to_numpy() +# steps_bub = steps_df[steps_df["method_name"] == "Bubble sort"].loc[:,"step_count"].to_numpy() +# steps_nat = steps_df[steps_df["method_name"] == "Native sort"].loc[:,"step_count"].to_numpy() +# steps_sel = steps_df[steps_df["method_name"] == "Selection sort 1.0"].loc[:,"step_count"].to_numpy() +# plt.title("Box plot of step_counts") +# plt.boxplot((steps_mer, steps_bub, steps_nat, steps_sel)) +# plt.show() + + + # TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort # TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort From 5fbd83d807a91b2226ea3204caf396144278f716 Mon Sep 17 00:00:00 2001 From: Ambar Date: Wed, 26 Feb 2020 17:43:06 +0100 Subject: [PATCH 15/20] initial commit for quick sort --- sorters/quick_sort.py | 192 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 sorters/quick_sort.py diff --git a/sorters/quick_sort.py b/sorters/quick_sort.py new file mode 100644 index 0000000..f298dff --- /dev/null +++ b/sorters/quick_sort.py @@ -0,0 +1,192 @@ +import copy + + +# from app_scripts.create_check_random_number_list import check_order +# from app_scripts.print_scripts import print_sort_results + + +# Method 5: Quick sort https://en.wikipedia.org/wiki/Quicksort +# Settings: duplicate/unique agnostic + + +def find_pivot(random_list: list) -> dict: + """ + + :param random_list: + :return: + + Doctest + + >>> find_pivot([7,3,5,12,18,9,2]) + {'step_count': 2, 'pivot': 7, 'pivot_index': 0} + + >>> find_pivot([7,3,5,18,9,2]) + {'step_count': 2, 'pivot': 7, 'pivot_index': 0} + + >>> find_pivot([2,3,18,7,9,5]) + {'step_count': 3, 'pivot': 7, 'pivot_index': 3} + """ + + indexes = { + "begin": 0, + "middle": int(len(random_list) / 2), + "pre_middle": int(len(random_list) / 2) - 1, + "end": len(random_list) - 1, + } + + vals_to_chose_from = { + "begin": random_list[indexes["begin"]], + "middle": random_list[indexes["middle"]], + "end": random_list[indexes["end"]], + } + if not len(random_list) % 2: + vals_to_chose_from["pre_middle"] = random_list[indexes["pre_middle"]] + + step_count = 1 + for pivot_index_key, pivot in vals_to_chose_from.items(): + step_count += 1 + if pivot != min(list(vals_to_chose_from.values())) and pivot != max(list(vals_to_chose_from.values())): + return { + "step_count": step_count, + "pivot": pivot, + "pivot_index": indexes[pivot_index_key] + } + + +def sort_wrt_pivot(random_list: list, pivot: int) -> dict: + """ + + :param random_list: + :param pivot: + :return: + + Doctest + + >>> sort_wrt_pivot([7,3,5,12,18,9,2], 8) + {'step_count': 23, 'less_random_list': [[7, 3, 5, 2], [8], [18, 9, 12]]} + + >>> sort_wrt_pivot([12,18,9,2,7,3,5], 8) + {'step_count': 39, 'less_random_list': [[5, 3, 7, 2], [8], [9, 18, 12]]} + + """ + step_count = 0 + cache_random_list = copy.deepcopy(random_list) + + while True: + + for val_ind, val in enumerate(cache_random_list): + step_count += 1 + if val > pivot: + item_from_left = val + item_from_left_ind = val_ind + break + + for val_ind, val in enumerate(cache_random_list): + step_count += 1 + if val < pivot: + item_from_right = val + item_from_right_ind = val_ind + + if item_from_right_ind < item_from_left_ind: + return { + "step_count": step_count, + "less_random_list": [ + cache_random_list[:item_from_left_ind], + [pivot], + cache_random_list[item_from_left_ind:] + ], + } + + else: + cache_random_list[item_from_right_ind] = item_from_left + cache_random_list[item_from_left_ind] = item_from_right + + +def recursive_sorter(list_of_random_lists: list, step_count: int = 0) -> dict: + """ + + :param list_of_random_lists: + :param step_count: + :return: + """ + + new_list_of_random_lists = [] + sorting_action_carried_out = False + + for random_list in list_of_random_lists: + + if len(random_list) < 3: + new_list_of_random_lists.append(random_list) + else: + sorting_action_carried_out = True + pivot_data = find_pivot(random_list) + step_count += pivot_data["step_count"] + pivot = pivot_data["pivot"] + pivot_index = pivot_data["pivot_index"] + + new_random_list = copy.deepcopy(random_list) + del new_random_list[pivot_index] + + partition_data = sort_wrt_pivot(new_random_list, pivot) + step_count += partition_data["step_count"] + new_list_of_random_lists.extend(partition_data["less_random_list"]) + + if sorting_action_carried_out: + recursive_sorter(new_list_of_random_lists, step_count) + else: + ordered_list = [] + [ordered_list.extend(i) for i in new_list_of_random_lists] + return { + "ordered_list": ordered_list, + "step_count": step_count + } + +# def quick_sort( +# random_list: list, +# known_solution_unique_random_list: list, +# debug: bool, +# help_text: str = "", +# create_csv: bool = False, +# ): +# """ +# +# :param random_list: +# :param known_solution_unique_random_list: +# :param debug: +# :param help_text: +# :return: +# +# Doctest +# +# >>> quick_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) +# [1, 2, 3, 4, 5] +# +# >>> quick_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) +# [1, 2, 2, 3, 3, 3] +# +# """ +# method_name = "Quick sort" +# step_count = 0 +# start_time = datetime.now() +# +# ordered_list_dict = recursive_sorter([random_list]) +# step_count = ordered_list_dict["step_count"] +# ordered_list = ordered_list_dict["ordered_list"] +# +# if debug: +# print("\t\t\t\tordered_list", ordered_list) +# +# time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) +# sort_state = check_order(ordered_list)["random_bool"] is False +# matches_known_solution = ordered_list == known_solution_unique_random_list +# print_sort_results( +# method_name=method_name, +# time_taken_to_sort=time_taken_to_sort, +# step_count=step_count, +# sort_state=sort_state, +# matches_known_solution=matches_known_solution, +# help_text=help_text, +# create_csv=create_csv, +# ) +# +# return ordered_list From 24b91664765222ccb7e0921b74de0a13f2bbf024 Mon Sep 17 00:00:00 2001 From: Ambar Date: Wed, 26 Feb 2020 20:14:59 +0100 Subject: [PATCH 16/20] working version of quick sort --- sort_algorithm_runner.py | 29 +++++- sorters/quick_sort.py | 187 +++++++++++++++++++++++++-------------- 2 files changed, 147 insertions(+), 69 deletions(-) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index 3814320..3ea716f 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -4,6 +4,7 @@ from sorters.bubble_sort import bubble_sort as bu_s from sorters.merge_sort import merge_sort as ms from sorters.native_sort import native_sort as ns +from sorters.quick_sort import quick_sort as qs from sorters.selection_sort_1 import selection_sort as ss1 from sorters.selection_sort_2 import selection_sort as ss2 @@ -75,6 +76,20 @@ help_text="for non-unique numbers", ) +qs( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", +) + +qs( + duplicate_allowed_random_list, + known_solution_duplicate_allowed_random_list, + debug=debug, + help_text="for non-unique numbers", +) + def create_graph(): @@ -104,6 +119,13 @@ def create_graph(): help_text="for unique numbers", create_csv=True, ) + qs( + unique_random_list, + known_solution_unique_random_list, + debug=debug, + help_text="for unique numbers", + create_csv=True, + ) create_graph() @@ -116,24 +138,25 @@ def create_graph(): # steps_df = all_data_df.loc[:, ["method_name", "step_count"]] # # perf_mer = perf_df[perf_df["method_name"] == "Merge sort"].loc[:,"time_taken"].to_numpy() +# perf_quk = perf_df[perf_df["method_name"] == "Quick sort"].loc[:,"time_taken"].to_numpy() # perf_bub = perf_df[perf_df["method_name"] == "Bubble sort"].loc[:,"time_taken"].to_numpy() # perf_nat = perf_df[perf_df["method_name"] == "Native sort"].loc[:,"time_taken"].to_numpy() # perf_sel = perf_df[perf_df["method_name"] == "Selection sort 1.0"].loc[:,"time_taken"].to_numpy() # plt.title("Box plot of performance") -# plt.boxplot((perf_mer, perf_bub, perf_nat, perf_sel)) +# plt.boxplot((perf_mer, perf_bub, perf_nat, perf_sel, perf_quk)) # plt.show() # # steps_mer = steps_df[steps_df["method_name"] == "Merge sort"].loc[:,"step_count"].to_numpy() +# steps_quk = steps_df[steps_df["method_name"] == "Quick sort"].loc[:,"step_count"].to_numpy() # steps_bub = steps_df[steps_df["method_name"] == "Bubble sort"].loc[:,"step_count"].to_numpy() # steps_nat = steps_df[steps_df["method_name"] == "Native sort"].loc[:,"step_count"].to_numpy() # steps_sel = steps_df[steps_df["method_name"] == "Selection sort 1.0"].loc[:,"step_count"].to_numpy() # plt.title("Box plot of step_counts") -# plt.boxplot((steps_mer, steps_bub, steps_nat, steps_sel)) +# plt.boxplot((steps_mer, steps_bub, steps_nat, steps_sel, steps_quk)) # plt.show() -# TODO: Quick Sort https://en.wikipedia.org/wiki/Quicksort # TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort # TODO: Bucket Sort (this is the method that I used to sort cards when I need to sort under distraction. Takes longer because more steps required.) https://en.wikipedia.org/wiki/Bucket_sort diff --git a/sorters/quick_sort.py b/sorters/quick_sort.py index f298dff..a6ccfd9 100644 --- a/sorters/quick_sort.py +++ b/sorters/quick_sort.py @@ -1,8 +1,8 @@ import copy +from datetime import datetime - -# from app_scripts.create_check_random_number_list import check_order -# from app_scripts.print_scripts import print_sort_results +from app_scripts.create_check_random_number_list import check_order +from app_scripts.print_scripts import print_sort_results # Method 5: Quick sort https://en.wikipedia.org/wiki/Quicksort @@ -25,6 +25,9 @@ def find_pivot(random_list: list) -> dict: >>> find_pivot([2,3,18,7,9,5]) {'step_count': 3, 'pivot': 7, 'pivot_index': 3} + + >>> find_pivot([14,8,15,15,18,14]) + {'step_count': 9, 'pivot': 15, 'pivot_index': 2} """ indexes = { @@ -42,18 +45,37 @@ def find_pivot(random_list: list) -> dict: if not len(random_list) % 2: vals_to_chose_from["pre_middle"] = random_list[indexes["pre_middle"]] + temp = { + "step_count": 1, + "pivot": None, + "pivot_index": None + } + step_count = 1 for pivot_index_key, pivot in vals_to_chose_from.items(): step_count += 1 if pivot != min(list(vals_to_chose_from.values())) and pivot != max(list(vals_to_chose_from.values())): - return { + temp = { "step_count": step_count, "pivot": pivot, "pivot_index": indexes[pivot_index_key] } + return temp + + if None in list(temp.values()): + max_val = max(list(vals_to_chose_from.values())) + for i in list(vals_to_chose_from.keys()): + step_count += 1 + if vals_to_chose_from[i] == max_val: + max_val_index = indexes[i] + return { + "step_count": step_count, + "pivot": max_val, + "pivot_index": max_val_index + } -def sort_wrt_pivot(random_list: list, pivot: int) -> dict: +def sort_wrt_pivot(random_list: list, pivot: int, debug: bool = False) -> dict: """ :param random_list: @@ -68,6 +90,8 @@ def sort_wrt_pivot(random_list: list, pivot: int) -> dict: >>> sort_wrt_pivot([12,18,9,2,7,3,5], 8) {'step_count': 39, 'less_random_list': [[5, 3, 7, 2], [8], [9, 18, 12]]} + >>> sort_wrt_pivot([4,4,2], 4) + {'step_count': 9, 'less_random_list': [[2], [4], [4, 4]]} """ step_count = 0 cache_random_list = copy.deepcopy(random_list) @@ -76,18 +100,24 @@ def sort_wrt_pivot(random_list: list, pivot: int) -> dict: for val_ind, val in enumerate(cache_random_list): step_count += 1 - if val > pivot: + if val >= pivot: item_from_left = val item_from_left_ind = val_ind break for val_ind, val in enumerate(cache_random_list): step_count += 1 - if val < pivot: + if val <= pivot: item_from_right = val item_from_right_ind = val_ind - if item_from_right_ind < item_from_left_ind: + if debug: + print("item_from_left", item_from_left) + print("item_from_left_ind", item_from_left_ind) + print("item_from_right", item_from_right) + print("item_from_right_ind", item_from_right_ind) + + if item_from_right_ind <= item_from_left_ind or item_from_right == item_from_left: return { "step_count": step_count, "less_random_list": [ @@ -102,20 +132,36 @@ def sort_wrt_pivot(random_list: list, pivot: int) -> dict: cache_random_list[item_from_left_ind] = item_from_right -def recursive_sorter(list_of_random_lists: list, step_count: int = 0) -> dict: +def recursive_sorter(list_of_random_lists: list, step_count: int = 0, debug: bool = False) -> dict: """ :param list_of_random_lists: :param step_count: :return: + + Doctest + + >>> recursive_sorter([[7,3,5,12,18,9,2]]) + {'ordered_list_of_lists': [[2], [3], [5], [7], [9], [12], [18]], 'step_count': 41} """ new_list_of_random_lists = [] sorting_action_carried_out = False + if debug and not step_count: + print("Beginning of Quick sort.") + print("Random list: ", list_of_random_lists) + elif debug and step_count: + print("\tRandom list: ", list_of_random_lists) + for random_list in list_of_random_lists: if len(random_list) < 3: + try: + if random_list[0] > random_list[1]: + random_list.reverse() + except IndexError: + pass new_list_of_random_lists.append(random_list) else: sorting_action_carried_out = True @@ -124,69 +170,78 @@ def recursive_sorter(list_of_random_lists: list, step_count: int = 0) -> dict: pivot = pivot_data["pivot"] pivot_index = pivot_data["pivot_index"] + if debug: + print("\t\t pivot", pivot) + new_random_list = copy.deepcopy(random_list) del new_random_list[pivot_index] - partition_data = sort_wrt_pivot(new_random_list, pivot) + partition_data = sort_wrt_pivot(new_random_list, pivot, debug) step_count += partition_data["step_count"] new_list_of_random_lists.extend(partition_data["less_random_list"]) + current_result = { + "ordered_list_of_lists": new_list_of_random_lists, + "step_count": step_count + } + if sorting_action_carried_out: - recursive_sorter(new_list_of_random_lists, step_count) + if debug: + print("\tnew_list_of_random_lists", new_list_of_random_lists) + return recursive_sorter(new_list_of_random_lists, step_count, debug) else: - ordered_list = [] - [ordered_list.extend(i) for i in new_list_of_random_lists] - return { - "ordered_list": ordered_list, - "step_count": step_count - } + return current_result + -# def quick_sort( -# random_list: list, -# known_solution_unique_random_list: list, -# debug: bool, -# help_text: str = "", -# create_csv: bool = False, -# ): -# """ -# -# :param random_list: -# :param known_solution_unique_random_list: -# :param debug: -# :param help_text: -# :return: -# -# Doctest -# -# >>> quick_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) -# [1, 2, 3, 4, 5] -# -# >>> quick_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) -# [1, 2, 2, 3, 3, 3] -# -# """ -# method_name = "Quick sort" -# step_count = 0 -# start_time = datetime.now() -# -# ordered_list_dict = recursive_sorter([random_list]) -# step_count = ordered_list_dict["step_count"] -# ordered_list = ordered_list_dict["ordered_list"] -# -# if debug: -# print("\t\t\t\tordered_list", ordered_list) -# -# time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) -# sort_state = check_order(ordered_list)["random_bool"] is False -# matches_known_solution = ordered_list == known_solution_unique_random_list -# print_sort_results( -# method_name=method_name, -# time_taken_to_sort=time_taken_to_sort, -# step_count=step_count, -# sort_state=sort_state, -# matches_known_solution=matches_known_solution, -# help_text=help_text, -# create_csv=create_csv, -# ) -# -# return ordered_list +def quick_sort( + random_list: list, + known_solution_unique_random_list: list, + debug: bool, + help_text: str = "", + create_csv: bool = False, +): + """ + + :param random_list: + :param known_solution_unique_random_list: + :param debug: + :param help_text: + :return: + + Doctest + + >>> quick_sort([5,4,3,2,1],[1,2,3,4,5], debug=False) + [1, 2, 3, 4, 5] + + >>> quick_sort([3,3,2,1,2,3], [1,2,2,3,3,3], debug=False) + [1, 2, 2, 3, 3, 3] + + """ + method_name = "Quick sort" + step_count = 0 + start_time = datetime.now() + ordered_list = [] + + ordered_list_dict = recursive_sorter([random_list], debug=debug) + step_count = ordered_list_dict["step_count"] + ordered_list_of_lists = ordered_list_dict["ordered_list_of_lists"] + for i in ordered_list_of_lists: + ordered_list.extend(i) + + if debug: + print("\t\t\t\tordered_list", ordered_list) + + time_taken_to_sort = round((datetime.now() - start_time).total_seconds(), 4) + sort_state = check_order(ordered_list)["random_bool"] is False + matches_known_solution = ordered_list == known_solution_unique_random_list + print_sort_results( + method_name=method_name, + time_taken_to_sort=time_taken_to_sort, + step_count=step_count, + sort_state=sort_state, + matches_known_solution=matches_known_solution, + help_text=help_text, + create_csv=create_csv, + ) + + return ordered_list From ee6d6e8a4334a94b0c6f52c68a2fea0230acc546 Mon Sep 17 00:00:00 2001 From: Ambar Date: Thu, 27 Feb 2020 18:33:37 +0100 Subject: [PATCH 17/20] visualization expperiments --- sort_algorithm_runner.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index 3ea716f..16127f1 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -130,30 +130,37 @@ def create_graph(): create_graph() ## Experimental -# import numpy as np # import pandas as pd # import matplotlib.pyplot as plt # all_data_df = pd.read_csv("perf.csv") # perf_df = all_data_df.loc[:, ["method_name", "time_taken"]] # steps_df = all_data_df.loc[:, ["method_name", "step_count"]] # +# fig, (ax1, ax2) = plt.subplots(1, 2) +# # perf_mer = perf_df[perf_df["method_name"] == "Merge sort"].loc[:,"time_taken"].to_numpy() # perf_quk = perf_df[perf_df["method_name"] == "Quick sort"].loc[:,"time_taken"].to_numpy() # perf_bub = perf_df[perf_df["method_name"] == "Bubble sort"].loc[:,"time_taken"].to_numpy() # perf_nat = perf_df[perf_df["method_name"] == "Native sort"].loc[:,"time_taken"].to_numpy() # perf_sel = perf_df[perf_df["method_name"] == "Selection sort 1.0"].loc[:,"time_taken"].to_numpy() -# plt.title("Box plot of performance") -# plt.boxplot((perf_mer, perf_bub, perf_nat, perf_sel, perf_quk)) -# plt.show() +# ax1.set_title("Box plot of performance (lower is better)") +# ax1.set_xlabel("sorters") +# ax1.set_ylabel("time in seconds") +# ax1.boxplot((perf_mer, perf_bub, perf_nat, perf_sel, perf_quk)) +# ax1.legend(["perf_mer", "perf_bub", "perf_nat", "perf_sel", "perf_quk"]) # # steps_mer = steps_df[steps_df["method_name"] == "Merge sort"].loc[:,"step_count"].to_numpy() # steps_quk = steps_df[steps_df["method_name"] == "Quick sort"].loc[:,"step_count"].to_numpy() # steps_bub = steps_df[steps_df["method_name"] == "Bubble sort"].loc[:,"step_count"].to_numpy() # steps_nat = steps_df[steps_df["method_name"] == "Native sort"].loc[:,"step_count"].to_numpy() # steps_sel = steps_df[steps_df["method_name"] == "Selection sort 1.0"].loc[:,"step_count"].to_numpy() -# plt.title("Box plot of step_counts") -# plt.boxplot((steps_mer, steps_bub, steps_nat, steps_sel, steps_quk)) -# plt.show() +# ax2.set_title("Box plot of steps (lower is better)") +# ax2.set_xlabel("sorters") +# ax2.set_ylabel("count") +# ax2.boxplot((steps_mer, steps_bub, steps_nat, steps_sel, steps_quk)) +# ax2.legend(["steps_mer", "steps_bub", "steps_nat", "steps_sel", "steps_quk"]) +# +# fig.show() From 02262ca4dc77aa6e22f86230a254abb8e73a5fb4 Mon Sep 17 00:00:00 2001 From: Ambar Date: Mon, 2 Mar 2020 17:38:32 +0100 Subject: [PATCH 18/20] black formatting with allowed line length 99 --- .../create_check_random_number_list.py | 5 +-- app_scripts/print_scripts.py | 4 +-- sort_algorithm_runner.py | 7 ++-- sorters/merge_sort.py | 4 +-- sorters/quick_sort.py | 35 +++++++------------ 5 files changed, 18 insertions(+), 37 deletions(-) diff --git a/app_scripts/create_check_random_number_list.py b/app_scripts/create_check_random_number_list.py index 3f4981d..a9588ff 100644 --- a/app_scripts/create_check_random_number_list.py +++ b/app_scripts/create_check_random_number_list.py @@ -2,10 +2,7 @@ def generate_list( - min_number: int = 1, - max_number: int = 1000000, - count: int = 1000, - uniqued_list: bool = True, + min_number: int = 1, max_number: int = 1000000, count: int = 1000, uniqued_list: bool = True, ) -> list: """ This function will create a list of random numbers. diff --git a/app_scripts/print_scripts.py b/app_scripts/print_scripts.py index 5c574cc..2b9a1ca 100644 --- a/app_scripts/print_scripts.py +++ b/app_scripts/print_scripts.py @@ -6,9 +6,7 @@ def print_sort_progress(lowest_number: int, step_count: int, debug: bool): if debug: - logging.info( - f"Lowest number in this round = {lowest_number}. Step_count = {step_count}" - ) + logging.info(f"Lowest number in this round = {lowest_number}. Step_count = {step_count}") def print_sort_results( diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index 16127f1..972798c 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -20,9 +20,7 @@ if debug: unique_random_list = generate_list(max_number=20, count=6, uniqued_list=True) - duplicate_allowed_random_list = generate_list( - max_number=20, count=6, uniqued_list=False - ) + duplicate_allowed_random_list = generate_list(max_number=20, count=6, uniqued_list=False) else: unique_random_list = generate_list(count=count, uniqued_list=True) duplicate_allowed_random_list = generate_list(count=count, uniqued_list=False) @@ -127,6 +125,7 @@ def create_graph(): create_csv=True, ) + create_graph() ## Experimental @@ -163,7 +162,5 @@ def create_graph(): # fig.show() - - # TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort # TODO: Bucket Sort (this is the method that I used to sort cards when I need to sort under distraction. Takes longer because more steps required.) https://en.wikipedia.org/wiki/Bucket_sort diff --git a/sorters/merge_sort.py b/sorters/merge_sort.py index 2bf367f..b2f633f 100644 --- a/sorters/merge_sort.py +++ b/sorters/merge_sort.py @@ -93,9 +93,7 @@ def merge_sort( ordered_list.extend(temp["chunk_ordered_list"]) step_count += temp["step_count"] - leftover_random_numbers = cache_random_list[ - len(ordered_list) : len(cache_random_list) - ] + leftover_random_numbers = cache_random_list[len(ordered_list) : len(cache_random_list)] ordered_list.extend(leftover_random_numbers) chunk_size = chunk_size * 2 diff --git a/sorters/quick_sort.py b/sorters/quick_sort.py index a6ccfd9..6b6adb3 100644 --- a/sorters/quick_sort.py +++ b/sorters/quick_sort.py @@ -45,20 +45,18 @@ def find_pivot(random_list: list) -> dict: if not len(random_list) % 2: vals_to_chose_from["pre_middle"] = random_list[indexes["pre_middle"]] - temp = { - "step_count": 1, - "pivot": None, - "pivot_index": None - } + temp = {"step_count": 1, "pivot": None, "pivot_index": None} step_count = 1 for pivot_index_key, pivot in vals_to_chose_from.items(): step_count += 1 - if pivot != min(list(vals_to_chose_from.values())) and pivot != max(list(vals_to_chose_from.values())): + if pivot != min(list(vals_to_chose_from.values())) and pivot != max( + list(vals_to_chose_from.values()) + ): temp = { "step_count": step_count, "pivot": pivot, - "pivot_index": indexes[pivot_index_key] + "pivot_index": indexes[pivot_index_key], } return temp @@ -68,11 +66,7 @@ def find_pivot(random_list: list) -> dict: step_count += 1 if vals_to_chose_from[i] == max_val: max_val_index = indexes[i] - return { - "step_count": step_count, - "pivot": max_val, - "pivot_index": max_val_index - } + return {"step_count": step_count, "pivot": max_val, "pivot_index": max_val_index} def sort_wrt_pivot(random_list: list, pivot: int, debug: bool = False) -> dict: @@ -123,7 +117,7 @@ def sort_wrt_pivot(random_list: list, pivot: int, debug: bool = False) -> dict: "less_random_list": [ cache_random_list[:item_from_left_ind], [pivot], - cache_random_list[item_from_left_ind:] + cache_random_list[item_from_left_ind:], ], } @@ -180,10 +174,7 @@ def recursive_sorter(list_of_random_lists: list, step_count: int = 0, debug: boo step_count += partition_data["step_count"] new_list_of_random_lists.extend(partition_data["less_random_list"]) - current_result = { - "ordered_list_of_lists": new_list_of_random_lists, - "step_count": step_count - } + current_result = {"ordered_list_of_lists": new_list_of_random_lists, "step_count": step_count} if sorting_action_carried_out: if debug: @@ -194,11 +185,11 @@ def recursive_sorter(list_of_random_lists: list, step_count: int = 0, debug: boo def quick_sort( - random_list: list, - known_solution_unique_random_list: list, - debug: bool, - help_text: str = "", - create_csv: bool = False, + random_list: list, + known_solution_unique_random_list: list, + debug: bool, + help_text: str = "", + create_csv: bool = False, ): """ From f213408a04fd104a0b779c4af703756b9884b500 Mon Sep 17 00:00:00 2001 From: Ambar Date: Mon, 2 Mar 2020 17:47:55 +0100 Subject: [PATCH 19/20] simplify execution for testing --- sort_algorithm_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index 972798c..4623272 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -126,7 +126,7 @@ def create_graph(): ) -create_graph() +# create_graph() ## Experimental # import pandas as pd From 7c8cc2e90b4a2cd37c9d34f83a94932c96e2df32 Mon Sep 17 00:00:00 2001 From: Ambar Date: Tue, 3 Mar 2020 00:43:44 +0100 Subject: [PATCH 20/20] better visualization with xtick labelling --- .gitignore | 5 ++++- requirements.txt | 2 ++ sort_algorithm_runner.py | 11 ++++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 6682489..583eeef 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,7 @@ dmypy.json .idea .venv -*.csv \ No newline at end of file +*.csv +*.jpeg +*.png +*.jpg \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e69de29..5e02c33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,2 @@ +matplotlib +pandas \ No newline at end of file diff --git a/sort_algorithm_runner.py b/sort_algorithm_runner.py index 4623272..d285a63 100644 --- a/sort_algorithm_runner.py +++ b/sort_algorithm_runner.py @@ -126,7 +126,7 @@ def create_graph(): ) -# create_graph() +create_graph() ## Experimental # import pandas as pd @@ -145,8 +145,9 @@ def create_graph(): # ax1.set_title("Box plot of performance (lower is better)") # ax1.set_xlabel("sorters") # ax1.set_ylabel("time in seconds") +# ax1.set_xticklabels(["Merge", "Quick", "Bubble", "Native", "Selection 1.0"]) # ax1.boxplot((perf_mer, perf_bub, perf_nat, perf_sel, perf_quk)) -# ax1.legend(["perf_mer", "perf_bub", "perf_nat", "perf_sel", "perf_quk"]) +# # # steps_mer = steps_df[steps_df["method_name"] == "Merge sort"].loc[:,"step_count"].to_numpy() # steps_quk = steps_df[steps_df["method_name"] == "Quick sort"].loc[:,"step_count"].to_numpy() @@ -156,11 +157,11 @@ def create_graph(): # ax2.set_title("Box plot of steps (lower is better)") # ax2.set_xlabel("sorters") # ax2.set_ylabel("count") +# ax2.set_xticklabels(["Merge", "Quick", "Bubble", "Native", "Selection 1.0"]) # ax2.boxplot((steps_mer, steps_bub, steps_nat, steps_sel, steps_quk)) -# ax2.legend(["steps_mer", "steps_bub", "steps_nat", "steps_sel", "steps_quk"]) -# -# fig.show() +##fig.savefig("x.jpeg", orientation="landscape", bbox_inches="tight") +## fig.show() # TODO: Heap Sort https://en.wikipedia.org/wiki/Heapsort # TODO: Bucket Sort (this is the method that I used to sort cards when I need to sort under distraction. Takes longer because more steps required.) https://en.wikipedia.org/wiki/Bucket_sort