From 7c38e14ab63ec6fc3acfbfe89cec01cffeea4478 Mon Sep 17 00:00:00 2001 From: VishnuSandeep Date: Sun, 10 Jan 2021 21:01:27 +0530 Subject: [PATCH] completed till step4 --- Python.ipynb | 548 +++++++++++++++++++++++++++++++++++++++++++++++++++ step_4.py | 36 ++++ 2 files changed, 584 insertions(+) create mode 100644 Python.ipynb create mode 100644 step_4.py diff --git a/Python.ipynb b/Python.ipynb new file mode 100644 index 0000000..0730fb4 --- /dev/null +++ b/Python.ipynb @@ -0,0 +1,548 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python-ifed\n", + "\n", + "Welcome to the python-ifed notebook, This notebook is a walkthrough + task for you to get started with software development using python.\n", + "\n", + "After completing this notebook successfully you should be familiar with the following concepts:\n", + "* Use version control\n", + "* Writing clean and modular code\n", + "* Improve code efficiency\n", + "* Add effective documentation\n", + "* Testing \n", + "* Code reviews\n", + "\n", + "**This exercise depends on a lot of googling skills and is aimed at making you efficient in the same**." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Challenge 1 : Tables and stuff" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 0\n", + "\n", + "You might be already done with this step\n", + "\n", + "### Learn to version control using git\n", + "\n", + "1. Install git CLI tool\n", + "1. Configure git CLI\n", + "1. Create a GitHub account if you already have not \n", + "1. Clone the repo that contains this repository\n", + "1. Now you are in master branch\n", + "1. 
Create a new branch with your name as the branch name and continue\n", + "\n", + "### Reference materials\n", + "https://www.atlassian.com/git\n", + "\n", + "https://www.tutorialspoint.com/git/git_basic_concepts.htm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 1\n", + "\n", + "## Clean and modular code\n", + "\n", + "Test out your googling skills and understand what writing clean and modular code means and answer the following questions by editing this cell in jupyter notebook.\n", + "\n", + "### Describe what each of the following means\n", + "\n", + "#### Production\n", + "Production code is code that is deployed and actually being used by other people.\n", + "#### Clean \n", + "Clean code is code that is easy to understand and easy to change.\n", + "#### Modular\n", + "Modular programming is a software design technique that emphasizes separating the functionality of a program into independent, interchangeable modules, such that each contains everything necessary to execute only one aspect of the desired functionality.\n", + "#### Module\n", + "A module is a software component or part of a program that contains one or more routines.\n", + "#### Refactoring code\n", + "Code refactoring is defined as the process of restructuring computer code without changing or adding to its external behavior and functionality." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you have finished writing the answers you can now commit these new changes with git using the commit message __step 1 completed__ ." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 2\n", + "\n", + "## Refactor: Cricket Match Analysis\n", + "\n", + "Here I would be providing you with a sample code, don't worry about the working much try to get an abstract idea and complete the task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv('matches.csv', sep=',')\n", + "df.set_index('id',inplace=True)\n", + "df.drop('umpire3',axis=1,inplace=True)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is how our data looks like right now we need to write efficient code to replaces the spaces with an underscore, the janky way of doing this is" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_df = df.rename(columns={'team 1': 'team_1',\n", + " 'team 2': 'team_2',\n", + " 'toss winner': 'toss_winner',\n", + " 'dl applied': 'dl_applied',\n", + " 'win by runs': 'win_by_runs',\n", + " 'win by wickets': 'win_by_wickets',\n", + " 'player of match': 'player_of_match',\n", + " 'umpire 1':'umpire_1',\n", + " 'umpire 2':'umpire_2'\n", + " })\n", + "new_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is like a hardcoded way of doing it slightly better way of doing this is " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "labels = list(df.columns)\n", + "labels[0] = labels[0].replace(' ', '_')\n", + "labels[1] = labels[1].replace(' ', '_')\n", + "labels[2] = labels[2].replace(' ', '_')\n", + "labels[3] = labels[3].replace(' ', '_')\n", + "labels[5] = labels[5].replace(' ', '_')\n", + "labels[6] = labels[6].replace(' ', '_')\n", + "labels[7] = labels[7].replace(' ', '_')\n", + "labels[8] = labels[8].replace(' ', '_')\n", + "labels[9] = labels[9].replace(' ', 
'_')\n", + "labels[10] = labels[10].replace(' ', '_')\n", + "df.columns = labels\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is also a very redundant way of doing this try writing a better code to do the same. Limit yourselves to one or two lines of code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(0,10):\n", + " for i in range(0,10):\n", + " labels[i] = labels[i].replace(' ','_')\n", + " \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you are done till here make a new commit with message __step 2 complete__." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 3\n", + "\n", + "## Optimizing Code\n", + "\n", + "### Efficient Code\n", + "\n", + "Knowing how to write code that runs efficiently is another essential skill in software development. Optimizing code to be more efficient can mean making it:\n", + "\n", + "* Execute faster\n", + "* Take up less space in memory/storage\n", + "\n", + "\n", + "Resources:\n", + "https://stackify.com/20-simple-python-performance-tuning-tips/\n", + "\n", + "https://pybit.es/faster-python.html\n", + "\n", + "https://towardsdatascience.com/one-simple-trick-for-speeding-up-your-python-code-with-numpy-1afc846db418\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('subset_elemets.txt') as f:\n", + " subset_elements = f.read().split('\\n')\n", + " \n", + "with open('all_elements.txt') as f:\n", + " all_elements = f.read().split('\\n')\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start = 
time.time()\n", + "verified_elements = []\n", + "\n", + "for element in subset_elements:\n", + " if element in all_elements:\n", + " verified_elements.append(element)\n", + "\n", + "print(len(verified_elements))\n", + "print('Duration: {} seconds'.format(time.time() - start))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use vector operations using NumPy to optimise the loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import numpy as np\n", + "import pandas as pd\n", + "with open('subset_elements.txt') as f:\n", + " subset_elements = f.read().split('\\n')\n", + "\n", + "with open('all_elements.txt') as f:\n", + " all_elements = f.read().split('\\n')\n", + "\n", + "start = time.time()\n", + "verified_elements = []\n", + "elements = elements.intersect1d(subset_elements,all_elements)\n", + "\n", + "print(len(verified_elements))\n", + "print('Duration: {} seconds'.format(time.time() - start))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use a python datastructure which has a method to peform this task faster" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import numpy as np\n", + "import pandas as pd\n", + "with open('subset_elements.txt') as f:\n", + " subset_elements = f.read().split('\\n')\n", + "\n", + "with open('all_elements.txt') as f:\n", + " all_elements = f.read().split('\\n')\n", + "\n", + "start = time.time()\n", + "map(str(all_elements,verified_list))\n", + "\n", + "print(len(verified_elements))\n", + "print('Duration: {} seconds'.format(time.time() - start))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 4\n", + "# Documentation\n", + "\n", + "Documentation is one of the important part of software development. 
Teach yourself about documentation in python and convert code in step 3 to functions and add relevant documentation for the same.\n", + "\n", + "#### Resources\n", + "https://www.python.org/dev/peps/pep-0257/\n", + "\n", + "https://numpydoc.readthedocs.io/en/latest/format.html\n", + "\n", + "https://www.datacamp.com/community/tutorials/documenting-python-code\n", + "\n", + "do not add this code to the notebook instead create a new python file called step_4.py and define the functions as well as a main to test the working of all the functions make sure to version this file as well on the commit message __step 4 completed__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 5\n", + "\n", + "# Testing\n", + "\n", + "Testing your code is essential before deployment. It helps you catch errors and faulty conclusions before they make any major impact. Today, employers are looking for data scientists with the skills to properly prepare their code for an industry setting, which includes testing their code.\n", + "\n", + "Learn about pytest and install it \n", + "\n", + "https://docs.pytest.org/en/stable/\n", + "\n", + "create a new file called nearest_square.py this function should return the nearest perfect square number which is less than or equal to the number.\n", + "\n", + "create another file called test_nearest_square.py to test the function with different test each for values 5,-12,9 and 23\n", + "\n", + "execute the line below to ensure it is working correctly\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
pytest test_nearest_square.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are done with this step make a new commit __step5 completed__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 6 \n", + "# Code review\n", + "\n", + "Understand how code review works\n", + "\n", + "https://github.com/lyst/MakingLyst/tree/master/code-reviews\n", + "\n", + "https://www.kevinlondon.com/2015/05/05/code-review-best-practices.html\n", + "\n", + "_Leave it to us_ if you are done with this step go ahead and push this notebook as well as all the files to your GitHub and make a pull request to the repository that you cloned this task from so that we can review your progress till now, Please continue with the challenge" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Challenge 2 : Pokemon" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that you are familiar with writing efficient code let's use that practice and do some API fetching to answer some questions, Below are some questions below and you should use the [pokeapi](https://pokeapi.co/) to do some API fetching with python and answer the questions.\n", + "\n", + "**NOTE**\n", + "You should fill the cell with code that gives the answer and not write the answer directly, failing to do so will reduce your evaluation and proves that you did not bother reading this part." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Remember you were in a pokemon room (discord) what is the **type** of that pokemon" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Insert your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What **type** of pokemons does this **type** take damage from\n", + "\n", + "__hint__ the url field of previous response" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Insert your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For each of the **double damage from** type list 5 pokemons in that type" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Insert your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a function ability() inside a new file ability.py to return a list of the abilities of any given pokemon by doing an API query the function should accept a string parameter which is the name of the pokemon\n", + "\n", + "execute the line below to ensure everything is working properly" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m============================= test session starts ==============================\u001b[0m\n", + "platform darwin -- Python 3.8.5, pytest-6.2.1, py-1.10.0, pluggy-0.13.1\n", + "rootdir: /Users/abhijitramesh/development/Python-ifed\n", + "collected 4 items \u001b[0m\n", + "\n", + "test_abilities.py \u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m [100%]\u001b[0m\n", + "\n", + "\u001b[32m============================== \u001b[32m\u001b[1m4 passed\u001b[0m\u001b[32m in 2.75s\u001b[0m\u001b[32m 
===============================\u001b[0m\n" + ] + } + ], + "source": [ + "!pytest test_abilities.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please version till this point saying with a message \"Completed challenge 2\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extra Challenge for extra karma point" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the above challenge was a cakewalk and you have a lot of time left to attempt this challenge to earn some karma points, this challenge is completely optional.\n", + "Let's create a pokedex with the function we created earlier on that is\n", + "\n", + "* What is the type of pokemon\n", + "* What type of pokemon gives double damages to the given pokemon\n", + "* List 5 pokemons which gives the given pokemon double damage\n", + "* Abilities of our pokemon\n", + "\n", + "Use [pysimplegui](https://pypi.org/project/PySimpleGUI) to create a simple pokedex which consumes these functions\n", + "\n", + "save the file as pokedex.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Version till this point with a message \"Completed Extra Challenge\"\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/step_4.py b/step_4.py new file mode 100644 index 0000000..2d6c880 --- /dev/null +++ b/step_4.py @@ -0,0 +1,36 @@ +import time +import pandas as pd +import numpy as np +'''imports the module which is a written program and provides a lot of built-in function''' +def elements(sub_elements,all_elements): + '''Gives the return of 
# Default input files, named as in the notebook cells that load the data.
SUBSET_FILE = 'subset_elements.txt'
ALL_FILE = 'all_elements.txt'


def _read_lines(path):
    """Return the lines of the text file at ``path`` without line endings."""
    with open(path, encoding='utf-8') as handle:
        return handle.read().splitlines()


def elements_loop(sub_elements, all_elements):
    """Return the items of ``sub_elements`` that also occur in ``all_elements``.

    Baseline implementation: a plain ``for`` loop with an ``in`` test against
    ``all_elements`` for every item (a linear scan per item when
    ``all_elements`` is a list).

    Parameters
    ----------
    sub_elements : iterable
        Candidate items to verify.
    all_elements : container
        Reference collection the candidates are checked against.

    Returns
    -------
    list
        Matching items, in the order (and with the duplicates) in which they
        appear in ``sub_elements``.
    """
    verified_elements = []
    for element in sub_elements:
        if element in all_elements:
            verified_elements.append(element)
    return verified_elements


def usingnumpy(sub_elements=None, all_elements=None):
    """Return the common items of both inputs using ``numpy.intersect1d``.

    Parameters
    ----------
    sub_elements : sequence, optional
        Candidate items. When omitted, they are read from ``SUBSET_FILE``.
    all_elements : sequence, optional
        Reference items. When omitted, they are read from ``ALL_FILE``.

    Returns
    -------
    list
        The sorted, unique items present in both inputs. Unlike the loop
        version, duplicates are collapsed and the input order is not kept.
    """
    if sub_elements is None:
        sub_elements = _read_lines(SUBSET_FILE)
    if all_elements is None:
        all_elements = _read_lines(ALL_FILE)
    # Short-circuit empty inputs so numpy never has to combine an empty
    # float array with a string array.
    if len(sub_elements) == 0 or len(all_elements) == 0:
        return []
    return np.intersect1d(sub_elements, all_elements).tolist()


def elements(sub_elements, all_elements):
    """Return the items of ``sub_elements`` that also occur in ``all_elements``.

    Uses a ``set`` built once from ``all_elements`` so every membership test
    is a hash lookup instead of a scan of the whole reference collection.

    Parameters
    ----------
    sub_elements : iterable
        Candidate items to verify. Items must be hashable.
    all_elements : iterable
        Reference items. Items must be hashable.

    Returns
    -------
    list
        Matching items, in the order (and with the duplicates) in which they
        appear in ``sub_elements`` -- the same result as ``elements_loop``.
    """
    lookup = set(all_elements)
    return [element for element in sub_elements if element in lookup]


def _timed(label, func, *args):
    """Call ``func(*args)``, print the result length and the elapsed time."""
    start = time.perf_counter()
    verified_elements = func(*args)
    duration = time.perf_counter() - start
    print(label)
    print(len(verified_elements))
    print('Duration: {} seconds'.format(duration))
    return verified_elements


def main():
    """Load the default data files and time each implementation on them."""
    sub_elements = _read_lines(SUBSET_FILE)
    all_elements = _read_lines(ALL_FILE)
    _timed('for loop', elements_loop, sub_elements, all_elements)
    _timed('numpy intersect1d', usingnumpy, sub_elements, all_elements)
    _timed('set lookup', elements, sub_elements, all_elements)


# Keep the module free of import-time side effects; timings are only printed
# when the file is executed as a script.
if __name__ == '__main__':
    main()