diff --git a/02_activities/assignments/assignment3/Assignment-3_PowerBIcode.pbix b/02_activities/assignments/assignment3/Assignment-3_PowerBIcode.pbix new file mode 100644 index 000000000..a46abdfbd Binary files /dev/null and b/02_activities/assignments/assignment3/Assignment-3_PowerBIcode.pbix differ diff --git a/02_activities/assignments/assignment3/Assignment-3_Pythoncode.ipynb b/02_activities/assignments/assignment3/Assignment-3_Pythoncode.ipynb new file mode 100644 index 000000000..9176bced2 --- /dev/null +++ b/02_activities/assignments/assignment3/Assignment-3_Pythoncode.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 26, + "id": "213576fc", + "metadata": {}, + "outputs": [], + "source": [ + "# Import libraries\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "c69c793d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\ShaileshT\\AppData\\Local\\Temp\\ipykernel_33040\\3500826477.py:15: FutureWarning: \n", + "\n", + "Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. 
Assign the `y` variable to `hue` and set `legend=False` for the same effect.\n", + "\n", + " sns.barplot(\n", + "C:\\Users\\ShaileshT\\AppData\\Local\\Temp\\ipykernel_33040\\3500826477.py:39: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown\n", + " plt.show()\n" + ] + } + ], + "source": [ + "\n", + "\n", + "# Step 1: Loading the CSV dataset\n", + "df = pd.read_csv(\"ontario_housing_supply_2024.csv\")\n", + "\n", + "# Step 2: Cleaning the percentage column\n", + "df['Progress % for 2024'] = df['Progress % for 2024'].str.replace('%', '').astype(float)\n", + "\n", + "# Step 3: Sorting and extracting top 10 municipalities by 2024 progress %\n", + "top10 = df.sort_values(by='Progress % for 2024', ascending=False).head(10)\n", + "\n", + "# Step 4: Setting figure size and style\n", + "fig, ax = plt.subplots(figsize=(10, 6), dpi=100)\n", + "sns.set(style=\"whitegrid\")\n", + "\n", + "# Step 5: Plotting horizontal bar chart ON `ax`\n", + "sns.barplot(\n", + " x='Progress % for 2024',\n", + " y='Municipality',\n", + " data=top10,\n", + " palette='viridis',\n", + " ax=ax # <-- Use the correct axes\n", + ")\n", + "\n", + "# Step 6: Adding title and labels\n", + "ax.set_title('Top 10 Ontario Municipalities by 2024 Housing Target Progress', fontsize=14)\n", + "ax.set_xlabel('Progress Toward 2024 Target (%)', fontsize=12)\n", + "ax.set_ylabel('Municipality', fontsize=12)\n", + "\n", + "# Step 7: Annotating each bar with its value\n", + "for i in range(len(top10)):\n", + " ax.text(\n", + " top10['Progress % for 2024'].iloc[i] + 1, # x\n", + " i, # y\n", + " f\"{top10['Progress % for 2024'].iloc[i]:.1f}%\", # label\n", + " va='center'\n", + " )\n", + "\n", + "# Step 8: Save plot\n", + "plt.tight_layout()\n", + "plt.show()\n", + "fig.savefig(\"top10_housing_progress.png\", dpi=100, bbox_inches='tight')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dsi_participant", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.21" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/02_activities/assignments/assignment3/PowerBI_Visualization.jpg b/02_activities/assignments/assignment3/PowerBI_Visualization.jpg new file mode 100644 index 000000000..1cc5fb5de Binary files /dev/null and b/02_activities/assignments/assignment3/PowerBI_Visualization.jpg differ diff --git a/02_activities/assignments/assignment3/PowerBI_describe_justify.ipynb b/02_activities/assignments/assignment3/PowerBI_describe_justify.ipynb new file mode 100644 index 000000000..c6d1d636f --- /dev/null +++ b/02_activities/assignments/assignment3/PowerBI_describe_justify.ipynb @@ -0,0 +1,74 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a380684", + "metadata": {}, + "source": [ + "Power BI Visualization – Describe and Justify:\n", + "1.\tWhat software did you use to create your data visualization?\n", + "I used Microsoft Power BI to create the clustered bar chart. It provides an intuitive drag-and-drop interface, strong support for categorical comparisons, and interactive filtering capabilities.\n", + "\n", + "2.\tWho is your intended audience?\n", + "The intended audience includes urban planners, municipal decision-makers, and housing policy analysts in Ontario. It can also serve a secondary audience of engaged citizens and advocacy groups tracking housing development progress.\n", + "\n", + "3.\tWhat information or message are you trying to convey with your visualization?\n", + "The goal is to compare how each Ontario municipality is performing against its 2024 housing target. 
The message is simple:\n", + "-\tSome cities are exceeding or approaching targets.\n", + "-\tOthers are lagging behind.\n", + "This visualization helps prioritize attention and potential resource reallocation.\n", + "\n", + "4.\tWhat aspects of design did you consider when making your visualization? How did you apply them? With what elements of your plots?\n", + "I focused on clarity, contrast, and comparability:\n", + "-\tUsed horizontal clustered bars to make municipality names easy to scan.\n", + "-\tApplied distinct colors (blue for target, pink for progress) to avoid confusion.\n", + "-\tEnabled data labels for quick reference to actual figures.\n", + "-\tGrouped related metrics on the same axis to allow immediate visual comparison.\n", + "\n", + "5.\tHow did you ensure that your data visualizations are reproducible?\n", + "The dataset was cleaned and loaded using Power BI’s data model, and the entire process is saved in the .pbix file. The steps are easily replayable and modifiable using the Power Query Editor, ensuring full reproducibility. 
If another user opens the same file with access to the dataset, they will get the exact same results.\n", + "\n", + "6.\tHow did you ensure that your data visualization is accessible?\n", + "-\tSelected high-contrast colors for colorblind-friendly readability.\n", + "-\tTurned on data labels to minimize reliance on interpreting bar lengths.\n", + "-\tAvoided clutter by limiting the number of categories to the top municipalities.\n", + "-\tProvided clear axis titles and an informative legend to guide interpretation.\n", + "\n", + "7.\tWho are the individuals and communities who might be impacted by your visualization?\n", + "-\tMunicipal governments that may use this for performance tracking or reporting.\n", + "-\tPolicy-makers shaping housing investments or incentives.\n", + "-\tDevelopers and citizens advocating for more responsive housing plans.\n", + "The visualization highlights areas of concern and success, potentially influencing where public and private resources flow.\n", + "\n", + "8.\tHow did you choose which features of your chosen dataset to include or exclude from your visualization?\n", + "I included only the 2024 housing target, actual 2024 housing progress, and municipality name to maintain focus. I excluded demographic, geographic, or historical data to keep the visualization clean and aligned with the specific question of \"target vs. 
progress\" for 2024.\n", + "\n", + "9.\tWhat ‘underwater labour’ contributed to your final data visualization product?\n", + "-\tData cleaning and normalization to align municipality names and numeric types\n", + "-\tExploring chart types in Power BI to find one that best supports categorical comparison\n", + "-\tFormatting and adjusting visual elements for clarity and accessibility\n", + "-\tManual renaming of legend and axes for plain-language readability\n", + "-\tTime spent testing different combinations of metrics to make the story clear and non-misleading\n" + ] + }, + { + "cell_type": "markdown", + "id": "c3574389", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dsi_participant", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.21" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/02_activities/assignments/assignment3/Python_Visualization.png b/02_activities/assignments/assignment3/Python_Visualization.png new file mode 100644 index 000000000..9db2f1454 Binary files /dev/null and b/02_activities/assignments/assignment3/Python_Visualization.png differ diff --git a/02_activities/assignments/assignment3/Python_describe_justify.ipynb b/02_activities/assignments/assignment3/Python_describe_justify.ipynb new file mode 100644 index 000000000..3549e5a24 --- /dev/null +++ b/02_activities/assignments/assignment3/Python_describe_justify.ipynb @@ -0,0 +1,73 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ffdb7648", + "metadata": {}, + "source": [ + "Python Visualization – Describe and Justify:\n", + "1.\tWhat software did you use to create your data visualization?\n", + "I used Python, specifically the matplotlib and seaborn libraries, to create the horizontal bar chart. 
These tools are powerful for data wrangling, customization, and reproducible visual storytelling through code.\n", + "2.\tWho is your intended audience?\n", + "This visualization is designed for housing analysts, data scientists, government transparency advocates, and anyone seeking a clear, data-driven snapshot of housing development performance in Ontario municipalities.\n", + "3.\tWhat information or message are you trying to convey with your visualization?\n", + "The visualization ranks and compares the top 10 municipalities based on their percentage progress toward 2024 housing targets. The message is:\n", + "Here are the municipalities outperforming their targets — let's spotlight what they’re doing right. It draws attention to overachievement and encourages reflection on regional housing policies.\n", + "4.\tWhat aspects of design did you consider when making your visualization? How did you apply them? With what elements of your plots?\n", + "-\tUsed a sequential color palette to reflect progression from lowest to highest performance.\n", + "-\tPlotted a horizontal bar chart for easier comparison of categorical names with long labels.\n", + "-\tAdded value labels directly on bars to remove the need for users to estimate lengths.\n", + "-\tSet a descriptive title and labeled axes to guide interpretation.\n", + "-\tRemoved unnecessary clutter (e.g., spines, ticks) to maintain focus.\n", + "\n", + "5.\tHow did you ensure that your data visualizations are reproducible?\n", + "The entire visualization is built from a single Python script, which:\n", + "1.\tLoads the dataset\n", + "2.\tFilters and sorts the top 10 municipalities\n", + "3.\tGenerates the chart with all formatting\n", + "Anyone running this script with the same data will get identical output, making it fully reproducible. 
It can also be automated or integrated into reports.\n", + "\n", + "6.\tHow did you ensure that your data visualization is accessible?\n", + "-\tUsed high-contrast color shades that are distinguishable by most users, including those with color vision deficiency.\n", + "-\tLabels on each bar improve readability without requiring legend decoding.\n", + "-\tFont sizes and layout spacing were tuned for clarity on both screen and print.\n", + "-\tThe chart doesn’t rely on interactive features, so it's accessible in static formats like PDFs or presentations.\n", + "\n", + "7.\tWho are the individuals and communities who might be impacted by your visualization?\n", + "-\tMunicipal planners and policymakers can benchmark their performance.\n", + "-\tCivic groups and local residents can use the visualization for advocacy or accountability.\n", + "-\tGovernment funding bodies may use this to identify and reward top-performing cities.\n", + "The chart highlights successes that could lead to replication or funding shifts.\n", + "\n", + "8.\tHow did you choose which features of your chosen dataset to include or exclude from your visualization?\n", + "To emphasize clarity, I excluded raw counts and non-percentage metrics. 
I filtered the top 10 performers only; this allowed the chart to remain readable, focused, and impactful, avoiding information overload or dilution of the insight.\n", + "\n", + "9.\tWhat ‘underwater labour’ contributed to your final data visualization product?\n", + "-\tWriting and testing Python code to prepare the dataset and format the chart\n", + "-\tAdjusting figure size, orientation, axis labels, and color mapping manually\n", + "-\tDebugging issues like oversized image exports and annotating labels properly\n", + "-\tVerifying the percentage calculations to ensure accuracy\n", + "-\tExporting the chart in a format suitable for inclusion in reports\n" + ] + }, + { + "cell_type": "markdown", + "id": "638fa34d", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dsi_participant", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.21" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/02_activities/assignments/assignment3/assignment_3.md b/02_activities/assignments/assignment3/assignment_3.md new file mode 100644 index 000000000..584248b5f --- /dev/null +++ b/02_activities/assignments/assignment3/assignment_3.md @@ -0,0 +1,69 @@ +# Data Visualization + +## Assignment 3: Final Project + +### Requirements: +- We will finish this class by giving you the chance to use what you have learned in a practical context, by creating data visualizations from raw data. +- Choose a dataset of interest from the [City of Toronto’s Open Data Portal](https://www.toronto.ca/city-government/data-research-maps/open-data/) or [Ontario’s Open Data Catalogue](https://data.ontario.ca/). +- Using Python and one other data visualization software (Excel or free alternative, Tableau Public, any other tool you prefer), create two distinct visualizations from your dataset of choice. 
+- For each visualization, describe and justify: + > What software did you use to create your data visualization? + + > Who is your intended audience? + + > What information or message are you trying to convey with your visualization? + + > What aspects of design did you consider when making your visualization? How did you apply them? With what elements of your plots? + + > How did you ensure that your data visualizations are reproducible? If the tool you used to make your data visualization is not reproducible, how will this impact your data visualization? + + > How did you ensure that your data visualization is accessible? + + > Who are the individuals and communities who might be impacted by your visualization? + + > How did you choose which features of your chosen dataset to include or exclude from your visualization? + + > What ‘underwater labour’ contributed to your final data visualization product? + +- This assignment is intentionally open-ended - you are free to create static or dynamic data visualizations, maps, or whatever form of data visualization you think best communicates your information to your audience of choice! +- Total word count should not exceed **(as a maximum) 1000 words** + +### Why am I doing this assignment?: +- This ongoing assignment ensures active participation in the course, and assesses the learning outcomes: +* Create and customize data visualizations from start to finish in Python +* Apply general design principles to create accessible and equitable data visualizations +* Use data visualization to tell a story +- This would be a great project to include in your GitHub Portfolio – put in the effort to make it something worthy of showing prospective employers! + +### Rubric: + +| Component | Scoring | Requirement | +|-------------------|----------|-----------------------------------------------------------------------------| +| Data Visualizations | Complete/Incomplete | - Data visualizations are distinct from each other
- Data visualizations are clearly identified
- Different sources/rationales (text with two images of data, if visualizations are labeled)
- High-quality visuals (high resolution and clear data)
- Data visualizations follow best practices of accessibility | +| Written Explanations | Complete/Incomplete | - All questions from assignment description are answered for each visualization
- Explanations are supported by course content or scholarly sources, where needed | +| Code | Complete/Incomplete | - All code is included as an appendix with your final submissions
- Code is clearly commented and reproducible | + +## Submission Information + +🚨 **Please review our [Assignment Submission Guide](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md)** 🚨 for detailed instructions on how to format, branch, and submit your work. Following these guidelines is crucial for your submissions to be evaluated correctly. + +### Submission Parameters: +* Submission Due Date: `23:59 - 13/07/2025` +* The branch name for your repo should be: `assignment-3` +* What to submit for this assignment: + * A folder/directory containing: + * This file (assignment_3.md) + * Two data visualizations + * Two markdown files, one for each visualization, with their written descriptions. + * Link to your dataset of choice. + * Complete and commented code as an appendix (for your visualization made with Python, and for the other, if relevant) +* What the pull request link should look like for this assignment: `https://github.com/<your_github_username>/visualization/pull/<pr_id>` + * Open a private window in your browser. Copy and paste the link to your pull request into the address bar. Make sure you can see your pull request properly. This helps the technical facilitator and learning support staff review your submission easily. + +Checklist: +- [ ] Create a branch called `assignment-3`. +- [ ] Ensure that the repository is public. +- [ ] Review [the PR description guidelines](https://github.com/UofT-DSI/onboarding/blob/main/onboarding_documents/submissions.md#guidelines-for-pull-request-descriptions) and adhere to them. +- [ ] Verify that the link is accessible in a private browser window. + +If you encounter any difficulties or have questions, please don't hesitate to reach out to our team via our Slack. Our Technical Facilitators and Learning Support staff are here to help you navigate any challenges. 
diff --git a/02_activities/assignments/assignment3/ontario_housing_supply_2024.csv b/02_activities/assignments/assignment3/ontario_housing_supply_2024.csv new file mode 100644 index 000000000..26f7bf6c6 --- /dev/null +++ b/02_activities/assignments/assignment3/ontario_housing_supply_2024.csv @@ -0,0 +1,52 @@ +_id,Municipality,Ten-year housing target,Total housing progress since 2022,Target for 2024,Total 2024 housing progress,Progress % for 2024,Housing Target Status +409,Ajax,17000,1801,1417,163,11.50%,Not met +410,Aurora,8000,641,667,167,25.04%,Not met +411,Barrie,23000,4782,1917,791,41.26%,Not met +412,Belleville,3100,1140,258,213,82.56%,On track +413,Bradford West Gwillimbury,6500,991,542,239,44.10%,Not met +414,Brampton,113000,20121,9417,4907,52.11%,Not met +415,Brantford,10000,2451,833,280,33.61%,Not met +416,Burlington,29000,1466,2417,754,31.20%,Not met +417,Caledon,13000,2349,1083,553,51.06%,Not met +418,Cambridge,19000,2908,1583,626,39.55%,Not met +419,Chatham-Kent,1100,1203,92,136,147.83%,Exceeded +420,Clarington,13000,1820,1083,584,53.92%,Not met +421,East Gwillimbury,4300,993,358,107,29.89%,Not met +422,Georgina,6200,845,517,223,43.13%,Not met +423,Greater Sudbury,3800,967,317,230,72.56%,Not met +424,Guelph,18000,2577,1500,849,56.60%,Not met +425,Haldimand County,4200,673,350,92,26.29%,Not met +426,Halton Hills,9500,652,792,266,33.59%,Not met +427,Hamilton,47000,9444,3917,1632,41.66%,Not met +428,Innisfil,6300,1555,525,260,49.52%,Not met +429,Kawartha Lakes,6500,1375,542,332,61.25%,Not met +430,Kingston,8000,2348,667,452,67.77%,Not met +431,Kitchener,35000,8565,2917,2038,69.87%,Not met +432,London,47000,7325,3917,3012,76.90%,Not met +433,Markham,44000,5903,3667,1445,39.41%,Not met +434,Milton,21000,4952,1750,1673,95.60%,On track +435,Mississauga,120000,11880,10000,3277,32.77%,Not met +436,New Tecumseth,6400,538,533,231,43.34%,Not met +437,Newmarket,12000,983,1000,199,19.90%,Not met +438,Niagara Falls,8000,1564,667,481,72.11%,Not met +439,Norfolk 
County,5700,701,475,155,32.63%,Not met +440,North Bay,1000,652,83,230,277.11%,Exceeded +441,Oakville,33000,7306,2750,3382,122.98%,Exceeded +442,Oshawa,23000,4001,1917,887,46.27%,Not met +443,Ottawa,151000,26777,12583,6007,47.74%,Not met +444,Peterborough City,4700,1009,392,233,59.44%,Not met +445,Pickering,13000,4748,1083,1696,156.60%,Exceeded +446,Richmond Hill,27000,2948,2250,752,33.42%,Not met +447,Sarnia,1000,561,83,150,180.72%,Exceeded +448,Sault Ste. Marie,1500,662,125,180,144.00%,Exceeded +449,St. Catharines,11000,1330,917,259,28.24%,Not met +450,Thunder Bay,2200,587,183,246,134.43%,Exceeded +451,Toronto,285000,71762,23750,18422,77.57%,Not met +452,Vaughan,42000,8521,3500,2348,67.09%,Not met +453,Waterloo,16000,2420,1333,857,64.29%,Not met +454,Welland,4300,1929,358,382,106.70%,Exceeded +455,Whitby,18000,3263,1500,557,37.13%,Not met +456,Whitchurch-Stouffville,6500,1624,542,42,7.75%,Not met +457,Windsor,13000,2293,1083,1456,134.44%,Exceeded +458,Woodstock,5500,717,458,105,22.93%,Not met +459,Municipalities without targets,172700,41816,14392,8081,56.15%,Not applicable