diff --git a/Project2-WebScraping.ipynb b/Project2-WebScraping.ipynb new file mode 100644 index 0000000..f6a600a --- /dev/null +++ b/Project2-WebScraping.ipynb @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 232, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import sys\n", + "import requests\n", + "from pathlib import Path\n", + "\n", + "from bs4 import BeautifulSoup\n", + "\n", + "from selenium import webdriver\n", + "from selenium.webdriver.common.keys import Keys\n", + "from selenium.webdriver.common.action_chains import ActionChains\n", + "from selenium.common.exceptions import NoSuchElementException" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": {}, + "outputs": [], + "source": [ + "driver = webdriver.Firefox(executable_path=r'C:\\Program Files\\geckodriver-v0.29.1-win64\\geckodriver.exe')" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "metadata": {}, + "outputs": [], + "source": [ + "def get_movies(name: str):\n", + " \n", + " driver = webdriver.Firefox(executable_path=r'C:\\Program Files\\geckodriver-v0.29.1-win64\\geckodriver.exe')\n", + " driver.get(\"https://www.imdb.com/\")\n", + " \n", + " time.sleep(1)\n", + " elem = driver.find_element_by_name(\"q\")\n", + " elem.clear()\n", + " elem.send_keys(name)\n", + " elem.send_keys(Keys.RETURN)\n", + " \n", + " time.sleep(1)\n", + " try:\n", + " element = driver.find_element_by_partial_link_text(name)\n", + " element.click()\n", + " except NoSuchElementException: \n", + " driver.close()\n", + " return 'Could not find actor/actress'\n", + " \n", + " time.sleep(1)\n", + " page_source = driver.page_source\n", + " \n", + " time.sleep(1)\n", + " driver.close()\n", + " \n", + " world_selection = BeautifulSoup(page_source, 'lxml')\n", + " movie_source = world_selection.find('div', class_='filmo-category-section')\n", + " movie_list = re.findall(r'>(.*?)<', str(movie_source))\n", + " movies = [i for i in movie_list if i != '']\n", + " \n", + " return movies\n" + ] + }, + { + "cell_type": "code", + "execution_count": 243, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Big Bug Man',\n", + " 'completed',\n", + " 'Superman II: The Richard Donner Cut',\n", + " 'The Godfather',\n", + " 'Michael Jackson: You Rock My World',\n", + " 'The Score',\n", + " 'Free Money',\n", + " 'The Brave',\n", + " 'The Island of Dr. Moreau',\n", + " 'Divine Rapture',\n", + " 'Don Juan DeMarco',\n", + " 'Christopher Columbus: The Discovery',\n", + " 'The Freshman',\n", + " 'A Dry White Season',\n", + " 'The Formula',\n", + " 'Apocalypse Now',\n", + " 'Roots: The Next Generations',\n", + " 'Part VII (1960-1967)',\n", + " 'Superman',\n", + " 'The Godfather Saga',\n", + " 'Episode #1.4',\n", + " 'Episode #1.3',\n", + " 'Episode #1.2',\n", + " 'Episode #1.1',\n", + " 'The Missouri Breaks',\n", + " 'Last Tango in Paris',\n", + " 'The Godfather',\n", + " 'The Nightcomers',\n", + " 'Burn!',\n", + " 'The Night of the Following Day',\n", + " 'Candy',\n", + " 'Reflections in a Golden Eye',\n", + " 'A Countess from Hong Kong',\n", + " 'The Appaloosa',\n", + " 'The Chase',\n", + " 'Morituri',\n", + " 'Bedtime Story',\n", + " 'The Ugly American',\n", + " 'Mutiny on the Bounty',\n", + " 'One-Eyed Jacks',\n", + " 'The Fugitive Kind',\n", + " 'The Young Lions',\n", + " 'Sayonara',\n", + " 'The Teahouse of the August Moon',\n", + " 'Guys and Dolls',\n", + " 'Omnibus',\n", + " 'Advice to Bathers',\n", + " 'Désirée',\n", + " 'On the Waterfront',\n", + " 'The Wild One',\n", + " 'Julius Caesar',\n", + " 'Viva Zapata!',\n", + " 'A Streetcar Named Desire',\n", + " 'The Men',\n", + " \"Actor's Studio\",\n", + " \"I'm No Hero\"]" + ] + }, + "execution_count": 243, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_movies('Marlon Brando')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}