diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..cc99851 Binary files /dev/null and b/.DS_Store differ diff --git a/your-code/API.ipynb b/your-code/API.ipynb new file mode 100644 index 0000000..6230149 --- /dev/null +++ b/your-code/API.ipynb @@ -0,0 +1,1033 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import requests\n", + "import pandas as pd\n", + "from pandas.io.json import json_normalize\n", + "import tweepy\n", + "import re \n", + "from tweepy import OAuthHandler \n", + "from textblob import TextBlob\n", + "from nltk.corpus import stopwords\n", + "import requests\n", + "from bs4 import BeautifulSoup\n", + "\n", + "#cursor\n", + "#tweet.text" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://www.imdb.com/chart/tvmeter?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=4da9d9a5-d299-43f2-9c53-f0efa18182cd&pf_rd_r=0KYJP3BJH5DN6EZWRQ5Z&pf_rd_s=right-4&pf_rd_t=15506&pf_rd_i=toptv&ref_=chttvtp_ql_5'" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "soup = requests.get(url).content\n", + "soup = BeautifulSoup(soup,'html.parser')" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "#Titles of the most popular series; keep the first ten ([:11] kept eleven)\n", + "popularTV = soup.select(\"td.titleColumn a[href^='/title']\")\n", + "popularTV = [name.text for name in popularTV]\n", + "popularTV = list(map(lambda x:x.strip(),popularTV))\n", + "top10 = popularTV[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "#Scores; sliced to ten as well so both dataframe columns have the same length\n", + "scores = soup.select(\"td.ratingColumn strong\")\n", + "score = [score.text for score in scores]\n", + "top10score = score[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": 
{ + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameScore
0Watchmen6.8
1Cómo vivir contigo mismo7.4
2Peaky Blinders8.8
3The Walking Dead8.3
4Modern Love8.1
5American Horror Story8.1
6Breaking Bad9.5
7Daybreak6.8
8Batwoman3.2
9Supernatural8.4
10Castle Rock7.7
\n", + "
" + ], + "text/plain": [ + " Name Score\n", + "0 Watchmen 6.8\n", + "1 Cómo vivir contigo mismo 7.4\n", + "2 Peaky Blinders 8.8\n", + "3 The Walking Dead 8.3\n", + "4 Modern Love 8.1\n", + "5 American Horror Story 8.1\n", + "6 Breaking Bad 9.5\n", + "7 Daybreak 6.8\n", + "8 Batwoman 3.2\n", + "9 Supernatural 8.4\n", + "10 Castle Rock 7.7" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Build the dataframe of the most popular series on IMDB\n", + "columns = [\"Name\", \"Score\"]\n", + "dicttop = {'Name':top10,'Score':top10score}\n", + "\n", + "top10df = pd.DataFrame(dicttop, columns=columns)\n", + "top10df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#Access the Twitter API. Credentials are read from the environment so they stay out of version control; the keys that were hard-coded here before are public now and must be revoked.\n", + "import os\n", + "API_KEY = os.environ[\"TWITTER_API_KEY\"]\n", + "API_SECRET = os.environ[\"TWITTER_API_SECRET\"]\n", + "ACCESS_TOKEN = os.environ[\"TWITTER_ACCESS_TOKEN\"]\n", + "ACCESS_TOKEN_SECRET = os.environ[\"TWITTER_ACCESS_TOKEN_SECRET\"]\n", + "auth = tweepy.OAuthHandler(API_KEY, API_SECRET)\n", + "auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)\n", + "api = tweepy.API(auth)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "#Search the best tweets about one series\n", + "best_tweets = api.search(q='Watchmen', lang = 'en', count = 100, result_type='mixed')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
created_atidid_strtexttruncatedentitiesmetadatasourcein_reply_to_status_idin_reply_to_status_id_str...favorite_countfavoritedretweetedpossibly_sensitivelangretweeted_statusquoted_status_idquoted_status_id_strquoted_statusextended_entities
0Fri Nov 01 17:00:06 +0000 201911903125702442147841190312570244214784Very excited to announce The Official @Watchme...True{'hashtags': [], 'symbols': [], 'user_mentions...{'result_type': 'popular', 'iso_language_code'...<a href=\"https://twittimer.com\" rel=\"nofollow\"...NaNNone...1452FalseFalseFalseenNaNNaNNaNNaNNaN
1Fri Nov 01 17:00:00 +0000 201911903125455104778251190312545510477825The Official Watchmen Podcast is coming. Join ...True{'hashtags': [{'text': 'Chernobyl', 'indices':...{'result_type': 'popular', 'iso_language_code'...<a href=\"https://studio.twitter.com\" rel=\"nofo...NaNNone...544FalseFalseFalseenNaNNaNNaNNaNNaN
2Fri Nov 01 15:15:31 +0000 201911902862515109027871190286251510902787white comic fans hate watchmen. but black woma...True{'hashtags': [], 'symbols': [], 'user_mentions...{'result_type': 'popular', 'iso_language_code'...<a href=\"https://mobile.twitter.com\" rel=\"nofo...NaNNone...401FalseFalseNaNenNaNNaNNaNNaNNaN
3Sat Nov 02 23:27:42 +0000 201911907725031044136961190772503104413696@ProperOpinion Thought the same about the watc...False{'hashtags': [], 'symbols': [], 'user_mentions...{'iso_language_code': 'en', 'result_type': 're...<a href=\"http://twitter.com/download/iphone\" r...1.190757e+181190757443216785409...1FalseFalseNaNenNaNNaNNaNNaNNaN
4Sat Nov 02 23:27:42 +0000 201911907725000759296001190772500075929600@ReginaKing @HBO You’re awesome in The Leftove...False{'hashtags': [], 'symbols': [], 'user_mentions...{'iso_language_code': 'en', 'result_type': 're...<a href=\"http://twitter.com/download/iphone\" r...1.190723e+181190722756511334400...0FalseFalseNaNenNaNNaNNaNNaNNaN
..................................................................
95Sat Nov 02 22:59:19 +0000 201911907653601166213201190765360116621320RT @michaelharriot: Thread:\\n\\nA lot of white ...False{'hashtags': [], 'symbols': [], 'user_mentions...{'iso_language_code': 'en', 'result_type': 're...<a href=\"http://twitter.com/download/android\" ...NaNNone...0FalseFalseNaNen{'created_at': 'Tue Oct 22 02:24:21 +0000 2019...NaNNaNNaNNaN
96Sat Nov 02 22:59:06 +0000 201911907653050412113921190765305041211392RT @tvs_movies: #TheBatman: William Hoy Film E...False{'hashtags': [{'text': 'TheBatman', 'indices':...{'iso_language_code': 'en', 'result_type': 're...<a href=\"http://twitter.com/download/iphone\" r...NaNNone...0FalseFalseNaNen{'created_at': 'Sat Nov 02 12:50:53 +0000 2019...NaNNaNNaNNaN
97Sat Nov 02 22:58:38 +0000 201911907651875168296961190765187516829696RT @tvs_movies: #TheBatman: William Hoy Film E...False{'hashtags': [{'text': 'TheBatman', 'indices':...{'iso_language_code': 'en', 'result_type': 're...<a href=\"http://twitter.com/download/iphone\" r...NaNNone...0FalseFalseNaNen{'created_at': 'Sat Nov 02 12:50:53 +0000 2019...NaNNaNNaNNaN
98Sat Nov 02 22:58:18 +0000 201911907651039788933121190765103978893312The End Is the Beginning Is the End https://t....True{'hashtags': [], 'symbols': [], 'user_mentions...{'iso_language_code': 'en', 'result_type': 're...<a href=\"https://mobile.twitter.com\" rel=\"nofo...NaNNone...0FalseFalseFalseenNaNNaNNaNNaNNaN
99Sat Nov 02 22:57:38 +0000 201911907649358290206731190764935829020673Watchmen has a pretty amazing use of colorFalse{'hashtags': [], 'symbols': [], 'user_mentions...{'iso_language_code': 'en', 'result_type': 're...<a href=\"http://twitter.com/download/iphone\" r...NaNNone...0FalseFalseNaNenNaNNaNNaNNaNNaN
\n", + "

100 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " created_at id id_str \\\n", + "0 Fri Nov 01 17:00:06 +0000 2019 1190312570244214784 1190312570244214784 \n", + "1 Fri Nov 01 17:00:00 +0000 2019 1190312545510477825 1190312545510477825 \n", + "2 Fri Nov 01 15:15:31 +0000 2019 1190286251510902787 1190286251510902787 \n", + "3 Sat Nov 02 23:27:42 +0000 2019 1190772503104413696 1190772503104413696 \n", + "4 Sat Nov 02 23:27:42 +0000 2019 1190772500075929600 1190772500075929600 \n", + ".. ... ... ... \n", + "95 Sat Nov 02 22:59:19 +0000 2019 1190765360116621320 1190765360116621320 \n", + "96 Sat Nov 02 22:59:06 +0000 2019 1190765305041211392 1190765305041211392 \n", + "97 Sat Nov 02 22:58:38 +0000 2019 1190765187516829696 1190765187516829696 \n", + "98 Sat Nov 02 22:58:18 +0000 2019 1190765103978893312 1190765103978893312 \n", + "99 Sat Nov 02 22:57:38 +0000 2019 1190764935829020673 1190764935829020673 \n", + "\n", + " text truncated \\\n", + "0 Very excited to announce The Official @Watchme... True \n", + "1 The Official Watchmen Podcast is coming. Join ... True \n", + "2 white comic fans hate watchmen. but black woma... True \n", + "3 @ProperOpinion Thought the same about the watc... False \n", + "4 @ReginaKing @HBO You’re awesome in The Leftove... False \n", + ".. ... ... \n", + "95 RT @michaelharriot: Thread:\\n\\nA lot of white ... False \n", + "96 RT @tvs_movies: #TheBatman: William Hoy Film E... False \n", + "97 RT @tvs_movies: #TheBatman: William Hoy Film E... False \n", + "98 The End Is the Beginning Is the End https://t.... True \n", + "99 Watchmen has a pretty amazing use of color False \n", + "\n", + " entities \\\n", + "0 {'hashtags': [], 'symbols': [], 'user_mentions... \n", + "1 {'hashtags': [{'text': 'Chernobyl', 'indices':... \n", + "2 {'hashtags': [], 'symbols': [], 'user_mentions... \n", + "3 {'hashtags': [], 'symbols': [], 'user_mentions... \n", + "4 {'hashtags': [], 'symbols': [], 'user_mentions... \n", + ".. ... 
\n", + "95 {'hashtags': [], 'symbols': [], 'user_mentions... \n", + "96 {'hashtags': [{'text': 'TheBatman', 'indices':... \n", + "97 {'hashtags': [{'text': 'TheBatman', 'indices':... \n", + "98 {'hashtags': [], 'symbols': [], 'user_mentions... \n", + "99 {'hashtags': [], 'symbols': [], 'user_mentions... \n", + "\n", + " metadata \\\n", + "0 {'result_type': 'popular', 'iso_language_code'... \n", + "1 {'result_type': 'popular', 'iso_language_code'... \n", + "2 {'result_type': 'popular', 'iso_language_code'... \n", + "3 {'iso_language_code': 'en', 'result_type': 're... \n", + "4 {'iso_language_code': 'en', 'result_type': 're... \n", + ".. ... \n", + "95 {'iso_language_code': 'en', 'result_type': 're... \n", + "96 {'iso_language_code': 'en', 'result_type': 're... \n", + "97 {'iso_language_code': 'en', 'result_type': 're... \n", + "98 {'iso_language_code': 'en', 'result_type': 're... \n", + "99 {'iso_language_code': 'en', 'result_type': 're... \n", + "\n", + " source in_reply_to_status_id \\\n", + "0 \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text
0Very excited to announce The Official @Watchme...
1The Official Watchmen Podcast is coming. Join ...
2white comic fans hate watchmen. but black woma...
3@ProperOpinion Thought the same about the watc...
4@ReginaKing @HBO You’re awesome in The Leftove...
......
95RT @michaelharriot: Thread:\\n\\nA lot of white ...
96RT @tvs_movies: #TheBatman: William Hoy Film E...
97RT @tvs_movies: #TheBatman: William Hoy Film E...
98The End Is the Beginning Is the End https://t....
99Watchmen has a pretty amazing use of color
\n", + "

100 rows × 1 columns

\n", + "" + ], + "text/plain": [ + " text\n", + "0 Very excited to announce The Official @Watchme...\n", + "1 The Official Watchmen Podcast is coming. Join ...\n", + "2 white comic fans hate watchmen. but black woma...\n", + "3 @ProperOpinion Thought the same about the watc...\n", + "4 @ReginaKing @HBO You’re awesome in The Leftove...\n", + ".. ...\n", + "95 RT @michaelharriot: Thread:\\n\\nA lot of white ...\n", + "96 RT @tvs_movies: #TheBatman: William Hoy Film E...\n", + "97 RT @tvs_movies: #TheBatman: William Hoy Film E...\n", + "98 The End Is the Beginning Is the End https://t....\n", + "99 Watchmen has a pretty amazing use of color\n", + "\n", + "[100 rows x 1 columns]" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Convert it to a dataframe\n", + "tweetsdf= pd.DataFrame(text)\n", + "tweetsdf" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "#function that classifies tweets\n", + "def get_tweet_sentiment(tweet):\n", + "    '''\n", + "    Classify the passed tweet text as 'positive', 'neutral' or 'negative'\n", + "    from the polarity of its TextBlob sentiment.\n", + "    '''\n", + "    # build the TextBlob once and read its polarity a single time\n", + "    polarity = TextBlob(tweet).sentiment.polarity\n", + "    # the sign of the polarity decides the label\n", + "    if polarity > 0:\n", + "        return 'positive'\n", + "    elif polarity == 0:\n", + "        return 'neutral'\n", + "    else:\n", + "        return 'negative'" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "#Classify the sentiment of every tweet\n", + "sentiments = tweetsdf['text'].apply(get_tweet_sentiment)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textSentimentssentiments
0Very excited to announce The Official @Watchme...positivepositive
1The Official Watchmen Podcast is coming. Join ...neutralneutral
2white comic fans hate watchmen. but black woma...negativenegative
3RT @verge: What DC Comics needs to learn from ...negativenegative
4RT @SnyderContent: Zack Snyder's #Watchmen: Th...neutralneutral
5I have almost zero clue what is happening on W...positivepositive
6@firststartrmh I think I shall watch Xmas Rob ...positivepositive
7@megankatenelson @watchmen I am! I have a lot...neutralneutral
8A must watch for anyone watching #Watchmen \\na...positivepositive
9@ReginaKing @HBO I love this show more than my...positivepositive
10RT @BSpodNetwork: Want to listen to an intelli...positivepositive
11Want to listen to an intelligent discussion? C...positivepositive
12I’m about to rewatch the Watchmen movie.....ne...neutralneutral
13Yes sis!! @sadiquabynum https://t.co/6bRKYVFGa5neutralneutral
14@ABatsoulis @MoviesMatrix Yeah im not snyder f...neutralneutral
\n", + "
" + ], + "text/plain": [ + " text Sentiments sentiments\n", + "0 Very excited to announce The Official @Watchme... positive positive\n", + "1 The Official Watchmen Podcast is coming. Join ... neutral neutral\n", + "2 white comic fans hate watchmen. but black woma... negative negative\n", + "3 RT @verge: What DC Comics needs to learn from ... negative negative\n", + "4 RT @SnyderContent: Zack Snyder's #Watchmen: Th... neutral neutral\n", + "5 I have almost zero clue what is happening on W... positive positive\n", + "6 @firststartrmh I think I shall watch Xmas Rob ... positive positive\n", + "7 @megankatenelson @watchmen I am! I have a lot... neutral neutral\n", + "8 A must watch for anyone watching #Watchmen \\na... positive positive\n", + "9 @ReginaKing @HBO I love this show more than my... positive positive\n", + "10 RT @BSpodNetwork: Want to listen to an intelli... positive positive\n", + "11 Want to listen to an intelligent discussion? C... positive positive\n", + "12 I’m about to rewatch the Watchmen movie.....ne... neutral neutral\n", + "13 Yes sis!! @sadiquabynum https://t.co/6bRKYVFGa5 neutral neutral\n", + "14 @ABatsoulis @MoviesMatrix Yeah im not snyder f... 
neutral neutral" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweetsdf['sentiments'] = sentiments\n", + "tweetsdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def main():\n", + "    # NOTE(review): TwitterClient is not defined in any visible cell of this notebook - confirm where it comes from\n", + "    api = TwitterClient()\n", + "    # calling function to get tweets\n", + "    tweets = api.get_tweets(query = 'Donald Trump', count = 200)\n", + "    # bail out early: every percentage below divides by the number of tweets\n", + "    if not tweets:\n", + "        print(\"No tweets found\")\n", + "        return\n", + "    total = len(tweets)\n", + "    # split the tweets by their 'sentiment' entry\n", + "    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']\n", + "    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']\n", + "    print(\"Positive tweets percentage: {} %\".format(100*len(ptweets)/total))\n", + "    print(\"Negative tweets percentage: {} %\".format(100*len(ntweets)/total))\n", + "    # neutral = neither positive nor negative; lists cannot be subtracted, so the counts are\n", + "    print(\"Neutral tweets percentage: {} %\".format(100*(total - len(ntweets) - len(ptweets))/total))\n", + "\n", + "    # printing up to 10 positive tweets\n", + "    print(\"\\n\\nPositive tweets:\")\n", + "    for tweet in ptweets[:10]:\n", + "        print(tweet['text'])\n", + "\n", + "    # printing up to 10 negative tweets\n", + "    print(\"\\n\\nNegative tweets:\")\n", + "    for tweet in ntweets[:10]:\n", + "        print(tweet['text'])\n", + "\n", + "if __name__ == \"__main__\":\n", + "    # calling main function\n", + "    main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tweets = api.get_tweets(query = 'Donald Trump', count = 200)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 
+ }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/your-code/Lab- proyecto API&Web.ipynb b/your-code/Lab- proyecto API&Web.ipynb new file mode 100644 index 0000000..1a6a6e6 --- /dev/null +++ b/your-code/Lab- proyecto API&Web.ipynb @@ -0,0 +1,1167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import requests\n", + "import pandas as pd\n", + "from pandas.io.json import json_normalize\n", + "import tweepy\n", + "import re \n", + "from tweepy import OAuthHandler \n", + "from textblob import TextBlob\n", + "import requests\n", + "from bs4 import BeautifulSoup\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def get_top_five():\n", + "    #Download and parse the IMDB chart page\n", + "    url = 'https://www.imdb.com/chart/tvmeter?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=4da9d9a5-d299-43f2-9c53-f0efa18182cd&pf_rd_r=0KYJP3BJH5DN6EZWRQ5Z&pf_rd_s=right-4&pf_rd_t=15506&pf_rd_i=toptv&ref_=chttvtp_ql_5'\n", + "    soup = requests.get(url).content\n", + "    soup = BeautifulSoup(soup,'html.parser')\n", + "\n", + "    #Titles of the most popular series; keep five ([:6] kept six)\n", + "    popularTV = soup.select(\"td.titleColumn a[href^='/title']\")\n", + "    popularTV = [name.text for name in popularTV]\n", + "    popularTV = list(map(lambda x:x.strip(),popularTV))\n", + "    top5 = popularTV[:5]\n", + "\n", + "    #Scores; sliced to five as well so both dataframe columns have the same length\n", + "    scores = soup.select(\"td.ratingColumn strong\")\n", + "    score = [score.text for score in scores]\n", + "    top5score = score[:5]\n", + "\n", + "    #Build the dataframe; it is also published as the global top5df, which later cells read\n", + "    columns = [\"Name\", \"Score\"]\n", + "    dicttop = {'Name':top5,'Score':top5score}\n", + "\n", + "    global top5df\n", + "\n", + "    top5df = pd.DataFrame(dicttop, columns=columns)\n", + "    return top5df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameScore
0Watchmen6.8
1Cómo vivir contigo mismo7.4
2Peaky Blinders8.8
3The Walking Dead8.3
4Modern Love8.1
5American Horror Story8.1
\n", + "
" + ], + "text/plain": [ + " Name Score\n", + "0 Watchmen 6.8\n", + "1 Cómo vivir contigo mismo 7.4\n", + "2 Peaky Blinders 8.8\n", + "3 The Walking Dead 8.3\n", + "4 Modern Love 8.1\n", + "5 American Horror Story 8.1" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_top_five()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def access_Twitter_API():\n", + "    # Credentials are read from the environment so they stay out of version control;\n", + "    # the keys that were hard-coded here before are public now and must be revoked.\n", + "    import os\n", + "\n", + "    global api\n", + "\n", + "    # Connection attempt\n", + "    try:\n", + "        auth = tweepy.OAuthHandler(os.environ[\"TWITTER_API_KEY\"], os.environ[\"TWITTER_API_SECRET\"])\n", + "        auth.set_access_token(os.environ[\"TWITTER_ACCESS_TOKEN\"], os.environ[\"TWITTER_ACCESS_TOKEN_SECRET\"])\n", + "        api = tweepy.API(auth)\n", + "        return api\n", + "    # a missing variable or a tweepy failure is reported; other bugs are no longer hidden by a bare except\n", + "    except (KeyError, tweepy.TweepError) as err:\n", + "        print(\"Error: verifica tus credenciales\", repr(err))\n", + "        return None" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "access_Twitter_API()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "#Turn the search results into a dataframe: one row per result, built from its _json attribute\n", + "def get_text(json):\n", + "    global texts\n", + "    global df\n", + "    texts = [pd.Series(i._json) for i in json]\n", + "    df = pd.DataFrame(texts)\n", + "    return df\n", + "    " + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "#Search the best tweets about a series; prints the error and returns None when tweepy raises\n", + "def get_best_tweets(serie):\n", + "    try:\n", + "        global best_tweets\n", + "        best_tweets = api.search(q=serie, lang = 'en', count = 50, result_type='mixed')\n", + "        global tweets\n", + "        tweets = get_text(best_tweets)\n", + "        return tweets\n", + "    except tweepy.TweepError as e:\n", + "        print(\"Error : \" + str(e))" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "#Keep only the text column as a dataframe; the passed frame is used, the global tweets only as a fallback\n", + "def only_text_column(column):\n", + "    global tweets_to_text, tweetsdf\n", + "    frame = column if isinstance(column, pd.DataFrame) else tweets\n", + "    tweets_to_text = frame['text']\n", + "    tweetsdf = pd.DataFrame(tweets_to_text)\n", + "    return tweetsdf\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "def todo(serie):\n", + "    return only_text_column(get_best_tweets(serie))" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text
0Are you ready to venture into the great beyond...
1It's only just begun. #WatchmenHBO https://t.c...
2What DC Comics needs to learn from Joker’s suc...
3RT @Firannion: @neilhimself There needs to be ...
4RT @watchmen: Are you ready to venture into th...
5RT @Firannion: @neilhimself There needs to be ...
6RT @55mmbae: The opening scene of watchmen was...
7RT @Salon: \"Watchmen\" creator and star: This i...
8Last week's #WatchmenHBO was full of tiny deta...
9The Affair Comes to an End, Power Heads for Hi...
10RT @PAPPADEMAS: hearing Alan Moore has decided...
11RT @thelindsayellis: It’s a shame that the new...
12RT @DCComics: Who's watching @Watchmen? Dive i...
13@AkakpoJamal @watchmen @HBO You're an actual c...
14Giving Watchmen a chance.
15RT @jilevin: \"Watchmen\" creator and star: This...
16@AyeNellz_ Facts, indeed! Random question but ...
17RT @michaelharriot: Thread:\\n\\nA lot of white ...
18Are the rorschach mask supposed to represent t...
19... and that shall suffice for today. Another ...
20@Gary_Gillatt Watchmen worth a look then? I’ve...
21RT @55mmbae: The opening scene of watchmen was...
22@ReignOfApril @watchmen @HBO I’m jealous! How ...
23RT @Minxysshoes: @ReignOfApril @watchmen @HBO ...
24RT @michaelharriot: Thread:\\n\\nA lot of white ...
25RT @vitargaryen13: \"We don’t do lollipops and ...
26RT @jilevin: \"Watchmen\" creator and star: This...
27RT @jilevin: \"Watchmen\" creator and star: This...
28FINALLY watching Watchmen
29@Xo_Shereen_Xo Well I'd say \"Peaky Blinders\" i...
30Estoy viendo Watchmen 1x02 \"Martial Feats of C...
31https://t.co/EpxKLAXshI\\n\\nIf you're watching ...
32RT @jilevin: \"Watchmen\" creator and star: This...
33RT @thelindsayellis: It’s a shame that the new...
34@StuGilmore Did you see the Watchmen series ye...
35\"Watchmen\" creator and star: This is America's...
36RT @watchmen: \"Breathtaking, first class enter...
37\"Watchmen\" creator and star: This is America's...
38RT @watchmen: Are you ready to venture into th...
39We ready for watchmen tonight !?!?
40RT @SoultzKim: 😉 q is very helpful, but Our Cr...
41Watchmen is a bit fucking good innit?
42Watchmen got interrupted so now I'm on to ... ...
43Been home sick all day so I already finished a...
44it's watchmen day!!! 😎
45\"We don’t do lollipops and rainbows. Because w...
46HBO Watchmen Review: Better Than the Movie So ...
47@carigervin There’s a great website called The...
48@EWDocJensen @LByock That was an incredible ep...
49Anatomy of a Scene: How 'Watchmen' Director Ni...
\n", + "
" + ], + "text/plain": [ + " text\n", + "0 Are you ready to venture into the great beyond...\n", + "1 It's only just begun. #WatchmenHBO https://t.c...\n", + "2 What DC Comics needs to learn from Joker’s suc...\n", + "3 RT @Firannion: @neilhimself There needs to be ...\n", + "4 RT @watchmen: Are you ready to venture into th...\n", + "5 RT @Firannion: @neilhimself There needs to be ...\n", + "6 RT @55mmbae: The opening scene of watchmen was...\n", + "7 RT @Salon: \"Watchmen\" creator and star: This i...\n", + "8 Last week's #WatchmenHBO was full of tiny deta...\n", + "9 The Affair Comes to an End, Power Heads for Hi...\n", + "10 RT @PAPPADEMAS: hearing Alan Moore has decided...\n", + "11 RT @thelindsayellis: It’s a shame that the new...\n", + "12 RT @DCComics: Who's watching @Watchmen? Dive i...\n", + "13 @AkakpoJamal @watchmen @HBO You're an actual c...\n", + "14 Giving Watchmen a chance.\n", + "15 RT @jilevin: \"Watchmen\" creator and star: This...\n", + "16 @AyeNellz_ Facts, indeed! Random question but ...\n", + "17 RT @michaelharriot: Thread:\\n\\nA lot of white ...\n", + "18 Are the rorschach mask supposed to represent t...\n", + "19 ... and that shall suffice for today. Another ...\n", + "20 @Gary_Gillatt Watchmen worth a look then? I’ve...\n", + "21 RT @55mmbae: The opening scene of watchmen was...\n", + "22 @ReignOfApril @watchmen @HBO I’m jealous! 
How ...\n", + "23 RT @Minxysshoes: @ReignOfApril @watchmen @HBO ...\n", + "24 RT @michaelharriot: Thread:\\n\\nA lot of white ...\n", + "25 RT @vitargaryen13: \"We don’t do lollipops and ...\n", + "26 RT @jilevin: \"Watchmen\" creator and star: This...\n", + "27 RT @jilevin: \"Watchmen\" creator and star: This...\n", + "28 FINALLY watching Watchmen\n", + "29 @Xo_Shereen_Xo Well I'd say \"Peaky Blinders\" i...\n", + "30 Estoy viendo Watchmen 1x02 \"Martial Feats of C...\n", + "31 https://t.co/EpxKLAXshI\\n\\nIf you're watching ...\n", + "32 RT @jilevin: \"Watchmen\" creator and star: This...\n", + "33 RT @thelindsayellis: It’s a shame that the new...\n", + "34 @StuGilmore Did you see the Watchmen series ye...\n", + "35 \"Watchmen\" creator and star: This is America's...\n", + "36 RT @watchmen: \"Breathtaking, first class enter...\n", + "37 \"Watchmen\" creator and star: This is America's...\n", + "38 RT @watchmen: Are you ready to venture into th...\n", + "39 We ready for watchmen tonight !?!?\n", + "40 RT @SoultzKim: 😉 q is very helpful, but Our Cr...\n", + "41 Watchmen is a bit fucking good innit?\n", + "42 Watchmen got interrupted so now I'm on to ... ...\n", + "43 Been home sick all day so I already finished a...\n", + "44 it's watchmen day!!! 😎\n", + "45 \"We don’t do lollipops and rainbows. Because w...\n", + "46 HBO Watchmen Review: Better Than the Movie So ...\n", + "47 @carigervin There’s a great website called The...\n", + "48 @EWDocJensen @LByock That was an incredible ep...\n", + "49 Anatomy of a Scene: How 'Watchmen' Director Ni..." + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list1 = todo(top5df.iloc[0]['Name'])\n", + "list1\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Esto no sale :( y no sé por qué porque con list1 sí sale, pero con las otras ya no. 
\n", + "list1,list2,list3,list4,list5 = top5df['Name'].apply(todo)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "#función para clasificar tweets\n", + "def get_tweet_sentiment(tweet): \n", + " ''' \n", + " Utility function to classify sentiment of passed tweet \n", + " using textblob's sentiment method \n", + " '''\n", + " # create TextBlob object of passed tweet text \n", + " analysis = TextBlob(tweet) \n", + " # set sentiment \n", + " if analysis.sentiment.polarity > 0: \n", + " return 'positive'\n", + " elif analysis.sentiment.polarity == 0: \n", + " return 'neutral'\n", + " else: \n", + " return 'negative'" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "#Aplicar sentimientos a todo\n", + "sentiments = tweetsdf['text'].apply(get_tweet_sentiment)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textsentiments
0Are you ready to venture into the great beyond...positive
1It's only just begun. #WatchmenHBO https://t.c...neutral
2What DC Comics needs to learn from Joker’s suc...positive
3RT @Firannion: @neilhimself There needs to be ...positive
4RT @watchmen: Are you ready to venture into th...positive
5RT @Firannion: @neilhimself There needs to be ...positive
6RT @55mmbae: The opening scene of watchmen was...neutral
7RT @Salon: \"Watchmen\" creator and star: This i...negative
8Last week's #WatchmenHBO was full of tiny deta...negative
9The Affair Comes to an End, Power Heads for Hi...positive
10RT @PAPPADEMAS: hearing Alan Moore has decided...negative
11RT @thelindsayellis: It’s a shame that the new...positive
12RT @DCComics: Who's watching @Watchmen? Dive i...negative
13@AkakpoJamal @watchmen @HBO You're an actual c...positive
14Giving Watchmen a chance.neutral
15RT @jilevin: \"Watchmen\" creator and star: This...negative
16@AyeNellz_ Facts, indeed! Random question but ...negative
17RT @michaelharriot: Thread:\\n\\nA lot of white ...negative
18Are the rorschach mask supposed to represent t...neutral
19... and that shall suffice for today. Another ...positive
20@Gary_Gillatt Watchmen worth a look then? I’ve...positive
21RT @55mmbae: The opening scene of watchmen was...neutral
22@ReignOfApril @watchmen @HBO I’m jealous! How ...positive
23RT @Minxysshoes: @ReignOfApril @watchmen @HBO ...neutral
24RT @michaelharriot: Thread:\\n\\nA lot of white ...negative
25RT @vitargaryen13: \"We don’t do lollipops and ...positive
26RT @jilevin: \"Watchmen\" creator and star: This...negative
27RT @jilevin: \"Watchmen\" creator and star: This...negative
28FINALLY watching Watchmenneutral
29@Xo_Shereen_Xo Well I'd say \"Peaky Blinders\" i...positive
30Estoy viendo Watchmen 1x02 \"Martial Feats of C...neutral
31https://t.co/EpxKLAXshI\\n\\nIf you're watching ...positive
32RT @jilevin: \"Watchmen\" creator and star: This...negative
33RT @thelindsayellis: It’s a shame that the new...positive
34@StuGilmore Did you see the Watchmen series ye...neutral
35\"Watchmen\" creator and star: This is America's...negative
36RT @watchmen: \"Breathtaking, first class enter...positive
37\"Watchmen\" creator and star: This is America's...negative
38RT @watchmen: Are you ready to venture into th...positive
39We ready for watchmen tonight !?!?positive
40RT @SoultzKim: 😉 q is very helpful, but Our Cr...positive
41Watchmen is a bit fucking good innit?positive
42Watchmen got interrupted so now I'm on to ... ...neutral
43Been home sick all day so I already finished a...negative
44it's watchmen day!!! 😎neutral
45\"We don’t do lollipops and rainbows. Because w...positive
46HBO Watchmen Review: Better Than the Movie So ...positive
47@carigervin There’s a great website called The...positive
48@EWDocJensen @LByock That was an incredible ep...positive
49Anatomy of a Scene: How 'Watchmen' Director Ni...neutral
\n", + "
" + ], + "text/plain": [ + " text sentiments\n", + "0 Are you ready to venture into the great beyond... positive\n", + "1 It's only just begun. #WatchmenHBO https://t.c... neutral\n", + "2 What DC Comics needs to learn from Joker’s suc... positive\n", + "3 RT @Firannion: @neilhimself There needs to be ... positive\n", + "4 RT @watchmen: Are you ready to venture into th... positive\n", + "5 RT @Firannion: @neilhimself There needs to be ... positive\n", + "6 RT @55mmbae: The opening scene of watchmen was... neutral\n", + "7 RT @Salon: \"Watchmen\" creator and star: This i... negative\n", + "8 Last week's #WatchmenHBO was full of tiny deta... negative\n", + "9 The Affair Comes to an End, Power Heads for Hi... positive\n", + "10 RT @PAPPADEMAS: hearing Alan Moore has decided... negative\n", + "11 RT @thelindsayellis: It’s a shame that the new... positive\n", + "12 RT @DCComics: Who's watching @Watchmen? Dive i... negative\n", + "13 @AkakpoJamal @watchmen @HBO You're an actual c... positive\n", + "14 Giving Watchmen a chance. neutral\n", + "15 RT @jilevin: \"Watchmen\" creator and star: This... negative\n", + "16 @AyeNellz_ Facts, indeed! Random question but ... negative\n", + "17 RT @michaelharriot: Thread:\\n\\nA lot of white ... negative\n", + "18 Are the rorschach mask supposed to represent t... neutral\n", + "19 ... and that shall suffice for today. Another ... positive\n", + "20 @Gary_Gillatt Watchmen worth a look then? I’ve... positive\n", + "21 RT @55mmbae: The opening scene of watchmen was... neutral\n", + "22 @ReignOfApril @watchmen @HBO I’m jealous! How ... positive\n", + "23 RT @Minxysshoes: @ReignOfApril @watchmen @HBO ... neutral\n", + "24 RT @michaelharriot: Thread:\\n\\nA lot of white ... negative\n", + "25 RT @vitargaryen13: \"We don’t do lollipops and ... positive\n", + "26 RT @jilevin: \"Watchmen\" creator and star: This... negative\n", + "27 RT @jilevin: \"Watchmen\" creator and star: This... 
negative\n", + "28 FINALLY watching Watchmen neutral\n", + "29 @Xo_Shereen_Xo Well I'd say \"Peaky Blinders\" i... positive\n", + "30 Estoy viendo Watchmen 1x02 \"Martial Feats of C... neutral\n", + "31 https://t.co/EpxKLAXshI\\n\\nIf you're watching ... positive\n", + "32 RT @jilevin: \"Watchmen\" creator and star: This... negative\n", + "33 RT @thelindsayellis: It’s a shame that the new... positive\n", + "34 @StuGilmore Did you see the Watchmen series ye... neutral\n", + "35 \"Watchmen\" creator and star: This is America's... negative\n", + "36 RT @watchmen: \"Breathtaking, first class enter... positive\n", + "37 \"Watchmen\" creator and star: This is America's... negative\n", + "38 RT @watchmen: Are you ready to venture into th... positive\n", + "39 We ready for watchmen tonight !?!? positive\n", + "40 RT @SoultzKim: 😉 q is very helpful, but Our Cr... positive\n", + "41 Watchmen is a bit fucking good innit? positive\n", + "42 Watchmen got interrupted so now I'm on to ... ... neutral\n", + "43 Been home sick all day so I already finished a... negative\n", + "44 it's watchmen day!!! 😎 neutral\n", + "45 \"We don’t do lollipops and rainbows. Because w... positive\n", + "46 HBO Watchmen Review: Better Than the Movie So ... positive\n", + "47 @carigervin There’s a great website called The... positive\n", + "48 @EWDocJensen @LByock That was an incredible ep... positive\n", + "49 Anatomy of a Scene: How 'Watchmen' Director Ni... 
neutral" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Asignar resultado al dataframe\n", + "tweetsdf['sentiments'] = sentiments" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "def get_positive_percentages(column):\n", + " global ltweets\n", + " ltweets = len(column)\n", + " \n", + " # picking positive tweets from column \n", + " global ptweets\n", + " ptweets = [tweet for tweet in column if tweet == 'positive']\n", + " global lptweets\n", + " lptweets = len(ptweets)\n", + " global percentagePos\n", + " percentagePos = 100*len(ptweets)/ltweets\n", + " \n", + " # percentage of positive tweets \n", + " return(\"Positive tweets percentage: {} %\".format(100*len(ptweets)/ltweets)) \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "def get_negative_percentages(column):\n", + " global lntweets\n", + " ltweets = len(column)\n", + " \n", + " # picking negative tweets from column \n", + " global ntweets\n", + " ntweets = [tweet for tweet in column if tweet == 'negative'] \n", + " global lntweets\n", + " lntweets = len(ntweets)\n", + " global percentageNeg\n", + " percentageNeg = 100*len(ntweets)/ltweets\n", + " \n", + " # percentage of negative tweets \n", + " return(\"Negative tweets percentage: {} %\".format(100*len(ntweets)/ltweets)) " + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Negative tweets percentage: 28.0 %'" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_negative_percentages(tweetsdf['sentiments'])" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Positive tweets percentage: 48.0 %'" + ] + }, + "execution_count": 80, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_positive_percentages(tweetsdf['sentiments'])" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "#La idea es que cada serie tenga el score y al lado el porcentaje de tweets postivos y tweets negativos\n", + "#para ver si había una relación, pero no me dio la vida ni el cerebro para hacerlo para cada serie porque mi \n", + "#apply no jaló y ya no quería llorar.\n", + "\n", + "top5df['% tweets positivos'] = percentagePos" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "top5df['% tweets negativos'] = percentageNeg" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameScore% tweets positivos% tweets negativos
0Watchmen6.848.028.0
1Cómo vivir contigo mismo7.448.028.0
2Peaky Blinders8.848.028.0
3The Walking Dead8.348.028.0
4Modern Love8.148.028.0
5American Horror Story8.148.028.0
\n", + "
" + ], + "text/plain": [ + " Name Score % tweets positivos % tweets negativos\n", + "0 Watchmen 6.8 48.0 28.0\n", + "1 Cómo vivir contigo mismo 7.4 48.0 28.0\n", + "2 Peaky Blinders 8.8 48.0 28.0\n", + "3 The Walking Dead 8.3 48.0 28.0\n", + "4 Modern Love 8.1 48.0 28.0\n", + "5 American Horror Story 8.1 48.0 28.0" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Ejemplo a medias de cómo sería la tabla final.\n", + "top5df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/your-code/Scraping.ipynb b/your-code/Scraping.ipynb new file mode 100644 index 0000000..b697059 --- /dev/null +++ b/your-code/Scraping.ipynb @@ -0,0 +1,318 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd\n", + "from pprint import pprint\n", + "from lxml import html\n", + "from lxml.html import fromstring\n", + "import urllib.request\n", + "from urllib.request import urlopen\n", + "import random\n", + "import re\n", + "import scrapy" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://www.imdb.com/title/tt0804503/'" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "soup = requests.get(url).content\n", + "soup = BeautifulSoup(soup,'html.parser')\n" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Jon Hamm',\n", + " 'Elisabeth Moss',\n", + " 'Vincent Kartheiser',\n", + " 'January Jones',\n", + " 'Christina Hendricks',\n", + " 'Aaron Staton',\n", + " 'Rich Sommer',\n", + " 'John Slattery',\n", + " 'Kiernan Shipka',\n", + " 'Robert Morse',\n", + " 'Christopher Stanley',\n", + " 'Jessica Paré',\n", + " 'Jay R. Ferguson',\n", + " 'Michael Gladis',\n", + " 'Bryan Batt',\n", + " 'Alison Brie']" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Lista del casr\n", + "cast = soup.select(\"table.cast_list a[href^='/name']\")\n", + "names = [name.text for name in cast]\n", + "names = list(map(lambda x:x.strip(),repositories))\n", + "final_names = [text for text in names if len(text)>1]\n", + "final_names" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Don Draper',\n", + " 'Peggy Olson',\n", + " 'Pete Campbell',\n", + " 'Betty Francis',\n", + " 'Joan Harris',\n", + " 'Ken Cosgrove',\n", + " 'Harry Crane',\n", + " 'Roger Sterling',\n", + " 'Sally Draper',\n", + " 'Bertram Cooper',\n", + " 'Henry Francis',\n", + " 'Megan Draper',\n", + " 'Stan Rizzo',\n", + " 'Paul Kinsey',\n", + " 'Salvatore Romano',\n", + " 'Trudy Campbell']" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Lista de personajes\n", + "characters = soup.select(\"td.character a[href^='/title']\")\n", + "characters = [name.text for name in characters]\n", + "characters" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['92 episodes',\n", + " '92 episodes',\n", + " '92 episodes',\n", + " '92 episodes',\n", + " '92 episodes',\n", + " '92 episodes',\n", + " '92 episodes',\n", + " '89 episodes',\n", + 
" '89 episodes',\n", + " '74 episodes',\n", + " '54 episodes',\n", + " '49 episodes',\n", + " '46 episodes',\n", + " '40 episodes',\n", + " '39 episodes',\n", + " '38 episodes']" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Lista de episodios por personaje\n", + "episodes = soup.select(\"a.toggle-episodes\")\n", + "episodes = [episode.text for episode in episodes]\n", + "number_episodes = []\n", + "for i in episodes:\n", + " number_episodes.append(i.split(\",\")[0])\n", + "number_episodes\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['7', '6', '5', '4', '3', '2', '1']" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "seasons = soup.select(\"div.seasons-and-year-nav a[href*='season']\")\n", + "seasons = [season.text for season in seasons]\n", + "seasons\n" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['2015', '2014', '2013', '2012', '2010', '2009']" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "years = soup.select(\"div.seasons-and-year-nav a[href*='year']\")\n", + "years = [year.text for year in years]\n", + "years" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[], [], [], [], [], [], [], [], [], [], [], []]" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ratingValue" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " None,\n", + " 
None,\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " [],\n", + " []]" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/your-code/Scraping2.ipynb b/your-code/Scraping2.ipynb new file mode 100644 index 0000000..70e117d --- /dev/null +++ b/your-code/Scraping2.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://www.imdb.com/chart/tvmeter?pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=4da9d9a5-d299-43f2-9c53-f0efa18182cd&pf_rd_r=0KYJP3BJH5DN6EZWRQ5Z&pf_rd_s=right-4&pf_rd_t=15506&pf_rd_i=toptv&ref_=chttvtp_ql_5'" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "soup = requests.get(url).content\n", + "soup = BeautifulSoup(soup,'html.parser')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": 
[ + "#Las series más populares\n", + "popularTV = soup.select(\"td.titleColumn a[href^='/title']\")\n", + "popularTV = [name.text for name in popularTV]\n", + "popularTV = list(map(lambda x:x.strip(),popularTV))\n", + "top20 = popularTV[:11]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "#Score\n", + "scores = soup.select(\"td.ratingColumn strong\")\n", + "score = [score.text for score in scores]\n", + "top20score = score[:11]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Watchmen',\n", + " 'Cómo vivir contigo mismo',\n", + " 'Peaky Blinders',\n", + " 'The Walking Dead',\n", + " 'Modern Love',\n", + " 'American Horror Story',\n", + " 'Breaking Bad',\n", + " 'Daybreak',\n", + " 'Batwoman',\n", + " 'Supernatural',\n", + " 'Castle Rock']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top20\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['6.8', '7.4', '8.8', '8.3', '8.1', '8.1', '9.5', '6.8', '3.2', '8.4', '7.7']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top20score\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", 
+ "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}