ta-data-mex · xduarde · Jul 22, 2019 · Jul 22, 2019
diff --git a/README.md b/README.md
diff --git a/your-code/.ipynb_checkpoints/API & Web Scrapping-checkpoint.ipynb b/your-code/.ipynb_checkpoints/API & Web Scrapping-checkpoint.ipynb
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "!{sys.executable} -m pip install requests pandas lxml html5lib bs4 tweepy\n",
+    "import tweepy\n",
+    "import pandas as pd\n",
+    "import requests\n",
+    "from bs4 import BeautifulSoup\n",
+    "from pandas.io.json import json_normalize"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "API_KEY = '***'\n",
+    "API_SECRET = '***'\n",
+    "ACCESS_TOKEN = '***'\n",
+    "ACCESS_TOKEN_SECRET = '***'\n",
+    "\n",
+    "auth = tweepy.OAuthHandler(API_KEY, API_SECRET)\n",
+    "auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)\n",
+    "api = tweepy.API(auth, wait_on_rate_limit=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "search_tw = tweepy.Cursor(api.search, q='chapo -filter:retweets', since='2019-07-17').items(100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tweets_chapo = [\n",
+    "    [tweets.user.screen_name, tweets.user.location, tweets.text, str(tweets.created_at), tweets.user.favourites_count] \n",
+    "    for tweets in search_tw\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tweets_chapo = pd.DataFrame(data=tweets_chapo, \n",
+    "                    columns=['User', \"Location\", \"Tweet\", \"Date/Time\", \"Likes\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tweets_chapo.to_csv('Tweets_Chapo_API.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_data_narco = requests.get('https://en.wikipedia.org/wiki/List_of_Mexico%27s_37_most-wanted_drug_lords').content\n",
+    "get_data_narco = BeautifulSoup(get_data_narco, 'lxml')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_narco = get_data_narco.find_all('table', {'class':'wikitable sortable'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rows_narco1 = [row.find_all('tr') for row in data_narco]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "rows_narco2 = [r.text.strip().split(\"\\n\") for row in rows_narco1 for r in row]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "colnames = rows_narco2[0]\n",
+    "data_narcos = rows_narco2[1:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "narcos_df = pd.DataFrame(data_narcos, columns=colnames)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "narcos_df.to_csv('Mexican_Narcos_WebScrapping.csv', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/your-code/.ipynb_checkpoints/Funciones API & Web Scrapping-checkpoint.ipynb b/your-code/.ipynb_checkpoints/Funciones API & Web Scrapping-checkpoint.ipynb
@@ -0,0 +1,126 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: requests in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (2.21.0)\n",
+      "Requirement already satisfied: pandas in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (0.24.2)\n",
+      "Requirement already satisfied: lxml in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (4.3.2)\n",
+      "Requirement already satisfied: html5lib in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (1.0.1)\n",
+      "Requirement already satisfied: bs4 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (0.0.1)\n",
+      "Requirement already satisfied: tweepy in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (3.8.0)\n",
+      "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (2.8)\n",
+      "Requirement already satisfied: urllib3<1.25,>=1.21.1 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (1.24.1)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (2019.3.9)\n",
+      "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (3.0.4)\n",
+      "Requirement already satisfied: pytz>=2011k in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from pandas) (2018.9)\n",
+      "Requirement already satisfied: numpy>=1.12.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from pandas) (1.16.2)\n",
+      "Requirement already satisfied: python-dateutil>=2.5.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from pandas) (2.8.0)\n",
+      "Requirement already satisfied: webencodings in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from html5lib) (0.5.1)\n",
+      "Requirement already satisfied: six>=1.9 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from html5lib) (1.12.0)\n",
+      "Requirement already satisfied: beautifulsoup4 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from bs4) (4.7.1)\n",
+      "Requirement already satisfied: requests-oauthlib>=0.7.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from tweepy) (1.2.0)\n",
+      "Requirement already satisfied: PySocks>=1.5.7 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from tweepy) (1.6.8)\n",
+      "Requirement already satisfied: soupsieve>=1.2 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from beautifulsoup4->bs4) (1.8)\n",
+      "Requirement already satisfied: oauthlib>=3.0.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests-oauthlib>=0.7.0->tweepy) (3.0.2)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "!{sys.executable} -m pip install requests pandas lxml html5lib bs4 tweepy\n",
+    "import tweepy\n",
+    "import pandas as pd\n",
+    "import requests\n",
+    "import re\n",
+    "from bs4 import BeautifulSoup\n",
+    "from pandas.io.json import json_normalize"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def Twitter_API(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, filter_tweets, since_date, until_date, total_tweets):\n",
+    "    auth = tweepy.OAuthHandler(API_KEY, API_SECRET)\n",
+    "    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)\n",
+    "    api = tweepy.API(auth, wait_on_rate_limit=True)\n",
+    "\n",
+    "    search_tw = tweepy.Cursor(api.search, q=filter_tweets, since=since_date, until=until_date).items(total_tweets)\n",
+    "    \n",
+    "    tweets_chapo = [\n",
+    "        [tweets.user.screen_name, tweets.user.location, tweets.text, str(tweets.created_at), tweets.user.favourites_count] \n",
+    "        for tweets in search_tw\n",
+    "    ]\n",
+    "\n",
+    "    tweets_chapo = pd.DataFrame(data=tweets_chapo, \n",
+    "                        columns=['User', \"Location\", \"Tweet\", \"Date/Time\", \"Likes\"])\n",
+    "    tweets_chapo = tweets_chapo.sort_values(by='Likes', ascending=False)\n",
+    "    tweets_chapo.to_csv('TweetsChapo.csv', index=False)\n",
+    "    \n",
+    "Twitter_API('***',\n",
+    "            '***',\n",
+    "            '***',\n",
+    "            '***', \n",
+    "            'chapo -filter:retweets', \n",
+    "            '2019-07-17',\n",
+    "            '2019-07-19',\n",
+    "             5000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def Project_WebScrapping(url):\n",
+    "    get_data_narco = requests.get(url).content\n",
+    "    get_data_narco = BeautifulSoup(get_data_narco, 'lxml')\n",
+    "\n",
+    "    data_narco = get_data_narco.find_all('table', {'class':'wikitable sortable'})\n",
+    "\n",
+    "    rows_narco1 = [row.find_all('tr') for row in data_narco]\n",
+    "\n",
+    "    rows_narco2 = [r.text.strip().split(\"\\n\") for row in rows_narco1 for r in row]\n",
+    "\n",
+    "    colnames = rows_narco2[0]\n",
+    "    data_narcos = rows_narco2[1:]\n",
+    "\n",
+    "    narcos_df = pd.DataFrame(data_narcos, columns=colnames)\n",
+    "    narcos_df.to_csv('MexicanNarcos.csv', index=False)\n",
+    "    \n",
+    "Project_WebScrapping('https://en.wikipedia.org/wiki/List_of_Mexico%27s_37_most-wanted_drug_lords')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}