Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 0 additions & 67 deletions README.md

This file was deleted.

165 changes: 165 additions & 0 deletions your-code/.ipynb_checkpoints/API & Web Scrapping-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"import sys\n",
"!{sys.executable} -m pip install requests pandas lxml html5lib bs4 tweepy\n",
"import tweepy\n",
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from pandas.io.json import json_normalize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"API_KEY = '***'\n",
"API_SECRET = '***'\n",
"ACCESS_TOKEN = '***'\n",
"ACCESS_TOKEN_SECRET = '***'\n",
"\n",
"auth = tweepy.OAuthHandler(API_KEY, API_SECRET)\n",
"auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)\n",
"api = tweepy.API(auth, wait_on_rate_limit=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"search_tw = tweepy.Cursor(api.search, q='chapo -filter:retweets', since='2019-07-17').items(100)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"tweets_chapo = [\n",
" [tweets.user.screen_name, tweets.user.location, tweets.text, str(tweets.created_at), tweets.user.favourites_count] \n",
" for tweets in search_tw\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"tweets_chapo = pd.DataFrame(data=tweets_chapo, \n",
" columns=['User', \"Location\", \"Tweet\", \"Date/Time\", \"Likes\"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"tweets_chapo.to_csv('Tweets_Chapo_API.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"get_data_narco = requests.get('https://en.wikipedia.org/wiki/List_of_Mexico%27s_37_most-wanted_drug_lords').content\n",
"get_data_narco = BeautifulSoup(get_data_narco, 'lxml')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"data_narco = get_data_narco.find_all('table', {'class':'wikitable sortable'})"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"rows_narco1 = [row.find_all('tr') for row in data_narco]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"rows_narco2 = [r.text.strip().split(\"\\n\") for row in rows_narco1 for r in row]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"colnames = rows_narco2[0]\n",
"data_narcos = rows_narco2[1:]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"narcos_df = pd.DataFrame(data_narcos, columns=colnames)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"narcos_df.to_csv('Mexican_Narcos_WebScrapping.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: requests in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (2.21.0)\n",
"Requirement already satisfied: pandas in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (0.24.2)\n",
"Requirement already satisfied: lxml in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (4.3.2)\n",
"Requirement already satisfied: html5lib in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (1.0.1)\n",
"Requirement already satisfied: bs4 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (0.0.1)\n",
"Requirement already satisfied: tweepy in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (3.8.0)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (2.8)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (1.24.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (2019.3.9)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests) (3.0.4)\n",
"Requirement already satisfied: pytz>=2011k in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from pandas) (2018.9)\n",
"Requirement already satisfied: numpy>=1.12.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from pandas) (1.16.2)\n",
"Requirement already satisfied: python-dateutil>=2.5.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from pandas) (2.8.0)\n",
"Requirement already satisfied: webencodings in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from html5lib) (0.5.1)\n",
"Requirement already satisfied: six>=1.9 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from html5lib) (1.12.0)\n",
"Requirement already satisfied: beautifulsoup4 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from bs4) (4.7.1)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from tweepy) (1.2.0)\n",
"Requirement already satisfied: PySocks>=1.5.7 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from tweepy) (1.6.8)\n",
"Requirement already satisfied: soupsieve>=1.2 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from beautifulsoup4->bs4) (1.8)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in c:\\users\\eduardo\\anaconda3\\lib\\site-packages (from requests-oauthlib>=0.7.0->tweepy) (3.0.2)\n"
]
}
],
"source": [
"import sys\n",
"!{sys.executable} -m pip install requests pandas lxml html5lib bs4 tweepy\n",
"import tweepy\n",
"import pandas as pd\n",
"import requests\n",
"import re\n",
"from bs4 import BeautifulSoup\n",
"from pandas.io.json import json_normalize"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def Twitter_API(API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET, filter_tweets, since_date, until_date, total_tweets):\n",
" auth = tweepy.OAuthHandler(API_KEY, API_SECRET)\n",
" auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)\n",
" api = tweepy.API(auth, wait_on_rate_limit=True)\n",
"\n",
" search_tw = tweepy.Cursor(api.search, q=filter_tweets, since=since_date, until=until_date).items(total_tweets)\n",
" \n",
" tweets_chapo = [\n",
" [tweets.user.screen_name, tweets.user.location, tweets.text, str(tweets.created_at), tweets.user.favourites_count] \n",
" for tweets in search_tw\n",
" ]\n",
"\n",
" tweets_chapo = pd.DataFrame(data=tweets_chapo, \n",
" columns=['User', \"Location\", \"Tweet\", \"Date/Time\", \"Likes\"])\n",
" tweets_chapo = tweets_chapo.sort_values(by='Likes', ascending=False)\n",
" tweets_chapo.to_csv('TweetsChapo.csv', index=False)\n",
" \n",
"Twitter_API('***',\n",
" '***',\n",
" '***',\n",
" '***', \n",
" 'chapo -filter:retweets', \n",
" '2019-07-17',\n",
" '2019-07-19',\n",
" 5000)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def Project_WebScrapping(url):\n",
" get_data_narco = requests.get(url).content\n",
" get_data_narco = BeautifulSoup(get_data_narco, 'lxml')\n",
"\n",
" data_narco = get_data_narco.find_all('table', {'class':'wikitable sortable'})\n",
"\n",
" rows_narco1 = [row.find_all('tr') for row in data_narco]\n",
"\n",
" rows_narco2 = [r.text.strip().split(\"\\n\") for row in rows_narco1 for r in row]\n",
"\n",
" colnames = rows_narco2[0]\n",
" data_narcos = rows_narco2[1:]\n",
"\n",
" narcos_df = pd.DataFrame(data_narcos, columns=colnames)\n",
" narcos_df.to_csv('MexicanNarcos.csv', index=False)\n",
" \n",
"Project_WebScrapping('https://en.wikipedia.org/wiki/List_of_Mexico%27s_37_most-wanted_drug_lords')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading