Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 222 additions & 0 deletions .ipynb_checkpoints/Intento MIERCOLES-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running the INIT\n",
"\n",
"Esto es el kickstart\n",
"\n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[]]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 1 \n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[], []]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 2 \n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[], [], []]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 3 \n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[], [], [], []]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 4 \n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[], [], [], [], []]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 5 \n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[], [], [], [], [], []]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 6 \n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[], [], [], [], [], [], []]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 7 \n",
"\n",
"Running the request\n",
"Esto es el quotes parser\n",
"Esto es la funcion Output_results\n",
"\n",
"\n",
"[[], [], [], [], [], [], [], []]\n",
"\n",
"\n",
"Esto es despues de r\n",
"\n",
"Scrapped the page number 8 \n",
"\n",
"Esto es el final del kickstart\n",
"\n"
]
}
],
"source": [
"import requests, sys, time, random\n",
"import pandas as pd\n",
"from bs4 import BeautifulSoup\n",
"\n",
"\n",
"# One inner list is appended to each of these per parsed page.\n",
"name_list, price_list, url_list = [], [], []\n",
"# Starts out empty at module level.\n",
"name_final = []\n",
"\n",
"\n",
"class Euroelect:\n",
"    \"\"\"Paginated scraper: fetch each page, parse it, export the results to CSV.\n",
"\n",
"    The callable given as ``content_parser`` is expected to append its per-page\n",
"    results to the module-level ``name_list``, ``price_list`` and ``url_list``;\n",
"    ``output_results`` reads those lists once every page has been collected.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, url_pattern_1, pages_to_scrape=10, sleep_interval=-1,\n",
"                 content_parser=None, output_path='Datos_Cravioto.csv',\n",
"                 request_timeout=30):\n",
"        \"\"\"Store the scraping configuration.\n",
"\n",
"        Args:\n",
"            url_pattern_1: URL template with one ``%s`` slot for the page number.\n",
"            pages_to_scrape: Number of pages to request, starting at page 1.\n",
"            sleep_interval: Seconds to wait between pages. A negative value\n",
"                (the default) means a random pause of 0-5 seconds.\n",
"            content_parser: Callable that receives the raw response body.\n",
"            output_path: Where the CSV is written. Relative to the working\n",
"                directory by default, so the notebook runs on any machine.\n",
"            request_timeout: Seconds before an HTTP request is abandoned.\n",
"        \"\"\"\n",
"        print(\"Running the INIT\\n\")\n",
"        self.url_pattern = url_pattern_1\n",
"        self.pages_to_scrape = pages_to_scrape\n",
"        self.sleep_interval = sleep_interval\n",
"        self.content_parser = content_parser\n",
"        self.output_path = output_path\n",
"        self.request_timeout = request_timeout\n",
"\n",
"    def scrape_url(self, url):\n",
"        \"\"\"Download ``url``, run the parser on its body and pass the result on.\n",
"\n",
"        Raises:\n",
"            TypeError: If no ``content_parser`` was configured.\n",
"            requests.RequestException: On timeout, connection or HTTP errors.\n",
"        \"\"\"\n",
"        if self.content_parser is None:\n",
"            raise TypeError('content_parser must be a callable, got None')\n",
"        # The timeout prevents hanging forever on a stalled connection, and\n",
"        # raise_for_status keeps an HTTP error page from being parsed as data.\n",
"        response = requests.get(url, timeout=self.request_timeout)\n",
"        response.raise_for_status()\n",
"        result = self.content_parser(response.content)\n",
"        self.output_results(result)\n",
"\n",
"    def output_results(self, r):\n",
"        \"\"\"Export the accumulated results to CSV once every page is in.\n",
"\n",
"        ``r`` (the parser's return value) is not used here: the data comes from\n",
"        the module-level lists, which hold one sub-list per scraped page.\n",
"        \"\"\"\n",
"        # Export only after the last configured page has been parsed.\n",
"        if len(name_list) != self.pages_to_scrape:\n",
"            return\n",
"        names = [item for page in name_list for item in page]\n",
"        prices = [item for page in price_list for item in page]\n",
"        urls = [item for page in url_list for item in page]\n",
"        # zip() stops at the shortest list, so the rows are only as reliable as\n",
"        # the one-to-one correspondence between the three selections.\n",
"        df = pd.DataFrame(list(zip(names, prices, urls)), columns=['Articulo', 'Precio', 'URL Link'])\n",
"        print(df)\n",
"        df.to_csv(self.output_path, index=False)\n",
"\n",
"    def kickstart(self):\n",
"        \"\"\"Scrape pages 1..pages_to_scrape, pausing between requests.\"\"\"\n",
"        for i in range(1, self.pages_to_scrape + 1):\n",
"            self.scrape_url(self.url_pattern % i)\n",
"            print(\"Scrapped the page number\", i, \"\\n\")\n",
"            if i == self.pages_to_scrape:\n",
"                break  # nothing left to fetch, so there is no need to wait\n",
"            if self.sleep_interval >= 0:\n",
"                time.sleep(self.sleep_interval)\n",
"            else:\n",
"                time.sleep(random.randint(0, 5))\n",
"\n",
"# How many listing pages to request.\n",
"PAGES_TO_SCRAPE = 8\n",
"# Listing URL template; the %s slot receives the page number.\n",
"URL_PATTERN_1 = 'https://casacraviotoeshop.com/productos.html?p=%s'\n",
"\n",
"def quotes_parser_1(content):\n",
"    \"\"\"Extract product names, prices and image URLs from one listing page.\n",
"\n",
"    The three per-page lists are appended to the module-level ``name_list``,\n",
"    ``price_list`` and ``url_list``.\n",
"\n",
"    Args:\n",
"        content: Raw HTML of the page.\n",
"\n",
"    Returns:\n",
"        tuple: ``(name_selection, price_selection)`` for this page.\n",
"    \"\"\"\n",
"    soup = BeautifulSoup(content, 'lxml')\n",
"    product_name = soup.select('a.product-item-link')\n",
"    product_price = soup.select('span.price')\n",
"    product_url = soup.select('img.product-image-photo')\n",
"\n",
"    name_selection = [e.get_text().strip() for e in product_name]\n",
"    price_selection = [e.get_text().strip() for e in product_price]\n",
"    # An <img> tag has no text content, so get_text() always returned ''.\n",
"    # The image address lives in the tag's attributes, so read 'src' instead.\n",
"    # NOTE(review): if the site lazy-loads images the real address may sit in\n",
"    # another attribute (e.g. data-src) -- confirm against the page markup.\n",
"    url_selection = [e.get('src', '') for e in product_url]\n",
"    # NOTE(review): the three selections are independent of each other, so a\n",
"    # product with several span.price elements (or none) would shift every later\n",
"    # row; selecting inside each product container would keep rows aligned.\n",
"    name_list.append(name_selection)\n",
"    price_list.append(price_selection)\n",
"    url_list.append(url_selection)\n",
"\n",
"    return name_selection, price_selection\n",
"\n",
"\n",
"# Build the scraper for the product listing and run it over every page.\n",
"project = Euroelect(\n",
"    url_pattern_1=URL_PATTERN_1,\n",
"    pages_to_scrape=PAGES_TO_SCRAPE,\n",
"    content_parser=quotes_parser_1,\n",
")\n",
"project.kickstart()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
25 changes: 25 additions & 0 deletions .ipynb_checkpoints/README-checkpoint.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[IronHack Logo](https://s3-eu-west-1.amazonaws.com/ih-materials/uploads/upload_d5c5793015fec3be28a63c4fa3dd4d55.png)

# Project: API and Web Data Scraping



The following deliverables should be pushed to your Github repo for this chapter.

* **Code Approach** I decided to scrape Cravioto's e-shop, a tools and building supplies store. The website is an HTML site. Because of the structure of its tags, I thought it was a suitable site to scrape with what we learned in the bootcamp.
* The output I got from the web scraping is the name of each product, its price and the picture link.
* A further feature for this web scraper would be to get the "SKU code" and the "User opinions", for which I would have to open the link of each product and scrape them.

* **Results** As I expected from the beginning, I ran into some issues while scraping. I saved the results of scraping this page in a .CSV file. I would have liked the page to have more data to scrape so I could practice more, but because of the lack of time and some changes to my master plan, I couldn't.

* One of my main issues was defining my action plan clearly and precisely. I think that was the principal task to solve, and it is better to have a good action plan before starting. That would definitely have saved me hours of work.
* The second important issue is that I couldn't scrape the image URL of each product; I had trouble selecting the right tag. Feedback is welcome.


* **Lessons learned**
* The first one I want to write about concerns hard skills, because I was able to reinforce my existing knowledge, especially in OOP. I understood the topic when we did the lab, but when I tried to do this project I found that I had a lot of areas of opportunity to work on.

* The second lesson is about soft skills. If you have already written code and you think you can reuse it, do it. After hours of trying to write the code by myself, I remembered to take a look at the labs I had solved, and I found that it would be easier to copy the structure of one of them instead of trying to do everything from scratch.

* Finally, I can say that even though I could have saved hours from the beginning, I wouldn't have gained this new experience if I hadn't been through this.
* I found it hard, but not impossible.
96 changes: 96 additions & 0 deletions Datos_Cravioto.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
Articulo,Precio,URL Link
"ABRAZADERA MINI SIN FIN A INOX 10-16mm 3/8-5/8"" C/10 PZAS FIERO AB-04",$46.00,
"ABRAZADERA MINI SIN FIN A INOX 13-19mm 1/2-3/4"" C/10 PZAS FIERO AB-06",$46.00,
"ABRAZADERA SIN FIN A INOX 10-16mm 3/8-5/8"" C/10 PZAS FIERO AB-4",$76.00,
"ABRAZADERA SIN FIN A INOX 13-19mm 1/2-3/4"" C/10 PZAS FIERO AB-6",$76.00,
"ABRAZADERA SIN FIN A INOX 16-23mm 5/8-1"" C/10 PZAS FIERO AB-8",$84.00,
"ABRAZADERA SIN FIN A INOX 16-32mm 5/8-1 1/4"" C/10 PZAS FIERO AB-12",$84.00,
"ABRAZADERA SIN FIN A INOX 19-27mm 3/4-1 1/8"" C/10 PZAS FIERO AB-10",$84.00,
"ABRAZADERA SIN FIN A INOX 19-38mm 3/4-1 1/2"" C/10 PZAS FIERO AB-16",$97.00,
ACEITE LUBRICANTE P/MOTORES A 2 TIEMPOS LIQUIDO 16 Oz TRUPER ACT-2T-16,$109.00,
ACEITE LUBRICANTE P/MOTORES A 2 TIEMPOS LIQUIDO 4 Oz TRUPER ACT-2T-4,$35.00,
"ADAPTADOR RCA EXT ESPIGA GALVANIZADO 19mm 3/4""",$15.70,
"ADAPTADOR RCA EXT ESPIGA GALVANIZADO 25mm 1""",$24.17,
,$18.58,
APAGADOR DE PASO CAFÉ BAQUELITA ROYER 228,$89.00,
ARCO PRETUL APS-12 DE SOLERA,$38.23,
"BRIDA GALVANIZADA 19mm 3/4"" ARXFLUX",$32.00,
BROCHA 100mm 4 MANGO DE PLASTICO PRETUL BRP-4,$39.00,
BROCHA 125mm 5 MANGO DE PLASTICO PRETUL BRP-5,$6.00,
BROCHA 13mm 1/2 MANGO DE PLASTICO PRETUL BRP-1/2,$43.00,
BROCHA 150mm 6 MANGO DE PLASTICO PRETUL BRP-6,$8.50,
BROCHA 25mm 1 MANGO DE PLASTICO PRETUL BRP-1,$10.00,
BROCHA 38mm 1 1/2 MANGO DE PLASTICO PRETUL BRP-1 1/2,$13.50,
BROCHA 51mm 2 MANGO DE PLASTICO PRETUL BRP-2,$16.00,
BROCHA 63mm 2 1/2 MANGO DE PLASTICO PRETUL BRP-2 1/2,$20.00,
BROCHA 76mm 3 MANGO DE PLASTICO PRETUL BRP-3,$3.79,
,$655.52,
"CAJA CUADRADA GALVANIZADA STD. 4x4 S/TAPA KNOCK OUTS 1/2""",$78.00,
CALENTADOR DE DEPOSITO 40Lts LEÑA CORONA C/18,$73.13,
CAUTIN PRETUL CAU-30P TIPO LAPIZ 30W,$20.00,
CESPOL P/LAVABO FLEXIBLE CHICO 2218 FLEXIMATIC,$20.00,
CINTA ADHESIVA CANELA 48mm x 50mts TRUPER CCA-50,$24.00,
CINTA ADHESIVA TRANSPARENTE 48mm x 50mts TRUPER CTR-50,$32.00,
"CINTA MASKING TAPE 19mm 3/4"" x 50mts TRUPER MSK-3/4",$24.23,
"CINTA MASKING TAPE 25mm 1"" x 50mts TRUPER MSK-1",$9.01,
CLAVIJA MOVIL T/BISAGRA 502 MARFIL ROYER,$13.40,
CLAVIJA SENCILLA 2P REDONDA NEGRA PVC IUSA 416,$63.00,
"CONECTOR HEMBRA/MACHO MOVIBLE P/MANGUERA NYLON 6 10mm 3/8"" EDOMEX",$12.77,
CONECTOR HEMBRA/MACHO SIST.CLICK P/MANGUERA TRUPER CLIK-SET,$49.00,
,$176.15,
CONTACTO SENCILLO 1MOD 2P NEGRO COLGANTE ROYER 504,$354.37,
"CUCHARA PARA JARDIN 6"" CON MANGO DE MADERA GTS-SH TRUPER",$24.65,
DISCO DIAMANTE RIN CONTINUO P/LOSETA 4 AUSTROMEX 1501,$32.00,
DISCO DIAMANTE RIN SEGMENTADO P/CONCRETO 7 AUSTROMEX 1507,$30.00,
FLEXICO P/LAVABO 40cm 13-13mm 1/2x1/2 MAXIFLEX 25ME RUGO,$49.00,
FLEXOMETRO PRETUL PRO-3MEB 3 MTS C/BLISTER,$47.00,
FLEXOMETRO PRETUL PRO-3MEC 3 MTS EN COLORES,$29.00,
FLEXOMETRO PRETUL PRO-5MEB 5MTS C/BLISTER,$29.00,
FLEXOMETRO PRETUL PRO-5MEC 5 MTS EN COLORES,$15.56,
GUANTES DE LATEX PARA LIMPIEZA PUÑO LARGO TALLA GRANDE GU-313 TRUPER,$128.28,
GUANTES DE LATEX PARA LIMPIEZA PUÑO LARGO TALLA MEDIANA GU-312 TRUPER,$89.00,
GUIA DE ACERO P/CABLE 20.00mt,$291.22,
,$371.78,
INTERRUPTOR DE SEGURIDAD 2x30amp 2000 ROYER,$371.78,
JUEGO DE LLAVES TORX 8 PIEZAS TIPO NAVAJA CUERPO DE LAMINA TORX-8 TRUPER,$371.78,
KIT DE ACCESORIOS P/TANQUE BAJO SIST. STD PVC FLUIDMASTER FM200AK,$371.78,
LIJA DE AGUA G1000 FAJILLA 25pz A-99 FANDELI 05525,$30.00,
LIJA DE AGUA G1200 FAJILLA 25pz A-99 FANDELI 00048,$505.29,
LIJA DE AGUA G1500 FAJILLA 25pz A-99 FANDELI 00049,$125.00,
LIJA DE AGUA G2000 FAJILLA 25pz A-99 FANDELI 10965,$89.00,
"LLAVE DE GAS 10MM 7/8"" LL-GA-P PRETUL PAVONADA",$309.82,
LLAVE PARA EMPOTRAR Jgo SOLDABLE P/REG S/MAN CROMO URREA 652,$596.43,
"LLAVE PERICO PRETUL PET-10PB CROMADO 10"" C/BLISTER","$1,168.19",
"LLAVE PERICO PRETUL PET-8PB CROMADO 8"" C/BLISTER","$1,692.53",
MANERAL JUEGO JGO QUEEN GRANDE URREA QG,$16.00,
,$72.00,
"MANGUERA TRAMADA VERDE P/AGUA 13mm 1/2"" X100m STV.1/2.100",$40.09,
"MANGUERA TRAMADA VERDE P/AGUA 16mm 5/8""x100m TV.5/8.100",$65.01,
"MANGUERA TRAMADA VERDE P/AGUA 19mm 3/4""x100m TV.3/4.100",$89.73,
"NAVAJA PRETUL CUT-6PB CUTTER PLASTICO 6""",$142.29,
PEGAMENTO BLANCO BOTELLA 1 Kg FURIA,$506.91,
PEGAMENTO DE CONTACTO AMARILLO P/USO GENERAL LATA .135ml FURIA,$21.37,
PEGAMENTO DE CONTACTO AMARILLO P/USO GENERAL LATA .250ml FURIA,$24.51,
PEGAMENTO DE CONTACTO AMARILLO P/USO GENERAL LATA .500ml FURIA,$192.37,
PEGAMENTO DE CONTACTO AMARILLO P/USO GENERAL LATA 1 Lts FURIA,$333.80,
PEGAMENTO DE CONTACTO AMARILLO P/USO GENERAL LATA 4 Lts FURIA,$58.31,
PEGAMENTO INSTANTANEO USO GENERAL APLICADOR 2gr KRAZY KOLA LOKA,$228.64,
PEGAMENTO INSTANTANEO USO GENERAL BOTELLA C/BROCHA 5gr KRAZY KOLA LOKA,$45.19,
,$325.00,
PEGAMENTO PARA PVC RIG TODA PRESION HIDRAULICO LATA 475ml TANGIT,$19.16,
PEGAMENTO PARA PVC RIG TODA PRESION HIDRAULICO LATA 950ml TANGIT,$47.50,
SAPO P/WC PVC ROJO FM501 FLUIDMASTER,$96.45,
SOLDADURA SOLIDA 1/2-1/2 VERDE CARRETE 3mm 450gr OMEGA,$232.86,
SOPORTE P/LAVABO UNIVERSAL ALUMINIO TRES UÑAS,$292.92,
TALACHO TRUPER TP-5MX C/PICO C/MANGO 5 LB,$338.28,
"TUERCA CONICA C/ESPIGA LATON 10mm 3/8"" NACOBRE",$88.32,
VALVULA DE AGUJA 10-10mm 3/8 x3/8 FLARE-FLARE VRJ,$85.13,
VALVULA DE DESCARGA P/TANQUE BAJO PVC FLUIDMASTER FM507A,$80.16,
"VALVULA DE GLOBO ROSCADA 19mm 3/4"" LATON ALTA PRESION HARPER WYMAN",$72.26,
"VALVULA DE LLENADO P/TANQUE ESTACIONARIO 32mm 1 1/4"" HARPER WYMAN IUSA",$96.74,
"VALVULA DE LLENADO PARA TANQUE ESTACIONARIO T.E.2.""A"" INGUSA",$140.71,
,$38.44,
VALVULA DE PASO 10-10mm 3/8x3/8 FLARE-FLARE C/2 TCAS HARPER WYMAN,$38.73,
VALVULA DE PASO 10-13mm 3/8x1/2 FLARE-MNPT C/1 TCA HARPER WYMAN,$12.46,
VALVULA DE PASO 10-13mm 3/8x1/2 FLARE-SOLD C/1 TCA HARPER WYMAN,$120.51,
VALVULA DE PASO 13-13mm 1/2x1/2 SOLD-SOLD S/TCAS HARPER WYMAN,$186.41,
Loading