From 1178281125b5caa8c3796f8174a2527b5ed1254b Mon Sep 17 00:00:00 2001
From: VinciGit00
Date: Sun, 7 Apr 2024 19:33:29 +0200
Subject: [PATCH 1/2] add examples for scrapegraph

---
 Scrapegraph-ai/Scraper_gemini.py | 21 +++++++++++++++++++++
 Scrapegraph-ai/Scraper_openai.py | 21 +++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 Scrapegraph-ai/Scraper_gemini.py
 create mode 100644 Scrapegraph-ai/Scraper_openai.py

diff --git a/Scrapegraph-ai/Scraper_gemini.py b/Scrapegraph-ai/Scraper_gemini.py
new file mode 100644
index 0000000..ee624cc
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_gemini.py
@@ -0,0 +1,21 @@
+from scrapegraphai.graphs import SmartScraperGraph
+
+GEMINI_API_KEY = "YOUR_API_KEY"  # placeholder: replace with your own key
+
+# Define the configuration for the graph
+graph_config = {
+    "llm": {
+        "api_key": GEMINI_API_KEY,
+        "model": "gemini-pro",
+    },
+}
+
+# Create the SmartScraperGraph instance
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://www.wired.com/",  # also accepts a string with the already downloaded HTML code in text format
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file
diff --git a/Scrapegraph-ai/Scraper_openai.py b/Scrapegraph-ai/Scraper_openai.py
new file mode 100644
index 0000000..52e3a21
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_openai.py
@@ -0,0 +1,21 @@
+from scrapegraphai.graphs import SmartScraperGraph
+
+OPENAI_API_KEY = "YOUR_API_KEY"
+
+# Define the configuration for the graph
+graph_config = {
+    "llm": {
+        "api_key": OPENAI_API_KEY,
+        "model": "gpt-3.5-turbo",
+    },
+}
+
+# Create the SmartScraperGraph instance
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://www.wired.com/",  # also accepts a string with the already downloaded HTML code in text format
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file

From 4a9adcabe869648cac5e929a70a6be6e642fc525 Mon Sep 17 00:00:00 2001
From: VinciGit00
Date: Fri, 12 Apr 2024 11:03:34 +0200
Subject: [PATCH 2/2] add docker and ollama examples

---
 Scrapegraph-ai/Scraper_docker.py | 22 ++++++++++++++++++++++
 Scrapegraph-ai/Scraper_ollama.py | 27 +++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 Scrapegraph-ai/Scraper_docker.py
 create mode 100644 Scrapegraph-ai/Scraper_ollama.py

diff --git a/Scrapegraph-ai/Scraper_docker.py b/Scrapegraph-ai/Scraper_docker.py
new file mode 100644
index 0000000..17c6b24
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_docker.py
@@ -0,0 +1,22 @@
+"""
+Basic example of scraping pipeline using SmartScraper
+"""
+from scrapegraphai.graphs import SmartScraperGraph
+
+graph_config = {
+    "llm": {
+        "model": "ollama/mistral",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000, # set context length arbitrarily,
+    },
+}
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://perinim.github.io/projects",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file
diff --git a/Scrapegraph-ai/Scraper_ollama.py b/Scrapegraph-ai/Scraper_ollama.py
new file mode 100644
index 0000000..9be2f40
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_ollama.py
@@ -0,0 +1,27 @@
+
+from scrapegraphai.graphs import SmartScraperGraph
+
+graph_config = {
+    "llm": {
+        "model": "ollama/mistral",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000, # set context length arbitrarily,
+        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    }
+}
+
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://perinim.github.io/projects",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file