diff --git a/Scrapegraph-ai/Scraper_docker.py b/Scrapegraph-ai/Scraper_docker.py
new file mode 100644
index 0000000..17c6b24
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_docker.py
@@ -0,0 +1,22 @@
+"""
+Basic example of scraping pipeline using SmartScraper
+"""
+from scrapegraphai.graphs import SmartScraperGraph
+
+graph_config = {
+    "llm": {
+        "model": "ollama/mistral",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000,  # set context length arbitrarily,
+    },
+}
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://perinim.github.io/projects",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file
diff --git a/Scrapegraph-ai/Scraper_gemini.py b/Scrapegraph-ai/Scraper_gemini.py
new file mode 100644
index 0000000..ee624cc
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_gemini.py
@@ -0,0 +1,21 @@
+from scrapegraphai.graphs import SmartScraperGraph
+
+GOOGLE_API_KEY = "YOUR_API_KEY"  # gemini-pro is a Google model; this is a Google AI Studio key, not an OpenAI one
+
+# Define the configuration for the graph
+graph_config = {
+    "llm": {
+        "api_key": GOOGLE_API_KEY,
+        "model": "gemini-pro",
+    },
+}
+
+# Create the SmartScraperGraph instance
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://www.wired.com/",  # also accepts a string with the already downloaded HTML code in text format
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file
diff --git a/Scrapegraph-ai/Scraper_ollama.py b/Scrapegraph-ai/Scraper_ollama.py
new file mode 100644
index 0000000..9be2f40
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_ollama.py
@@ -0,0 +1,27 @@
+
+from scrapegraphai.graphs import SmartScraperGraph
+
+graph_config = {
+    "llm": {
+        "model": "ollama/mistral",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000,  # set context length arbitrarily,
+        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    }
+}
+
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://perinim.github.io/projects",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file
diff --git a/Scrapegraph-ai/Scraper_openai.py b/Scrapegraph-ai/Scraper_openai.py
new file mode 100644
index 0000000..52e3a21
--- /dev/null
+++ b/Scrapegraph-ai/Scraper_openai.py
@@ -0,0 +1,21 @@
+from scrapegraphai.graphs import SmartScraperGraph
+
+OPENAI_API_KEY = "YOUR_API_KEY"
+
+# Define the configuration for the graph
+graph_config = {
+    "llm": {
+        "api_key": OPENAI_API_KEY,
+        "model": "gpt-3.5-turbo",
+    },
+}
+
+# Create the SmartScraperGraph instance
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    source="https://www.wired.com/",  # also accepts a string with the already downloaded HTML code in text format
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
\ No newline at end of file