-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
246 lines (193 loc) · 7.81 KB
/
cli.py
File metadata and controls
246 lines (193 loc) · 7.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import click
import logging
from sqlalchemy.orm import Session
from dotenv import load_dotenv
from app.database import SessionLocal, init_db, delete_articles_by_ids
from app.scrapping.scraper import scrape_articles
from app.processing.preprocessor import preprocess_text
from app.processing.postprocessor import postprocess_summary
from app.gemini.client import get_gemini_client
from app.models import Article
# Load environment variables via python-dotenv before any command runs
# (presumably this is where the Gemini/database settings come from — TODO confirm).
load_dotenv()
# Configure logging: the root logger starts at INFO; `scrape --verbose`
# lowers it to DEBUG at runtime.
logging.basicConfig(level=logging.INFO)
# Module-level logger, used by `scrape` to report per-article failures.
logger = logging.getLogger(__name__)
@click.group()
def cli():
    """Scrape, summarize, and manage stored articles from the command line."""
    # The group callback has no work of its own; it only serves as the
    # attachment point for the sub-commands registered with @cli.command().
    # The docstring above is what click prints as the description in `--help`.
@cli.command()
@click.option('--url', '-u', required=True, help='URL to scrape articles from')
@click.option('--limit', '-l', default=5, help='Maximum number of articles to scrape')
@click.option('--verbose', '-v', is_flag=True, help='Enable verbose output')
def scrape(url: str, limit: int, verbose: bool):
    """Scrape articles from a URL and store them with summaries."""
    # NOTE: the docstring doubles as the click help text, so it is kept short
    # and the extended notes live in comments.
    #
    # url:     page passed to scrape_articles().
    # limit:   maximum number of articles requested from scrape_articles().
    # verbose: switch the root logger to DEBUG and echo each stored article.
    #
    # Every article is preprocessed, summarized through the Gemini client,
    # post-processed, and committed individually, so one failing article does
    # not prevent the others from being stored. Errors are reported on stdout /
    # the logger; nothing is re-raised.
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Bind the name before the try block: if init_db() or SessionLocal()
    # raises, the finally clause must not hit an unbound `db` and mask the
    # real error with an UnboundLocalError.
    db = None
    try:
        # Initialize database
        init_db()
        db = SessionLocal()

        click.echo(f"Scraping articles from: {url}")
        click.echo(f"Limit: {limit} articles")

        # Scrape articles
        scraped_articles = scrape_articles(url, limit)
        if not scraped_articles:
            click.echo("No articles found at the provided URL")
            return
        click.echo(f"Found {len(scraped_articles)} articles")

        # Get Gemini client
        gemini_client = get_gemini_client()

        processed_count = 0
        with click.progressbar(scraped_articles, label='Processing articles') as articles:
            for article_data in articles:
                try:
                    # Preprocess content
                    preprocessed_content = preprocess_text(article_data['content'])

                    # Summarize with Gemini
                    summary = gemini_client.summarize_article(
                        title=article_data['title'],
                        content=preprocessed_content,
                        author=article_data.get('author'),
                    )

                    # Post-process summary
                    final_summary = postprocess_summary(summary)

                    # Store in database (the raw content is stored, not the
                    # preprocessed text that was sent to the model)
                    article = Article(
                        title=article_data['title'],
                        author=article_data.get('author', 'Unknown'),
                        content=article_data['content'],
                        summary=final_summary,
                        source_url=article_data['source_url'],
                    )
                    db.add(article)
                    db.commit()
                    db.refresh(article)
                    processed_count += 1

                    if verbose:
                        click.echo(f"\n Processed: {article.title}")
                        click.echo(f" Summary: {final_summary[:100]}...")
                except Exception as e:
                    # A failed flush/commit leaves the session unusable until
                    # it is rolled back; without this every later article
                    # would fail as well.
                    db.rollback()
                    logger.error(f"Error processing article: {str(e)}")
                    continue

        click.echo(f"\n Successfully processed {processed_count} articles!")
    except Exception as e:
        click.echo(f" Error: {str(e)}")
    finally:
        if db is not None:
            db.close()
@cli.command()
@click.option('--id', '-i', required=True, type=int, help='Article ID to retrieve')
def get_summary(id: int):
    """Get article summary by ID."""
    # `id` shadows the builtin, but click derives the parameter name from the
    # `--id` option, so it is kept for interface compatibility.
    #
    # Prints the article's metadata, its summary, and the first 200 characters
    # of the original content. Errors are echoed, not re-raised.

    # Bound up front so the finally clause is safe even when init_db() or
    # SessionLocal() raises before a session exists.
    db = None
    try:
        # Initialize database
        init_db()
        db = SessionLocal()

        article = db.query(Article).filter(Article.id == id).first()
        if not article:
            click.echo(f" Article with ID {id} not found")
            return

        click.echo(f"\n Title: {article.title}")
        click.echo(f" Author: {article.author}")
        click.echo(f" Source: {article.source_url}")
        click.echo(f" Created: {article.created_at}")
        click.echo(f"\n Summary:")
        click.echo(f"{article.summary}")
        click.echo(f"\n Original Content (first 200 chars):")
        # content may be NULL depending on the schema (not visible here —
        # TODO confirm); slicing None would turn the lookup into an error.
        content = article.content or ""
        click.echo(f"{content[:200]}...")
    except Exception as e:
        click.echo(f" Error: {str(e)}")
    finally:
        if db is not None:
            db.close()
@cli.command()
@click.option('--limit', '-l', default=10, help='Maximum number of articles to list')
def list_articles(limit: int):
    """List all stored articles."""
    # limit: maximum number of rows fetched (no explicit ordering is applied,
    # so which rows are returned is up to the database).
    #
    # Prints ID, title, author, creation time, and the first 100 characters of
    # the summary for each article. Errors are echoed, not re-raised.

    # Bound up front so the finally clause is safe even when init_db() or
    # SessionLocal() raises before a session exists.
    db = None
    try:
        # Initialize database
        init_db()
        db = SessionLocal()

        articles = db.query(Article).limit(limit).all()
        if not articles:
            click.echo(" No articles found in database")
            return

        click.echo(f"\n Found {len(articles)} articles:")
        click.echo("-" * 80)
        for article in articles:
            click.echo(f" ID: {article.id}")
            click.echo(f" Title: {article.title}")
            click.echo(f" Author: {article.author}")
            click.echo(f" Created: {article.created_at}")
            # A row without a summary must not abort the whole listing
            # (nullability of the column is not visible here — TODO confirm).
            summary = article.summary or ""
            click.echo(f" Summary: {summary[:100]}...")
            click.echo("-" * 80)
    except Exception as e:
        click.echo(f" Error: {str(e)}")
    finally:
        if db is not None:
            db.close()
@cli.command()
@click.option('--id', '-i', required=True, type=int, help='Article ID to delete')
@click.confirmation_option(prompt='Are you sure you want to delete this article?')
def delete_article(id: int):
    """Delete an article by ID."""
    # `id` shadows the builtin, but click derives the parameter name from the
    # `--id` option, so it is kept for interface compatibility.
    #
    # The confirmation prompt runs before this body is entered. Errors are
    # echoed, not re-raised.

    # Bound up front so the except/finally clauses are safe even when
    # init_db() or SessionLocal() raises before a session exists.
    db = None
    try:
        # Initialize database
        init_db()
        db = SessionLocal()

        article = db.query(Article).filter(Article.id == id).first()
        if not article:
            click.echo(f" Article with ID {id} not found")
            return

        # Read the title before deleting: the instance is no longer usable
        # for attribute access once the delete has been committed.
        title = article.title
        db.delete(article)
        db.commit()
        click.echo(f" Deleted article: {title}")
    except Exception as e:
        click.echo(f" Error: {str(e)}")
        # Discard the half-finished transaction so close() releases a clean
        # connection.
        if db is not None:
            db.rollback()
    finally:
        if db is not None:
            db.close()
@cli.command()
def init_database():
    """Initialize the database and create tables."""
    # Thin wrapper around init_db(): reports success or the failure reason on
    # stdout and never re-raises.
    try:
        init_db()
        click.echo(" Database initialized successfully!")
    except Exception as exc:
        failure = f" Error initializing database: {exc}"
        click.echo(failure)
@cli.command()
def test_gemini():
    """Test Gemini API connection."""
    # Fixed sample sent to the client's summarize_text(); any exception from
    # creating the client or from the call is reported as an API error.
    sample = "This is a test text to check if Gemini API is working properly."
    try:
        result = get_gemini_client().summarize_text(sample)
        click.echo(" Gemini API connection successful!")
        click.echo(f" Test summary: {result}")
    except Exception as exc:
        click.echo(f" Gemini API error: {exc}")
@cli.command()
@click.option('--ids', '-i', multiple=True, type=int, help="IDs of articles to delete")
@click.confirmation_option(prompt='Are you sure you want to delete these articles?')
def delete_articles(ids):
    """Delete multiple articles by their IDs."""
    # ids: tuple of ints collected by click from repeated `--ids/-i` options.
    #
    # All matching articles are removed in a single transaction. Deletions are
    # only reported after the commit succeeded, and requested IDs that matched
    # no row are listed explicitly instead of being reported as deleted.
    # Errors are echoed, not re-raised.

    # Bound up front so the except/finally clauses are safe even when
    # init_db() or SessionLocal() raises before a session exists.
    db = None
    try:
        # Initialize database
        init_db()
        db = SessionLocal()

        if not ids:
            click.echo(" No IDs provided. Use --ids to specify article IDs.")
            return

        articles = db.query(Article).filter(Article.id.in_(ids)).all()
        if not articles:
            click.echo(f" No articles found for the provided IDs: {ids}")
            return

        # Snapshot what will be reported: the instances are no longer usable
        # for attribute access once the delete has been committed.
        removed = [(article.id, article.title) for article in articles]
        for article in articles:
            db.delete(article)
        db.commit()

        for article_id, title in removed:
            click.echo(f" Deleted article: {title} (ID: {article_id})")

        found_ids = {article_id for article_id, _ in removed}
        missing = [requested for requested in ids if requested not in found_ids]
        if missing:
            click.echo(f" No articles found for IDs: {missing}")
        else:
            click.echo(" All specified articles have been deleted successfully!")
    except Exception as e:
        click.echo(f" Error: {str(e)}")
        # Discard the half-finished transaction so nothing is partially
        # deleted and close() releases a clean connection.
        if db is not None:
            db.rollback()
    finally:
        if db is not None:
            db.close()
# Run the click command group only when the file is executed as a script.
if __name__ == "__main__":
    cli()