diff --git a/Dockerfile b/Dockerfile index 91edc3d..423d0b2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,6 @@ LABEL maintainer="Dmitry Titenkov " LABEL version="1.0" LABEL description="Satire Pulp parser" WORKDIR /app -COPY requirements.txt /app +COPY requirements.txt . RUN pip3 install -r /app/requirements.txt --no-cache-dir COPY . . diff --git a/bot/__init__.py b/bot/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot_storage.py b/bot/bot_storage.py similarity index 98% rename from bot_storage.py rename to bot/bot_storage.py index 5ba4e5d..ef23f1e 100644 --- a/bot_storage.py +++ b/bot/bot_storage.py @@ -1,6 +1,6 @@ import logging -from models import LastSentNews, News +from db.models import LastSentNews, News from sqlalchemy import select from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession diff --git a/bot.py b/bot/handlers.py similarity index 77% rename from bot.py rename to bot/handlers.py index b696c54..a21d441 100644 --- a/bot.py +++ b/bot/handlers.py @@ -1,13 +1,14 @@ import logging -from bot_storage import ( +from bot.bot_storage import ( get_all_users, get_last_sent_id, get_news_after_id, save_last_sent_news_id, ) +from bot.sender import send_news from config import setup_logger -from db_async import AsyncSessionLocal +from db.db_async import AsyncSessionLocal from dotenv import load_dotenv from telegram import ( BotCommand, @@ -23,50 +24,6 @@ logger = logging.getLogger(__name__) -MAX_CAPTION_LENGTH = 1024 - - -def format_message(title, text): - message = f"*{title}*\n\n{text}\n" - if len(message) > MAX_CAPTION_LENGTH: - message = f"*{title}*\n\n{text[:MAX_CAPTION_LENGTH]} ...✂️\n" - return message - - -async def send_news( - chat_id: int, context: ContextTypes.DEFAULT_TYPE, title, image, text, url -): - message = format_message(title, text) - keyboard = [ - [InlineKeyboardButton("Читать полную версию на сайте", url=url)] - ] - reply_markup = InlineKeyboardMarkup(keyboard) - if image: - 
MAX_CAPTION_LENGTH = 1024
# Marker appended to a news body that had to be cut short.
_TRUNCATION_SUFFIX = " ...✂️\n"


def format_message(title, text):
    """Build the Markdown message for a news item, capped at the caption limit.

    The message is the title wrapped in ``*`` (Markdown bold), a blank line,
    and the text. When the whole message would exceed ``MAX_CAPTION_LENGTH``
    the text is cut so that header + text + truncation marker fit the limit.

    Args:
        title: News headline.
        text: News body.

    Returns:
        The formatted message. Its length is at most ``MAX_CAPTION_LENGTH``
        unless the title alone is already too long to leave room for text.
    """
    header = f"*{title}*\n\n"
    message = f"{header}{text}\n"
    if len(message) <= MAX_CAPTION_LENGTH:
        return message
    # Budget the slice against the header and the marker: slicing the text to
    # the full limit (as before) always produced a message over the limit.
    # NOTE(review): length is measured in Python code points; confirm how
    # Telegram counts characters outside the BMP.
    budget = max(
        MAX_CAPTION_LENGTH - len(header) - len(_TRUNCATION_SUFFIX), 0
    )
    return f"{header}{text[:budget]}{_TRUNCATION_SUFFIX}"


async def send_news(
    chat_id: int, context: "ContextTypes.DEFAULT_TYPE", title, image, text, url
):
    """Send one news item to a chat, with its picture when possible.

    The item is first sent as a photo with the formatted message as caption.
    If there is no image, or sending the photo raises, the same message is
    sent as plain text instead. Failures are logged, never raised.

    Args:
        chat_id: Target chat.
        context: Callback context whose ``bot`` performs the sends.
        title: News headline.
        image: Photo passed to ``send_photo``; falsy means "no picture".
        text: News body.
        url: Link placed on the inline "read on the site" button.
    """
    # NOTE(review): title/text go into a Markdown message unescaped; confirm
    # that Markdown control characters in scraped text cannot break parsing.
    message = format_message(title, text)
    keyboard = [
        [InlineKeyboardButton("Читать полную версию на сайте", url=url)]
    ]
    reply_markup = InlineKeyboardMarkup(keyboard)
    if image:
        try:
            await context.bot.send_photo(
                chat_id=chat_id,
                photo=image,
                caption=message,
                parse_mode="Markdown",
                reply_markup=reply_markup,
            )
            logger.info("Новость отправлена с картинкой")
            return
        except Exception as e:
            # Fall through to the text-only send below.
            logger.error(
                f"Не удалось отправить фото по ссылке, ошибка: {e}",
            )
    try:
        await context.bot.send_message(
            chat_id, message, parse_mode="Markdown", reply_markup=reply_markup
        )
        logger.info(f"Новость '{title[:25]}' отправлена без картинки")
    except Exception as e:
        logger.error(
            f"Не удалось отправить сообщение с новостью '{title[:25]}', ошибка: {e}"
        )
class SaveNewsPipeline:
    """Scrapy item pipeline that stores each scraped news item in the database."""

    def process_item(self, item: NewsItem, spider):
        """Persist one news item and pass it on.

        Opens a ``SessionLocal`` session and hands the item's ``url``,
        ``title``, ``image`` and ``text`` fields to ``save_news``. Saving is
        best-effort: any failure is logged and the item is still returned.

        Args:
            item: Scraped news item providing the four fields above.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, whether or not it was saved.
        """
        try:
            with SessionLocal() as session:
                save_news(
                    url=item["url"],
                    title=item["title"],
                    image=item["image"],
                    text=item["text"],
                    session=session,
                )
            logger.info("...Новость сохранена...")
        except Exception as e:
            # logger.exception logs at ERROR level like before, but keeps the
            # traceback so the failing call can be located.
            logger.exception(f"Ошибка при сохранении новости: {e}")
        return item
a/satire_pulp_parser/spiders/satire_pulp.py +++ b/satire_pulp_parser/spiders/satire_pulp.py @@ -1,6 +1,7 @@ import scrapy -from db_sync import SessionLocal -from spider_storage import is_news_exists, save_news +from db.db_sync import SessionLocal +from satire_pulp_parser.items import NewsItem +from satire_pulp_parser.spider_storage import is_news_exists class SatirePulpSpider(scrapy.Spider): @@ -30,11 +31,7 @@ def parse_news(self, response): else: image = None - with SessionLocal() as session: - save_news(response.url, final_title, image, final_text, session) - yield { - "title": title, - "text": final_text, - "image": image, - "url": response.url, - } + item = NewsItem( + title=final_title, text=final_text, image=image, url=response.url + ) + yield item diff --git a/scheduler/__init__.py b/scheduler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scheduler.py b/scheduler/scheduler.py similarity index 100% rename from scheduler.py rename to scheduler/scheduler.py