Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ LABEL maintainer="Dmitry Titenkov <lt200711@yandex.ru>"
LABEL version="1.0"
LABEL description="Satire Pulp parser"
WORKDIR /app
COPY requirements.txt /app
COPY requirements.txt .
RUN pip3 install -r /app/requirements.txt --no-cache-dir
COPY . .
Empty file added bot/__init__.py
Empty file.
2 changes: 1 addition & 1 deletion bot_storage.py → bot/bot_storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from models import LastSentNews, News
from db.models import LastSentNews, News
from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
Expand Down
49 changes: 3 additions & 46 deletions bot.py → bot/handlers.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import logging

from bot_storage import (
from bot.bot_storage import (
get_all_users,
get_last_sent_id,
get_news_after_id,
save_last_sent_news_id,
)
from bot.sender import send_news
from config import setup_logger
from db_async import AsyncSessionLocal
from db.db_async import AsyncSessionLocal
from dotenv import load_dotenv
from telegram import (
BotCommand,
Expand All @@ -23,50 +24,6 @@
logger = logging.getLogger(__name__)


MAX_CAPTION_LENGTH = 1024


def format_message(title, text):
    """Build the Markdown message for a news item, capped at the caption limit.

    The message is the bold title, a blank line, then the body text. When the
    full message would exceed ``MAX_CAPTION_LENGTH`` the body is shortened so
    that header + body + cut marker fit within the limit.

    Args:
        title: Headline, rendered in bold Markdown.
        text: Body of the news item.

    Returns:
        The formatted message string.
    """
    header = f"*{title}*\n\n"
    message = f"{header}{text}\n"
    if len(message) <= MAX_CAPTION_LENGTH:
        return message
    cut_marker = " ...✂️\n"
    # Budget the body against what the header and marker already consume;
    # slicing the body to the full limit would still overflow once they are
    # added. A header longer than the limit leaves no room for body at all.
    room = max(0, MAX_CAPTION_LENGTH - len(header) - len(cut_marker))
    return f"{header}{text[:room]}{cut_marker}"


async def send_news(
    chat_id: int, context: ContextTypes.DEFAULT_TYPE, title, image, text, url
):
    """Deliver one news item to a chat, preferring a photo with a caption.

    If ``image`` is truthy the item is first sent as a photo whose caption is
    the formatted message. Any failure of that attempt is logged and the item
    is then sent as a plain text message instead. A failure of the text
    message is logged and swallowed, so this coroutine does not raise for
    delivery errors.

    Args:
        chat_id: Target chat identifier.
        context: Callback context whose ``bot`` performs the sending.
        title: Headline of the news item.
        image: Photo to attach, or a falsy value to skip the photo attempt.
        text: Body of the news item.
        url: Link attached to the inline "read more" button.
    """
    caption = format_message(title, text)
    read_more = InlineKeyboardButton("Читать полную версию на сайте", url=url)
    markup = InlineKeyboardMarkup([[read_more]])
    # Options shared by both delivery attempts.
    shared = {"parse_mode": "Markdown", "reply_markup": markup}

    if image:
        try:
            await context.bot.send_photo(
                chat_id=chat_id, photo=image, caption=caption, **shared
            )
            logger.info("Новость отправлена с картинкой")
            return
        except Exception as photo_error:
            # Fall through to the text-only attempt below.
            logger.error(
                f"Не удалось отправить фото по ссылке, ошибка: {photo_error}",
            )

    try:
        await context.bot.send_message(chat_id, caption, **shared)
        logger.info(f"Новость '{title[:25]}' отправлена без картинки")
    except Exception as text_error:
        logger.error(
            f"Не удалось отправить сообщение с новостью '{title[:25]}', ошибка: {text_error}"
        )


async def auto_send_news(context: ContextTypes.DEFAULT_TYPE):
async with AsyncSessionLocal() as session:
users = await get_all_users(session)
Expand Down
55 changes: 55 additions & 0 deletions bot/sender.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import logging

from config import setup_logger
from dotenv import load_dotenv
from telegram import InlineKeyboardButton, InlineKeyboardMarkup
from telegram.ext import ContextTypes

load_dotenv()

setup_logger()
logger = logging.getLogger(__name__)


MAX_CAPTION_LENGTH = 1024


def format_message(title, text):
    """Build the Markdown message for a news item, capped at the caption limit.

    The message is the bold title, a blank line, then the body text. When the
    full message would exceed ``MAX_CAPTION_LENGTH`` the body is shortened so
    that header + body + cut marker fit within the limit.

    Args:
        title: Headline, rendered in bold Markdown.
        text: Body of the news item.

    Returns:
        The formatted message string.
    """
    header = f"*{title}*\n\n"
    message = f"{header}{text}\n"
    if len(message) <= MAX_CAPTION_LENGTH:
        return message
    cut_marker = " ...✂️\n"
    # Budget the body against what the header and marker already consume;
    # slicing the body to the full limit would still overflow once they are
    # added. A header longer than the limit leaves no room for body at all.
    room = max(0, MAX_CAPTION_LENGTH - len(header) - len(cut_marker))
    return f"{header}{text[:room]}{cut_marker}"


async def send_news(
    chat_id: int, context: ContextTypes.DEFAULT_TYPE, title, image, text, url
):
    """Deliver one news item to a chat, preferring a photo with a caption.

    If ``image`` is truthy the item is first sent as a photo whose caption is
    the formatted message. Any failure of that attempt is logged and the item
    is then sent as a plain text message instead. A failure of the text
    message is logged and swallowed, so this coroutine does not raise for
    delivery errors.

    Args:
        chat_id: Target chat identifier.
        context: Callback context whose ``bot`` performs the sending.
        title: Headline of the news item.
        image: Photo to attach, or a falsy value to skip the photo attempt.
        text: Body of the news item.
        url: Link attached to the inline "read more" button.
    """
    caption = format_message(title, text)
    read_more = InlineKeyboardButton("Читать полную версию на сайте", url=url)
    markup = InlineKeyboardMarkup([[read_more]])
    # Options shared by both delivery attempts.
    shared = {"parse_mode": "Markdown", "reply_markup": markup}

    if image:
        try:
            await context.bot.send_photo(
                chat_id=chat_id, photo=image, caption=caption, **shared
            )
            logger.info("Новость отправлена с картинкой")
            return
        except Exception as photo_error:
            # Fall through to the text-only attempt below.
            logger.error(
                f"Не удалось отправить фото по ссылке, ошибка: {photo_error}",
            )

    try:
        await context.bot.send_message(chat_id, caption, **shared)
        logger.info(f"Новость '{title[:25]}' отправлена без картинки")
    except Exception as text_error:
        logger.error(
            f"Не удалось отправить сообщение с новостью '{title[:25]}', ошибка: {text_error}"
        )
Empty file added db/__init__.py
Empty file.
5 changes: 4 additions & 1 deletion db_async.py → db/db_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
load_dotenv()


engine = create_async_engine(os.getenv("DATABASE_URL_ASYNC"), echo=False)
DATABASE_URL_ASYNC = os.getenv("DATABASE_URL_ASYNC")


engine = create_async_engine(DATABASE_URL_ASYNC, echo=False)


AsyncSessionLocal = async_sessionmaker(engine, expire_on_commit=False)
5 changes: 4 additions & 1 deletion db_sync.py → db/db_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
load_dotenv()


engine = create_engine(os.getenv("DATABASE_URL_SYNC"))
DATABASE_URL_SYNC = os.getenv("DATABASE_URL_SYNC")


engine = create_engine(DATABASE_URL_SYNC)

SessionLocal = sessionmaker(engine)
4 changes: 2 additions & 2 deletions init_db.py → db/init_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import logging

from config import setup_logger
from db_async import engine
from db.db_async import engine
from db.models import Base, LastSentNews, News # noqa
from dotenv import load_dotenv
from models import Base, LastSentNews, News # noqa

load_dotenv()

Expand Down
File renamed without changes.
8 changes: 4 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,26 @@ services:
ports:
- "5432:5432"
env_file:
- ./.env
- .env

bot:
build: .
restart: always
depends_on:
- db
env_file:
- ./.env
- .env
command: python3 main.py

scheduler:
build: .
restart: always
command: python3 scheduler.py
command: python3 -m scheduler.scheduler
depends_on:
- db
- bot
env_file:
- ./.env
- .env

volumes:
postgres_data:
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import os

from bot import (
from bot.handlers import (
auto_send_news,
button_handler,
help_command,
Expand Down
14 changes: 5 additions & 9 deletions satire_pulp_parser/items.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class SatirePulpParserItem(scrapy.Item):
    """Empty Scrapy item that declares no fields."""

    # define the fields for your item here like:
    # name = scrapy.Field()
    pass
class NewsItem(scrapy.Item):
    """Scrapy item carrying the four fields of one scraped news article."""

    title = scrapy.Field()  # headline of the article
    text = scrapy.Field()  # body text of the article
    image = scrapy.Field()  # image reference; the spider may set this to None
    url = scrapy.Field()  # address of the page the article was scraped from
24 changes: 21 additions & 3 deletions satire_pulp_parser/pipelines.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
# from itemadapter import ItemAdapter
import logging

from db.db_sync import SessionLocal
from satire_pulp_parser.items import NewsItem
from satire_pulp_parser.spider_storage import save_news

class SatirePulpParserPipeline:
def process_item(self, item, spider):
logger = logging.getLogger(__name__)


class SaveNewsPipeline:
    """Scrapy item pipeline that stores each scraped news item via ``save_news``."""

    def process_item(self, item: NewsItem, spider):
        """Persist ``item`` and pass it on unchanged.

        Any exception raised while saving is logged and swallowed, so the
        item is returned whether or not it was stored.

        Args:
            item: Scraped news item with ``url``, ``title``, ``image`` and
                ``text`` keys.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` that was passed in.
        """
        try:
            with SessionLocal() as db_session:
                # Read the fields in the same order save_news receives them.
                payload = {
                    field: item[field]
                    for field in ("url", "title", "image", "text")
                }
                save_news(session=db_session, **payload)
                logger.info("...Новость сохранена...")
        except Exception as error:
            logger.error(f"Ошибка при сохранении новости: {error}")
        return item
10 changes: 5 additions & 5 deletions satire_pulp_parser/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

BOT_NAME = "satire_pulp_parser"
BOT_NAME = "satire_pulp"

SPIDER_MODULES = ["satire_pulp_parser.spiders"]
NEWSPIDER_MODULE = "satire_pulp_parser.spiders"
Expand All @@ -16,7 +16,7 @@


# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = "satire_pulp_parser (+http://www.yourdomain.com)"
USER_AGENT = "satire_pulp (+http://panorama.pub)"

# Obey robots.txt rules
ROBOTSTXT_OBEY = True
Expand Down Expand Up @@ -58,9 +58,9 @@

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# ITEM_PIPELINES = {
# "satire_pulp_parser.pipelines.SatirePulpParserPipeline": 300,
# }
ITEM_PIPELINES = {
"satire_pulp_parser.pipelines.SaveNewsPipeline": 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
Expand Down
3 changes: 2 additions & 1 deletion spider_storage.py → satire_pulp_parser/spider_storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from models import News
from db.models import News
from sqlalchemy.exc import SQLAlchemyError

logger = logging.getLogger(__name__)
Expand All @@ -23,5 +23,6 @@ def save_news(url: str, title: str, image: str, text: str, session):
session.add(news)
session.commit()
except SQLAlchemyError as e:
session.rollback()
logger.error(f"Ошибка сохранения новости в бд: {e}")
raise
17 changes: 7 additions & 10 deletions satire_pulp_parser/spiders/satire_pulp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import scrapy
from db_sync import SessionLocal
from spider_storage import is_news_exists, save_news
from db.db_sync import SessionLocal
from satire_pulp_parser.items import NewsItem
from satire_pulp_parser.spider_storage import is_news_exists


class SatirePulpSpider(scrapy.Spider):
Expand Down Expand Up @@ -30,11 +31,7 @@ def parse_news(self, response):

else:
image = None
with SessionLocal() as session:
save_news(response.url, final_title, image, final_text, session)
yield {
"title": title,
"text": final_text,
"image": image,
"url": response.url,
}
item = NewsItem(
title=final_title, text=final_text, image=image, url=response.url
)
yield item
Empty file added scheduler/__init__.py
Empty file.
File renamed without changes.