From 3cdeca91ed57820cba3abecb306e3f620cfc5a02 Mon Sep 17 00:00:00 2001
From: "Karina J. Kwiatek"
Date: Tue, 6 Aug 2024 01:14:38 +0200
Subject: [PATCH] Send blog posts from RSS feed into text channel

---
 requirements.in  |   2 ++
 requirements.txt |   8 +++++
 src/bot.py       |   2 +-
 src/constants.py |   4 +++
 src/main.py      |  22 +++++++++++-
 src/rss.py       | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 145 insertions(+), 2 deletions(-)
 create mode 100644 src/rss.py

diff --git a/requirements.in b/requirements.in
index 733ecc5..c1aa6c0 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,2 +1,4 @@
 python-dotenv
 discord
+feedparser
+beautifulsoup4
diff --git a/requirements.txt b/requirements.txt
index 2e61730..fe925c5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,10 +14,14 @@ async-timeout==4.0.3
     # via aiohttp
 attrs==24.1.0
     # via aiohttp
+beautifulsoup4==4.12.3
+    # via -r requirements.in
 discord==2.3.2
     # via -r requirements.in
 discord-py==2.4.0
     # via discord
+feedparser==6.0.11
+    # via -r requirements.in
 frozenlist==1.4.1
     # via
     #   aiohttp
@@ -30,5 +34,9 @@ multidict==6.0.5
     #   yarl
 python-dotenv==1.0.1
     # via -r requirements.in
+sgmllib3k==1.0.0
+    # via feedparser
+soupsieve==2.5
+    # via beautifulsoup4
 yarl==1.9.4
     # via aiohttp
diff --git a/src/bot.py b/src/bot.py
index 01ad9db..9b747cc 100644
--- a/src/bot.py
+++ b/src/bot.py
@@ -4,7 +4,7 @@ import logging
 
 from constants import *
 
-from typing import Tuple, AsyncGenerator
+from typing import Tuple, AsyncGenerator, Optional
 
 
 class BotClient(discord.Client):
diff --git a/src/constants.py b/src/constants.py
index de56ee5..0ea1d5a 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -17,3 +17,7 @@ SPECIAL_ROLE = "unverified-volunteer"
 
 
 PASSWORDS_CHANNEL_NAME = "role-passwords"
+
+FEED_URL = "https://studentrobotics.org/feed.xml"
+FEED_CHANNEL_NAME = "blog"
+FEED_CHECK_INTERVAL = 10  # seconds
diff --git a/src/main.py b/src/main.py
index 9c159bd..e38e489 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,9 +1,11 @@
+import asyncio
 import os
 import sys
 import logging
 from dotenv import load_dotenv
 
 from bot import BotClient
+from rss import post_check_timer
 
 logger = logging.getLogger('srbot')
 logger.setLevel(logging.INFO)
@@ -13,4 +15,22 @@ load_dotenv()
 
 bot = BotClient(logger=logger)
 
-bot.run(os.getenv('DISCORD_TOKEN'))
+
+async def main() -> None:
+    """Run the bot and the blog feed poller until the bot stops."""
+    # Entering the client context initialises discord.py's internal state
+    # before the feed task calls `bot.wait_until_ready()`.
+    async with bot:
+        feed_task = asyncio.create_task(post_check_timer(bot))
+        try:
+            await bot.start(os.getenv('DISCORD_TOKEN'))
+        finally:
+            # Stop polling as soon as the bot itself stops.
+            feed_task.cancel()
+
+
+try:
+    asyncio.run(main())
+except KeyboardInterrupt:
+    # asyncio.run() has already cancelled the tasks and closed the loop.
+    pass
diff --git a/src/rss.py b/src/rss.py
new file mode 100644
index 0000000..710bb8d
--- /dev/null
+++ b/src/rss.py
@@ -0,0 +1,109 @@
+"""Post new entries from the blog's RSS feed into a Discord text channel."""
+
+import asyncio
+import logging
+from typing import Optional
+
+import discord
+import feedparser
+from bs4 import BeautifulSoup
+from feedparser import FeedParserDict
+
+from bot import BotClient
+from constants import FEED_URL, FEED_CHECK_INTERVAL, FEED_CHANNEL_NAME
+
+# Child of the 'srbot' logger that main.py configures.
+logger = logging.getLogger('srbot.rss')
+
+
+def get_feed_channel(bot: BotClient) -> Optional[discord.TextChannel]:
+    """Return the first text channel named FEED_CHANNEL_NAME, or None.
+
+    Channels of other kinds (voice, category, ...) that share the name are
+    skipped so that the declared return type holds.
+    """
+    for channel in bot.get_all_channels():
+        if isinstance(channel, discord.TextChannel) and channel.name == FEED_CHANNEL_NAME:
+            return channel
+
+    return None
+
+
+async def get_last_blog_post(channel: discord.TextChannel) -> str | None:
+    """Return the URL of the first embed in the channel's latest message.
+
+    Returns None when the channel is empty or that message has no embed.
+    """
+    last_message: Optional[discord.Message] = channel.last_message
+    if last_message is None:
+        # The message cache is empty right after a restart, so ask Discord for
+        # the latest message instead of assuming that nothing was posted yet.
+        async for message in channel.history(limit=1):
+            last_message = message
+
+    if last_message is not None and last_message.embeds:
+        return last_message.embeds[0].url
+
+    return None
+
+
+async def check_posts(bot: BotClient) -> None:
+    """Send the newest feed entry to the feed channel unless it is already there."""
+    channel = get_feed_channel(bot)
+    if channel is None:
+        logger.warning("No text channel named %r, skipping feed check", FEED_CHANNEL_NAME)
+        return
+
+    # feedparser downloads the feed synchronously; run it in a worker thread so
+    # the bot's event loop is not blocked while waiting on the network.
+    feed = await asyncio.to_thread(feedparser.parse, FEED_URL)
+    if not feed.entries:
+        logger.warning("No entries found in %s, skipping feed check", FEED_URL)
+        return
+
+    post = feed.entries[0]
+    newest_post_url = post.link
+    last_message_url = await get_last_blog_post(channel)
+    if newest_post_url != last_message_url:
+        await channel.send(embed=create_embed(post))
+
+
+def create_embed(post: FeedParserDict) -> discord.Embed:
+    """Build the embed announcing `post`.
+
+    The description is the text of the first paragraph of the post body and
+    the image is the post's first media thumbnail, when it has one.
+    """
+    # Entries without full content only carry a summary (or nothing at all).
+    body = post.content[0].value if post.get('content') else post.get('summary', '')
+    soup = BeautifulSoup(body, 'html.parser')
+    # `soup.p` is None when the body has no <p> element; use all of its text then.
+    first_paragraph = soup.p
+    description = first_paragraph.text if first_paragraph is not None else soup.get_text()
+
+    embed = discord.Embed(
+        title=post.title,
+        type="article",
+        url=post.link,
+        description=description,
+    )
+
+    # feedparser leaves `media_thumbnail` out of entries that have no thumbnail.
+    thumbnails = post.get('media_thumbnail') or []
+    if thumbnails:
+        embed.set_image(url=thumbnails[0]['url'])
+
+    return embed
+
+
+async def post_check_timer(bot: BotClient) -> None:
+    """Check the feed every FEED_CHECK_INTERVAL seconds once the bot is ready."""
+    await bot.wait_until_ready()
+    while True:
+        try:
+            await check_posts(bot)
+        except Exception:
+            # A failed check (network error, malformed entry, missing
+            # permission, ...) must not stop the following checks.
+            logger.exception("Failed to check the blog feed")
+        await asyncio.sleep(FEED_CHECK_INTERVAL)