From 4a9a6ede143fee441e30bcd97b19bf0fb5432e52 Mon Sep 17 00:00:00 2001 From: Pierre Kancir Date: Tue, 31 Oct 2023 11:00:45 +0100 Subject: [PATCH] frontend: add another regex to catch blog image in first 5 lines --- frontend/scripts/get_discourse_posts.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/frontend/scripts/get_discourse_posts.py b/frontend/scripts/get_discourse_posts.py index 405f476d4f..d42fcdc49e 100755 --- a/frontend/scripts/get_discourse_posts.py +++ b/frontend/scripts/get_discourse_posts.py @@ -87,10 +87,14 @@ def get_first_youtube_link(request: str) -> str: # Regular expression to find URLs that contain 'YouTube' or image links url_pattern = re.compile(r'href=[\'"]?(https?://www\.youtube[^\'" >]+)') img_pattern = re.compile(r'(?:href|src)=[\'"]?(https?://[^\'" >]+\.(jpg|jpeg|png|gif|svg|bmp|webp))') + img_pattern2 = re.compile(r'img src=[\'"]?(https?://[^\'" >]+)') # catch google link and such # Find all matches youtube_links = url_pattern.findall(first_five_lines) img_links = img_pattern.findall(first_five_lines_lower)[0] if img_pattern.findall(first_five_lines_lower) else None + if img_links is None: + img_links = img_pattern2.findall(first_five_lines_lower)[0] if img_pattern2.findall( + first_five_lines_lower) else None # If there are image links before YouTube links, return empty string if img_links and (not youtube_links or