From f37672b808af7759f7254651b06b5c6bb21c138a Mon Sep 17 00:00:00 2001 From: Rongrong Date: Tue, 24 Dec 2024 03:17:52 +0800 Subject: [PATCH] feat(parsing.utils): add JSON Feed support feedparser@develop has already added support for JSON Feed. However, entry.content may be a bare dict in this case, unlike Atom, which uses an array (list) of dicts. Add JSON Feed support by adding a case that handles this behavior. With this patch and feedparser from its develop branch, we finally gain support for JSON Feed. See also #273. Signed-off-by: Rongrong --- src/parsing/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parsing/utils.py b/src/parsing/utils.py index edd6cdbc2e..65abede732 100644 --- a/src/parsing/utils.py +++ b/src/parsing/utils.py @@ -258,8 +258,8 @@ class EntryParsed: enclosures: list[Enclosure] = None content = ( - entry.get('content') # Atom - or entry.get('summary', '') # Atom summary or RSS description + entry.get('content') # Atom: ; JSON Feed: .content_html, .content_text + or entry.get('summary', '') # Atom: ; RSS: ) if isinstance(content, list) and len(content) > 0: # Atom @@ -271,6 +271,9 @@ class EntryParsed: else: content = content[0] content = content.get('value', '') + elif isinstance(content, dict): # JSON Feed + # TODO: currently feedparser always prefer content_text rather than content_html, we'd like to change that + content = content.get('value', '') EntryParsed.content = await html_validator(content) EntryParsed.link = entry.get('link') or entry.get('guid')