Skip to content

Commit

Permalink
Update creativenovels.py
Browse files: browse the repository at this point in the history
  • Loading branch information
TypoTami authored Sep 17, 2020
1 parent d13329d commit 9303fd8
Showing 1 changed file with 18 additions and 6 deletions.
24 changes: 18 additions & 6 deletions lncrawl/sources/creativenovels.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,7 +118,8 @@ def download_chapter_body(self, chapter):
'mark',
'ins',
'sub',
'sup'
'sup',
'br'
]

body = soup.select_one('article .entry-content')
Expand All @@ -130,14 +131,25 @@ def download_chapter_body(self, chapter):
span.decompose()
# end for
for span in body.find_all('span'):
if span.parent.name in FORMATTING_TAGS:
# If its parent is a formatting tag: Just remove the span tag
span.replace_with(span.text)
if len(span.parent.contents) <= 3:
if (span.parent.name in FORMATTING_TAGS) or (span.next_sibling is not None or span.previous_sibling is not None):
if span.next_sibling != None:
if span.next_sibling.name == FORMATTING_TAGS:
span.replace_with(span.text)
elif span.previous_sibling != None:
if span.previous_sibling.name == FORMATTING_TAGS:
span.replace_with(span.text)
# If its parent is a formatting tag: Just remove the span tag
span.replace_with(span.text)
else:
# Else: change it into a paragraph
span.name = 'p'
span.attrs = {}
# end if
else:
# Else: change it into a paragraph
span.name = 'p'
span.attrs = {}
# end if
#end if
# end for
for span in body.find_all('style'):
span.decompose()
Expand Down

0 comments on commit 9303fd8

Please sign in to comment.