diff --git a/article_maker.cc b/article_maker.cc
index db56ca2b4..e18b63fbc 100644
--- a/article_maker.cc
+++ b/article_maker.cc
@@ -596,7 +596,7 @@ void ArticleRequest::bodyFinished()
         }
       }
 
-      int size = QTextDocumentFragment::fromHtml( text ).toPlainText().length();
+      int size = htmlTextSize( text );
       if( size > articleSizeLimit )
         collapse = true;
     }
@@ -750,6 +750,36 @@ void ArticleRequest::bodyFinished()
   }
 }
 
+/// Estimates how much visible text an article's HTML body contains, so that
+/// bodyFinished() can compare it against articleSizeLimit.
+///
+/// @param html  Article markup. Taken by value: the remove() calls below edit
+///              this copy in place.
+/// @return 1000 if the markup contains an <iframe> tag; otherwise the length
+///         of the plain text left once <link> tags and <script> elements
+///         have been stripped.
+int ArticleRequest::htmlTextSize( QString html )
+{
+  // Website dictionary, presumably embedded through an <iframe> (confirm
+  // against the website dictionary code). The pattern has to name the tag:
+  // a bare "]*>" matches at every '>' and would flag every article.
+  if( html.contains( QRegularExpression( "<iframe\\b[^>]*>", QRegularExpression::CaseInsensitiveOption ) ) )
+  {
+    // Arbitrary number; no plain-text length is computed for such articles.
+    return 1000;
+  }
+
+  // https://bugreports.qt.io/browse/QTBUG-102757
+  // Drop <link> tags and whole <script> elements before the markup reaches
+  // QTextDocumentFragment::fromHtml(). The script pattern is anchored on its
+  // opening tag so that text preceding the first </script> is not removed.
+  QString stripStyleSheet =
+    html.remove( QRegularExpression( "<link\\b[^>]*>", QRegularExpression::CaseInsensitiveOption ) )
+      .remove( QRegularExpression( "<script\\b[\\s\\S]*?<\\/script>", QRegularExpression::CaseInsensitiveOption|QRegularExpression::MultilineOption ) );
+
+  return QTextDocumentFragment::fromHtml( stripStyleSheet ).toPlainText().length();
+}
+
 void ArticleRequest::stemmedSearchFinished()
 {
   // Got stemmed matching results
diff --git a/article_maker.hh b/article_maker.hh
index c3a017d45..665515f34 100644
--- a/article_maker.hh
+++ b/article_maker.hh
@@ -148,7 +148,7 @@ private slots:
   void individualWordFinished();
 
 private:
-
+  int htmlTextSize( QString html );
   /// Appends the given string to 'data', with locking its mutex.
   void appendToData( std::string const & );
 