Skip to content

Commit

Permalink
Merge pull request #1987 from shenlebantongying/refactor/gd-text
Browse files Browse the repository at this point in the history
refactor: use standard string types and merge wstring(-qt)/utf8/ namespaces to Text
  • Loading branch information
shenlebantongying authored Nov 23, 2024
2 parents abeacef + 1471bc3 commit 0c42c30
Show file tree
Hide file tree
Showing 62 changed files with 1,055 additions and 1,079 deletions.
3 changes: 3 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ c8af0450f1f7f8188004db96e3f53e7e33e2ccad

# remove gddebug.hh and associated functions
76aaed116bdc3aeb53fd61553aedb877baf9b510

# wstring & wchar -> std::u32string & char32_t
f1e158578f62c96059bef1a616b75495adb6e2c6
14 changes: 6 additions & 8 deletions src/article_maker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#include "htmlescape.hh"
#include "langcoder.hh"
#include "utils.hh"
#include "wstring_qt.hh"
#include <QDir>
#include <QFile>
#include <QTextDocumentFragment>
Expand All @@ -21,7 +20,6 @@

using std::vector;
using std::string;
using gd::wstring;
using std::set;
using std::list;

Expand Down Expand Up @@ -484,7 +482,7 @@ ArticleRequest::ArticleRequest( QString const & word,

// Accumulate main forms
for ( const auto & activeDict : activeDicts ) {
auto const s = activeDict->findHeadwordsForSynonym( gd::removeTrailingZero( word ) );
auto const s = activeDict->findHeadwordsForSynonym( Text::removeTrailingZero( word ) );

connect( s.get(), &Dictionary::Request::finished, this, &ArticleRequest::altSearchFinished, Qt::QueuedConnection );

Expand Down Expand Up @@ -521,9 +519,9 @@ void ArticleRequest::altSearchFinished()

altsDone = true; // So any pending signals in queued mode won't mess us up

vector< wstring > altsVector( alts.begin(), alts.end() );
vector< std::u32string > altsVector( alts.begin(), alts.end() );

wstring wordStd = word.toStdU32String();
std::u32string wordStd = word.toStdU32String();

if ( activeDicts.size() <= 1 ) {
articleSizeLimit = -1; // Don't collapse article if only one dictionary presented
Expand All @@ -534,7 +532,7 @@ void ArticleRequest::altSearchFinished()
sptr< Dictionary::DataRequest > r = activeDict->getArticle(
wordStd,
altsVector,
gd::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
Text::removeTrailingZero( contexts.value( QString::fromStdString( activeDict->getId() ) ) ),
ignoreDiacritics );

connect( r.get(), &Dictionary::Request::finished, this, &ArticleRequest::bodyFinished, Qt::QueuedConnection );
Expand Down Expand Up @@ -1008,7 +1006,7 @@ void ArticleRequest::individualWordFinished()
WordFinder::SearchResults const & results = stemmedWordFinder->getResults();

if ( results.size() ) {
wstring source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );
std::u32string source = Folding::applySimpleCaseOnly( currentSplittedWordCompound );

bool hadSomething = false;

Expand All @@ -1022,7 +1020,7 @@ void ArticleRequest::individualWordFinished()

// Prefix match found. Check if the aliases are acceptable.

wstring result( Folding::applySimpleCaseOnly( results[ x ].first ) );
std::u32string result( Folding::applySimpleCaseOnly( results[ x ].first ) );

if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 ) {
// The resulting string begins with the source one
Expand Down
2 changes: 1 addition & 1 deletion src/article_maker.hh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class ArticleRequest: public Dictionary::DataRequest
QMap< QString, QString > contexts;
std::vector< sptr< Dictionary::Class > > activeDicts;

std::set< gd::wstring, std::less<> > alts; // Accumulated main forms
std::set< std::u32string, std::less<> > alts; // Accumulated main forms
std::list< sptr< Dictionary::WordSearchRequest > > altSearches;
std::list< sptr< Dictionary::DataRequest > > bodyRequests;
bool altsDone{ false };
Expand Down
6 changes: 3 additions & 3 deletions src/common/filetype.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include "filetype.hh"
#include "utf8.hh"
#include "text.hh"
#include <ctype.h>

namespace Filetype {
Expand All @@ -26,13 +26,13 @@ string simplifyString( string const & str, bool lowercase )

size_t beginPos = 0;

while ( beginPos < str.size() && Utf8::isspace( str[ beginPos ] ) ) {
while ( beginPos < str.size() && Text::isspace( str[ beginPos ] ) ) {
++beginPos;
}

size_t endPos = str.size();

while ( endPos && Utf8::isspace( str[ endPos - 1 ] ) ) {
while ( endPos && Text::isspace( str[ endPos - 1 ] ) ) {
--endPos;
}

Expand Down
70 changes: 35 additions & 35 deletions src/common/folding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "folding.hh"

#include "utf8.hh"
#include "text.hh"
#include "globalregex.hh"
#include "inc_case_folding.hh"

Expand All @@ -13,12 +13,12 @@ namespace Folding {
/// caught by the diacritics folding table, but they are only handled there
/// when they come with their main characters, not by themselves. The rest
/// are caught here.
bool isCombiningMark( wchar ch )
bool isCombiningMark( char32_t ch )
{
return QChar::isMark( ch );
}

wstring apply( wstring const & in, bool preserveWildcards )
std::u32string apply( std::u32string const & in, bool preserveWildcards )
{
// remove diacritics (normalization), whitespace and punctuation
auto temp = QString::fromStdU32String( in )
Expand All @@ -32,19 +32,19 @@ wstring apply( wstring const & in, bool preserveWildcards )
// case folding
std::u32string caseFolded;
caseFolded.reserve( temp.size() );
wchar buf[ foldCaseMaxOut ];
char32_t buf[ foldCaseMaxOut ];
for ( const char32_t ch : temp ) {
auto n = foldCase( ch, buf );
caseFolded.append( buf, n );
}
return caseFolded;
}

wstring applySimpleCaseOnly( wstring const & in )
std::u32string applySimpleCaseOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();

wstring out;
std::u32string out;

out.reserve( in.size() );

Expand All @@ -55,27 +55,27 @@ wstring applySimpleCaseOnly( wstring const & in )
return out;
}

wstring applySimpleCaseOnly( QString const & in )
std::u32string applySimpleCaseOnly( QString const & in )
{
// Qt only supports simple case folding.
return in.toCaseFolded().toStdU32String();
}

wstring applySimpleCaseOnly( std::string const & in )
std::u32string applySimpleCaseOnly( std::string const & in )
{
return applySimpleCaseOnly( Utf8::decode( in ) );
return applySimpleCaseOnly( Text::toUtf32( in ) );
// return QString::fromStdString( in ).toCaseFolded().toStdU32String();
}

wstring applyFullCaseOnly( wstring const & in )
std::u32string applyFullCaseOnly( std::u32string const & in )
{
wstring caseFolded;
std::u32string caseFolded;

caseFolded.reserve( in.size() * foldCaseMaxOut );

wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();

wchar buf[ foldCaseMaxOut ];
char32_t buf[ foldCaseMaxOut ];

for ( size_t left = in.size(); left--; ) {
caseFolded.append( buf, foldCase( *nextChar++, buf ) );
Expand All @@ -84,17 +84,17 @@ wstring applyFullCaseOnly( wstring const & in )
return caseFolded;
}

wstring applyDiacriticsOnly( wstring const & in )
std::u32string applyDiacriticsOnly( std::u32string const & in )
{
auto noAccent = QString::fromStdU32String( in ).normalized( QString::NormalizationForm_KD ).remove( RX::accentMark );
return noAccent.toStdU32String();
}

wstring applyPunctOnly( wstring const & in )
std::u32string applyPunctOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();

wstring out;
std::u32string out;

out.reserve( in.size() );

Expand All @@ -119,11 +119,11 @@ QString applyPunctOnly( QString const & in )
return out;
}

wstring applyWhitespaceOnly( wstring const & in )
std::u32string applyWhitespaceOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();

wstring out;
std::u32string out;

out.reserve( in.size() );

Expand All @@ -136,11 +136,11 @@ wstring applyWhitespaceOnly( wstring const & in )
return out;
}

wstring applyWhitespaceAndPunctOnly( wstring const & in )
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & in )
{
wchar const * nextChar = in.data();
char32_t const * nextChar = in.data();

wstring out;
std::u32string out;

out.reserve( in.size() );

Expand All @@ -153,26 +153,26 @@ wstring applyWhitespaceAndPunctOnly( wstring const & in )
return out;
}

bool isWhitespace( wchar ch )
bool isWhitespace( char32_t ch )
{
// Invisible (non-printable) characters are treated as whitespace as well.
return QChar::isSpace( ch ) || !QChar::isPrint( ch );
}

bool isWhitespaceOrPunct( wchar ch )
bool isWhitespaceOrPunct( char32_t ch )
{
return isWhitespace( ch ) || QChar::isPunct( ch );
}

bool isPunct( wchar ch )
bool isPunct( char32_t ch )
{
return QChar::isPunct( ch );
}

wstring trimWhitespaceOrPunct( wstring const & in )
std::u32string trimWhitespaceOrPunct( std::u32string const & in )
{
wchar const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size();
char32_t const * wordBegin = in.c_str();
std::u32string::size_type wordSize = in.size();

// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespaceOrPunct( *wordBegin ) ) {
Expand All @@ -185,7 +185,7 @@ wstring trimWhitespaceOrPunct( wstring const & in )
--wordSize;
}

return wstring( wordBegin, wordSize );
return std::u32string( wordBegin, wordSize );
}

QString trimWhitespaceOrPunct( QString const & in )
Expand All @@ -209,13 +209,13 @@ QString trimWhitespaceOrPunct( QString const & in )
return in.mid( wordBegin, wordSize );
}

wstring trimWhitespace( wstring const & in )
std::u32string trimWhitespace( std::u32string const & in )
{
if ( in.empty() ) {
return in;
}
wchar const * wordBegin = in.c_str();
wstring::size_type wordSize = in.size();
char32_t const * wordBegin = in.c_str();
std::u32string::size_type wordSize = in.size();

// Skip any leading whitespace
while ( *wordBegin && Folding::isWhitespace( *wordBegin ) ) {
Expand All @@ -228,7 +228,7 @@ wstring trimWhitespace( wstring const & in )
--wordSize;
}

return wstring( wordBegin, wordSize );
return std::u32string( wordBegin, wordSize );
}

QString trimWhitespace( QString const & in )
Expand Down
34 changes: 16 additions & 18 deletions src/common/folding.hh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#pragma once

#include "wstring.hh"
#include "text.hh"
#include <QString>

/// Folding provides means to translate several possible ways to write a
Expand All @@ -17,8 +17,6 @@

namespace Folding {

using gd::wstring;
using gd::wchar;

/// The algorithm's version.
enum {
Expand All @@ -27,48 +25,48 @@ enum {

/// Applies the folding algorithm to each character in the given string,
/// making another one as a result.
wstring apply( wstring const &, bool preserveWildcards = false );
std::u32string apply( std::u32string const &, bool preserveWildcards = false );

/// Applies only simple case folding algorithm. Since many dictionaries have
/// different case style, we interpret words differing only by case as synonyms.
wstring applySimpleCaseOnly( wstring const & );
wstring applySimpleCaseOnly( QString const & in );
wstring applySimpleCaseOnly( std::string const & in );
std::u32string applySimpleCaseOnly( std::u32string const & );
std::u32string applySimpleCaseOnly( QString const & in );
std::u32string applySimpleCaseOnly( std::string const & in );

/// Applies only full case folding algorithm. This includes simple case, but also
/// decomposing ligatures and complex letters.
wstring applyFullCaseOnly( wstring const & );
std::u32string applyFullCaseOnly( std::u32string const & );

/// Applies only diacritics folding algorithm.
wstring applyDiacriticsOnly( wstring const & );
std::u32string applyDiacriticsOnly( std::u32string const & );

/// Applies only punctuation folding algorithm.
wstring applyPunctOnly( wstring const & );
std::u32string applyPunctOnly( std::u32string const & );
QString applyPunctOnly( QString const & in );
/// Applies only whitespace folding algorithm.
wstring applyWhitespaceOnly( wstring const & );
std::u32string applyWhitespaceOnly( std::u32string const & );

/// Applies only whitespace&punctuation folding algorithm.
wstring applyWhitespaceAndPunctOnly( wstring const & );
std::u32string applyWhitespaceAndPunctOnly( std::u32string const & );

/// Returns true if the given character is any form of whitespace, false
/// otherwise. Whitespace corresponds to Zl/Zp/Zs Unicode classes, and also
/// includes \n, \r and \t. Non-printable (invisible) characters are treated
/// as whitespace as well.
bool isWhitespace( wchar ch );
bool isWhitespaceOrPunct( wchar ch );
bool isWhitespace( char32_t ch );
bool isWhitespaceOrPunct( char32_t ch );

/// Returns true if the given character is any form of punctuation, false
/// otherwise. Punctuation corresponds to Pc/Pd/Pe/Pf/Pi/Po/Ps classes.
bool isPunct( wchar ch );
bool isPunct( char32_t ch );

/// Removes any whitespace or punctuation from the beginning and the end of
/// the word.
wstring trimWhitespaceOrPunct( wstring const & );
std::u32string trimWhitespaceOrPunct( std::u32string const & );
QString trimWhitespaceOrPunct( QString const & in );

/// Removes any whitespace from the beginning and the end of
/// the word.
wstring trimWhitespace( wstring const & );
std::u32string trimWhitespace( std::u32string const & );
QString trimWhitespace( QString const & in );

/// Same as apply( std::u32string ), but without any heap operations, therefore
Expand All @@ -86,6 +84,6 @@ QString unescapeWildcardSymbols( QString const & );
QString escapeWildcardSymbols( QString const & );

/// Tests if the given char is one of the Unicode combining marks.
bool isCombiningMark( wchar ch );
bool isCombiningMark( char32_t ch );

} // namespace Folding
Loading

0 comments on commit 0c42c30

Please sign in to comment.