Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: remove qtextcodec #2064

Merged
merged 5 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions src/common/iconv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,50 @@ Iconv::~Iconv()
iconv_close( state );
}

/// Encodes @p input into the character set named by @p toEncoding.
///
/// The string is first serialised to UTF-8 and then run through iconv with
/// "UTF-8" as the source encoding. The output buffer starts at four bytes per
/// input byte and is enlarged whenever iconv reports E2BIG.
///
/// @param input       Text to encode.
/// @param toEncoding  iconv name of the target encoding; passed to iconv_open as-is.
/// @return The encoded bytes; empty when @p input is empty.
/// @throws std::runtime_error if iconv_open fails, or if iconv fails with any
///         error other than E2BIG (e.g. a character that cannot be represented
///         in the target encoding).
QByteArray Iconv::fromUnicode( const QString & input, const char * toEncoding )
{
  // iconv works on bytes, so go through UTF-8 first.
  const QByteArray utf8Data = input.toUtf8();
  const char * inBuf        = utf8Data.constData();
  size_t inBytesLeft        = utf8Data.size();

  iconv_t cd = iconv_open( toEncoding, "UTF-8" );
  if ( cd == (iconv_t)-1 ) {
    throw std::runtime_error( "iconv_open failed" );
  }

  // Closes the descriptor on every exit path, including exceptions thrown by
  // the buffer allocations below.
  struct DescriptorCloser
  {
    iconv_t handle;
    ~DescriptorCloser()
    {
      iconv_close( handle );
    }
  } const closer{ cd };

  size_t outBytesLeft = inBytesLeft * 4; // initial guess, grown on demand
  std::vector< char > outBuf( outBytesLeft );
  char * outBufPtr = outBuf.data();

  // Enlarges the output buffer while keeping the write cursor valid across the
  // reallocation. Always grows by a non-zero amount so that the flush step
  // below (where no input is left) still makes progress.
  auto growOutput = [ & ]() {
    const size_t used  = outBufPtr - outBuf.data();
    const size_t extra = inBytesLeft > 0 ? inBytesLeft * 4 : 16;
    outBuf.resize( outBuf.size() + extra );
    outBufPtr = outBuf.data() + used;
    outBytesLeft += extra;
  };

  const size_t iconvFailed = static_cast< size_t >( -1 );

  // Convert the payload.
  while ( inBytesLeft > 0 ) {
    if ( iconv( cd, const_cast< char ** >( &inBuf ), &inBytesLeft, &outBufPtr, &outBytesLeft ) != iconvFailed ) {
      continue;
    }
    if ( errno != E2BIG ) {
      throw std::runtime_error( "iconv conversion failed" );
    }
    growOutput();
  }

  // Flush: for stateful target encodings this emits the sequence that returns
  // the output to its initial shift state; for stateless ones it writes nothing.
  while ( iconv( cd, nullptr, nullptr, &outBufPtr, &outBytesLeft ) == iconvFailed ) {
    if ( errno != E2BIG ) {
      throw std::runtime_error( "iconv conversion failed" );
    }
    growOutput();
  }

  // Only the bytes actually written belong to the result.
  const size_t written = outBuf.size() - outBytesLeft;
  return QByteArray( outBuf.data(), written );
}

QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
{
size_t dsz = inBytesLeft;
Expand Down
1 change: 1 addition & 0 deletions src/common/iconv.hh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public:
explicit Iconv( char const * from );

~Iconv();
static QByteArray fromUnicode( const QString & input, const char * toEncoding );

QString convert( void const *& inBuf, size_t & inBytesLeft );

Expand Down
96 changes: 36 additions & 60 deletions src/dict/epwing_book.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "folding.hh"
#include "epwing_charmap.hh"
#include "htmlescape.hh"
#include "iconv.hh"
#if defined( Q_OS_WIN32 ) || defined( Q_OS_MAC )
#define _FILE_OFFSET_BITS 64
#endif
Expand Down Expand Up @@ -146,10 +147,8 @@ EB_Error_Code
hook_iso8859_1( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code, int, const unsigned int * argv )
{
EpwingBook * ebook = static_cast< EpwingBook * >( container );
if ( ebook->codecISO() ) {
QByteArray b = ebook->codecISO()->toUnicode( (const char *)argv, 1 ).toUtf8();
eb_write_text( book, b.data(), b.size() );
}
QByteArray b = Iconv::toQString( ebook->codec_ISO_name, (const char *)argv, 1 ).toUtf8();
eb_write_text( book, b.data(), b.size() );
return EB_SUCCESS;
}

Expand All @@ -175,15 +174,12 @@ hook_narrow_jisx0208( EB_Book * book, EB_Appendix *, void * container, EB_Hook_C

if ( out_code == 0 ) {
EContainer * cont = static_cast< EContainer * >( container );
if ( cont->book->codecEuc() ) {
QByteArray str = cont->book->codecEuc()->toUnicode( (const char *)buf, 2 ).toUtf8();
eb_write_text( book, str.data(), str.size() );
}
else
eb_write_text( book, (const char *)buf, 2 );
QByteArray str = Iconv::toQString( cont->book->codec_Euc_name, (const char *)buf, 2 ).toUtf8();
eb_write_text( book, str.data(), str.size() );
}
else
else {
eb_write_text_byte1( book, out_code );
}
}

return EB_SUCCESS;
Expand All @@ -198,12 +194,8 @@ hook_wide_jisx0208( EB_Book * book, EB_Appendix *, void * ptr, EB_Hook_Code, int
buf[ 1 ] = *argv & 0xFF;
buf[ 0 ] = ( *argv & 0xFF00 ) >> 8;

if ( ebook->codecEuc() ) {
QByteArray b = ebook->codecEuc()->toUnicode( buf, 2 ).toUtf8();
eb_write_text( book, b.data(), b.size() );
}
else
eb_write_text_byte2( book, buf[ 0 ], buf[ 1 ] );
QByteArray b = Iconv::toQString( ebook->codec_Euc_name, buf, 2 ).toUtf8();
eb_write_text( book, b.data(), b.size() );

return EB_SUCCESS;
}
Expand All @@ -217,12 +209,8 @@ hook_gb2312( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code, int,
buf[ 1 ] = *argv & 0xFF;
buf[ 0 ] = ( *argv & 0xFF00 ) >> 8;

if ( ebook->codecGB() ) {
QByteArray b = ebook->codecGB()->toUnicode( buf, 2 ).toUtf8();
eb_write_text( book, b.data(), b.size() );
}
else
eb_write_text_byte2( book, buf[ 0 ], buf[ 1 ] );
QByteArray b = Iconv::toQString( ebook->codec_GB_name, buf, 2 ).toUtf8();
eb_write_text( book, b.data(), b.size() );

return EB_SUCCESS;
}
Expand Down Expand Up @@ -397,9 +385,9 @@ hook_candidate( EB_Book * book, EB_Appendix *, void * container, EB_Hook_Code co
EpwingBook::EpwingBook():
currentSubBook( -1 )
{
codec_ISO = QTextCodec::codecForName( "ISO8859-1" );
codec_GB = QTextCodec::codecForName( "GB2312" );
codec_Euc = QTextCodec::codecForName( "EUC-JP" );
codec_ISO_name = "ISO8859-1";
codec_GB_name = "GB2312";
codec_Euc_name = "EUC-JP";

eb_initialize_book( &book );
eb_initialize_appendix( &appendix );
Expand All @@ -422,8 +410,8 @@ void EpwingBook::setErrorString( QString const & func, EB_Error_Code code )
{
error_string = QString( "EB \"%1\" function error: %2 (%3)" )
.arg( func )
.arg( QTextCodec::codecForLocale()->toUnicode( eb_error_string( code ) ) )
.arg( QTextCodec::codecForLocale()->toUnicode( eb_error_message( code ) ) );
.arg( QString::fromLocal8Bit( eb_error_string( code ) ) )
.arg( QString::fromLocal8Bit( eb_error_message( code ) ) );

if ( currentPosition.page != 0 )
error_string += QString( " on page %1, offset %2" )
Expand Down Expand Up @@ -488,9 +476,6 @@ int EpwingBook::setBook( string const & directory )
setErrorString( "eb_appendix_subbook_list", ret );
}

if ( !codec_Euc || ( book.character_code == EB_CHARCODE_ISO8859_1 && !codec_ISO )
|| ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && !codec_GB ) )
throw exEpwing( "No required codec to decode dictionary" );

rootDir = QString::fromStdString( directory );

Expand Down Expand Up @@ -657,10 +642,7 @@ QString EpwingBook::title()
}

buf[ EB_MAX_TITLE_LENGTH ] = 0;
if ( codec_Euc )
return codec_Euc->toUnicode( buf );

return {};
return Iconv::toQString( codec_Euc_name, buf, strlen( buf ) );
}

QString EpwingBook::copyright()
Expand Down Expand Up @@ -1086,14 +1068,12 @@ bool EpwingBook::isHeadwordCorrect( QString const & headword )
if ( headword.isEmpty() )
return false;

if ( book.character_code == EB_CHARCODE_ISO8859_1 && codec_ISO )
buf = codec_ISO->fromUnicode( headword );
else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 )
&& codec_Euc )
buf = codec_Euc->fromUnicode( headword );

if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && codec_GB )
buf2 = codec_GB->fromUnicode( headword );
if ( book.character_code == EB_CHARCODE_ISO8859_1 )
buf = Iconv::fromUnicode( headword, codec_ISO_name );
else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) )
buf = Iconv::fromUnicode( headword, codec_Euc_name );
if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 )
buf2 = Iconv::fromUnicode( headword, codec_GB_name );

if ( !buf.isEmpty() && eb_search_exactword( &book, buf.data() ) == EB_SUCCESS ) {
ret = eb_hit_list( &book, 2, hits, &hit_count );
Expand Down Expand Up @@ -1846,9 +1826,7 @@ QString EpwingBook::currentCandidate()
const char * s = eb_current_candidate( &book );
if ( book.character_code == EB_CHARCODE_ISO8859_1 )
return QString::fromLatin1( s );
if ( codec_Euc )
return codec_Euc->toUnicode( s );
return QString{};
return Iconv::toQString( codec_Euc_name, s, strlen( s ) );
}

bool EpwingBook::getMatches( QString word, QList< QString > & matches )
Expand All @@ -1857,14 +1835,13 @@ bool EpwingBook::getMatches( QString word, QList< QString > & matches )
EB_Hit hits[ HitsBufferSize ];
int hitCount = 0;

if ( book.character_code == EB_CHARCODE_ISO8859_1 && codec_ISO )
bword = codec_ISO->fromUnicode( word );
else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 )
&& codec_Euc )
bword = codec_Euc->fromUnicode( word );
if ( book.character_code == EB_CHARCODE_ISO8859_1 )
bword = Iconv::fromUnicode( word, codec_ISO_name );
else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) )
bword = Iconv::fromUnicode( word, codec_Euc_name );

if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && codec_GB )
bword2 = codec_GB->fromUnicode( word );
if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 )
bword2 = Iconv::fromUnicode( word, codec_GB_name );

if ( !bword.isEmpty() ) {
EB_Error_Code ret = eb_search_word( &book, bword.data() );
Expand Down Expand Up @@ -1928,14 +1905,13 @@ bool EpwingBook::getArticlePos( QString word, QList< int > & pages, QList< int >
EB_Hit hits[ HitsBufferSize ];
int hitCount = 0;

if ( book.character_code == EB_CHARCODE_ISO8859_1 && codec_ISO )
bword = codec_ISO->fromUnicode( word );
else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 )
&& codec_Euc )
bword = codec_Euc->fromUnicode( word );
if ( book.character_code == EB_CHARCODE_ISO8859_1 )
bword = Iconv::fromUnicode( word, codec_ISO_name );
else if ( ( book.character_code == EB_CHARCODE_JISX0208 || book.character_code == EB_CHARCODE_JISX0208_GB2312 ) )
bword = Iconv::fromUnicode( word, codec_Euc_name );

if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 && codec_GB )
bword2 = codec_GB->fromUnicode( word );
if ( book.character_code == EB_CHARCODE_JISX0208_GB2312 )
bword2 = Iconv::fromUnicode( word, codec_GB_name );

if ( !bword.isEmpty() ) {
EB_Error_Code ret = eb_search_exactword( &book, bword.data() );
Expand Down
18 changes: 1 addition & 17 deletions src/dict/epwing_book.hh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#endif

#include <QString>
#include <QtCore5Compat/QTextCodec>

// POSIX symbol unavailable on Windows needed for eb headers
#ifdef Q_OS_WIN
Expand Down Expand Up @@ -72,7 +71,6 @@ class EpwingBook
QString mainCacheDir, rootDir;
QString cacheImagesDir, cacheSoundsDir, cacheMoviesDir, cacheFontsDir;
QString dictID;
QTextCodec *codec_ISO, *codec_GB, *codec_Euc;
QStack< unsigned int > decorationStack;
int monoWidth, monoHeight;
QStringList imageCacheList, soundsCacheList, moviesCacheList, fontsCacheList;
Expand Down Expand Up @@ -110,6 +108,7 @@ class EpwingBook
QByteArray codeToUnicode( QString const & code );

public:
const char *codec_ISO_name, *codec_GB_name, *codec_Euc_name;

enum DecorationCodes {
UNKNOWN = 0,
Expand All @@ -133,21 +132,6 @@ public:
return error_string;
}

QTextCodec * codecISO()
{
return codec_ISO;
}

QTextCodec * codecGB()
{
return codec_GB;
}

QTextCodec * codecEuc()
{
return codec_Euc;
}

int getSubBookCount()
{
return subBookCount;
Expand Down
6 changes: 2 additions & 4 deletions src/dict/mdictparser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,11 @@
#include <QDomDocument>
#include <QTextDocumentFragment>
#include <QDataStream>
#include <QtCore5Compat/QTextCodec>

#include "decompress.hh"
#include "ripemd.hh"
#include "utils.hh"
#include "htmlescape.hh"
#include "iconv.hh"

namespace Mdict {

Expand Down Expand Up @@ -187,8 +186,7 @@ QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t f
return QString();
}

QTextCodec * codec = QTextCodec::codecForName( fromCode );
return codec->toUnicode( from, fromSize );
return Iconv::toQString( fromCode, from, fromSize );
}

bool MdictParser::decryptHeadWordIndex( char * buffer, qint64 len )
Expand Down
8 changes: 1 addition & 7 deletions src/dict/website.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ class WebSiteArticleRequest: public WebSiteDataRequestSlots
private:

void requestFinished( QNetworkReply * ) override;
static QTextCodec * codecForHtml( QByteArray const & ba );
};

void WebSiteArticleRequest::cancel()
Expand Down Expand Up @@ -152,11 +151,6 @@ WebSiteArticleRequest::WebSiteArticleRequest( QString const & url_, QNetworkAcce
#endif
}

QTextCodec * WebSiteArticleRequest::codecForHtml( QByteArray const & ba )
{
return QTextCodec::codecForHtml( ba, 0 );
}

void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
{
if ( isFinished() ) { // Was cancelled
Expand Down Expand Up @@ -188,7 +182,7 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
QByteArray replyData = netReply->readAll();
QString articleString;

QTextCodec * codec = WebSiteArticleRequest::codecForHtml( replyData );
QTextCodec * codec = QTextCodec::codecForHtml( replyData, 0 );
if ( codec ) {
articleString = codec->toUnicode( replyData );
}
Expand Down
8 changes: 4 additions & 4 deletions src/iframeschemehandler.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "iframeschemehandler.hh"

#include <QTextCodec>
#include "iconv.hh"

IframeSchemeHandler::IframeSchemeHandler( QObject * parent ):
QWebEngineUrlSchemeHandler( parent )
Expand Down Expand Up @@ -36,9 +36,9 @@ void IframeSchemeHandler::requestStarted( QWebEngineUrlRequestJob * requestJob )
QByteArray replyData = reply->readAll();
QString articleString;

QTextCodec * codec = QTextCodec::codecForUtfText( replyData, QTextCodec::codecForName( codecName.toUtf8() ) );
if ( codec ) {
articleString = codec->toUnicode( replyData );
auto encoding = Iconv::findValidEncoding( { codecName } );
if ( !encoding.isEmpty() ) {
articleString = Iconv::toQString( encoding.toUtf8().constData(), replyData.data(), replyData.size() );
}
else {
articleString = QString::fromUtf8( replyData );
Expand Down
Loading