Skip to content

Commit

Permalink
Generalize charset converting
Browse files Browse the repository at this point in the history
  • Loading branch information
linniksa committed Aug 28, 2015
1 parent 3fbee96 commit 7cb5de0
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 22 deletions.
2 changes: 1 addition & 1 deletion src/Fetch/MIME.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static function decode($text, $targetCharset = 'utf-8')
foreach (imap_mime_header_decode($text) as $word) {
$ch = 'default' === $word->charset ? 'ascii' : $word->charset;

$result .= iconv($ch, $targetCharset, $word->text);
$result .= Message::charsetConvert($word->text, $ch, $targetCharset) ?: $text;
}

return $result;
Expand Down
63 changes: 42 additions & 21 deletions src/Fetch/Message.php
Original file line number Diff line number Diff line change
Expand Up @@ -520,27 +520,8 @@ protected function processStructure($structure, $partIdentifier = null)

$messageBody = self::decode($messageBody, $structure->encoding);

if (!empty($parameters['charset']) && $parameters['charset'] !== self::$charset) {
$mb_converted = false;
if (function_exists('mb_convert_encoding')) {
if (!in_array($parameters['charset'], mb_list_encodings())) {
if ($structure->encoding === 0) {
$parameters['charset'] = 'US-ASCII';
} else {
$parameters['charset'] = 'UTF-8';
}
}

$messageBody = @mb_convert_encoding($messageBody, self::$charset, $parameters['charset']);
$mb_converted = true;
}
if (!$mb_converted) {
$messageBodyConv = @iconv($parameters['charset'], self::$charset . self::$charsetFlag, $messageBody);

if ($messageBodyConv !== false) {
$messageBody = $messageBodyConv;
}
}
if (!empty($parameters['charset'])) {
$messageBody = self::charsetConvert($messageBody, $parameters['charset'], self::$charset) ?: $messageBody;
}

if (strtolower($structure->subtype) === 'plain' || ($structure->type == 1 && strtolower($structure->subtype) !== 'alternative')) {
Expand Down Expand Up @@ -575,6 +556,46 @@ protected function processStructure($structure, $partIdentifier = null)
}
}

/**
 * Converts text from one character set to another.
 *
 * mbstring is tried first, but only when it accepts the input as valid in the
 * source charset; iconv (with self::$charsetFlag appended to the target) is
 * used as a fallback.
 *
 * @param string      $text Text to convert.
 * @param string      $from Source charset name (compared case-insensitively).
 * @param string|null $to   Target charset name; self::$charset is used when null.
 *
 * @return string|null The converted text, '' for empty input, the input itself when
 *                     source and target charsets are the same, or null when neither
 *                     mbstring nor iconv produced a non-empty result.
 */
public static function charsetConvert($text, $from, $to = null)
{
    // Compare explicitly instead of using truthiness: the text "0" is valid input
    // and must not be treated as empty.
    if (null === $text || false === $text || '' === $text) {
        return '';
    }

    if (null === $to) {
        $to = self::$charset;
    }

    $from = strtolower($from);
    $to = strtolower($to);

    if ($from === $to) {
        return $text;
    }

    $converted = null;

    if (function_exists('mb_convert_encoding')) {
        try {
            if (@mb_check_encoding($text, $from)) {
                $converted = @mb_convert_encoding($text, $to, $from);
            }
        } catch (\ValueError $e) {
            // On PHP 8+ mbstring throws for charset names it does not know, and "@"
            // cannot suppress an exception. Treat it as "not converted" so iconv
            // still gets a chance. On older PHP versions this catch never matches.
            $converted = null;
        }
    }

    // An empty or non-string result means mbstring did not convert the text.
    if ((!is_string($converted) || '' === $converted) && function_exists('iconv')) {
        $converted = @iconv($from, $to . self::$charsetFlag, $text);
    }

    // iconv returns false on failure; "0" is a legitimate conversion result.
    if (is_string($converted) && '' !== $converted) {
        return $converted;
    }

    return null;
}

/**
* This function takes in the message data and encoding type and returns the decoded data.
*
Expand Down
15 changes: 15 additions & 0 deletions tests/Fetch/Test/MessageTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,21 @@ public function testMoveToMailbox()
$this->assertEquals($sentFolderNumStart + 1, $server->numMessages(), 'Message moved into Sent Folder.');
}

/**
 * Checks Message::charsetConvert() for a real KOI8-R to UTF-8 conversion,
 * the short-circuit paths (identical charsets, empty text) and the null
 * result for unknown source or destination charsets.
 */
public function testCharsetConvert()
{
    // The word "Привет" encoded as raw KOI8-R bytes.
    $koi8Text = "\xF0\xD2\xC9\xD7\xC5\xD4";
    $fromKoi8 = Message::charsetConvert($koi8Text, 'koi8-r', 'utf-8');
    $this->assertSame('Привет', $fromKoi8);

    // Identical charsets and empty input return before any conversion is attempted.
    $sameCharset = Message::charsetConvert('test', 'unk1', 'unk1');
    $this->assertSame('test', $sameCharset, 'Same charsets not try converting');

    $emptyText = Message::charsetConvert('', 'unk1', 'unk1');
    $this->assertSame('', $emptyText, 'Empty text not try converting');

    // Unknown charsets on either side yield null.
    $unknownSource = Message::charsetConvert('test', 'unk1', 'utf-8');
    $this->assertSame(null, $unknownSource, 'Null when source charset is unknown');

    $unknownDestination = Message::charsetConvert('test', 'utf-8', 'unk1');
    $this->assertSame(null, $unknownDestination, 'Null when destination charset is unknown');
}

public function testDecode()
{
$quotedPrintableDecoded = "Now's the time for all folk to come to the aid of their country.";
Expand Down

0 comments on commit 7cb5de0

Please sign in to comment.