Skip to content

Commit

Permalink
Merge pull request #80 from UN-OCHA/develop
Browse files (browse the repository at this point in the history)
Fix Arabic OCR
  • Loading branch information
attiks authored Apr 24, 2024
2 parents bfe9bf1 + d7f920e commit 6b90bb6
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 2 deletions.
13 changes: 13 additions & 0 deletions PATCHES/core--drupal--3418098-php-mailer.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/core/lib/Drupal/Core/Mail/Plugin/Mail/PhpMail.php b/core/lib/Drupal/Core/Mail/Plugin/Mail/PhpMail.php
index 09a56006a6..1767dc4875 100644
--- a/core/lib/Drupal/Core/Mail/Plugin/Mail/PhpMail.php
+++ b/core/lib/Drupal/Core/Mail/Plugin/Mail/PhpMail.php
@@ -113,7 +113,7 @@ public function mail(array $message) {
$mail_body = preg_replace('@\r?\n@', $line_endings, $message['body']);
$mail_headers = $headers->toString();

- if (!$this->request->server->has('WINDIR') && !str_contains($this->request->server->get('SERVER_SOFTWARE'), 'Win32')) {
+ if (!$this->request->server->has('WINDIR') && !str_contains($this->request->server->get('SERVER_SOFTWARE', ''), 'Win32')) {
// On most non-Windows systems, the "-f" option to the sendmail command
// is used to set the Return-Path. There is no space between -f and
// the value of the return path.
3 changes: 3 additions & 0 deletions composer.patches.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
"drupal/admin_feedback": {
"https://www.drupal.org/project/admin_feedback/issues/3389123": "https://www.drupal.org/files/issues/2023-09-22/admin_feedback-install_from_config-3112866-2.patch"
},
"drupal/core": {
"https://www.drupal.org/project/drupal/issues/3418098": "PATCHES/core--drupal--3418098-php-mailer.patch"
},
"drupal/csp": {
"Simplify log format": "PATCHES/csp-log-format.patch"
},
Expand Down
15 changes: 13 additions & 2 deletions html/modules/custom/ocha_ai_summarize/ocha_ai_summarize.module
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Middleware;
use OpenAI\Client;
use PhpOffice\PhpWord\Element\TextRun;
use PhpOffice\PhpWord\IOFactory;
use Psr\Http\Message\ResponseInterface;
use Symfony\Component\Process\Process;
Expand Down Expand Up @@ -799,7 +800,16 @@ function ocha_ai_summarize_extract_pages_from_doc($filename, $document_language
}
}
elseif (method_exists($element, 'getText')) {
$text .= $element->getText() . "\n";
$t = $element->getText();
if (is_string($t)) {
$text .= $element->getText() . "\n";
}
elseif ($t instanceof TextRun) {
$text .= $t->getText() . "\n";
}
else {
// Ignore it.
}
}
}

Expand Down Expand Up @@ -1288,7 +1298,7 @@ function ocha_ai_summarize_check_length($text, $bot) {
break;

case 'bedrock':
$max_tokens = 3.5 * 42000;
$max_tokens = 2 * 42000;
break;
}

Expand Down Expand Up @@ -1359,6 +1369,7 @@ function ocha_ai_summarize_get_lang_name($code) {
function ocha_ai_summarize_get_lang_code($code) {
$lang_codes = [
'Arabic' => 'ar',
'ara' => 'ar',
'chi_sim' => 'zh-hans',
'eng' => 'en',
'fra' => 'fr',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,14 @@ public function processItem($data) {
$bot = $data->brain ?? 'openai';
$nid = $data->nid;
$document_language = $data->language ?? 'eng';
if ($document_language == 'Arabic') {
$document_language = 'ara';
}

$output_language = $data->output_language ?? 'eng';
if ($output_language == 'Arabic') {
$output_language = 'ara';
}

if (empty($nid)) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ public static function create(ContainerInterface $container, array $configuratio
public function processItem($data) {
$nid = $data->nid;
$document_language = $data->language ?? 'eng';
if ($document_language == 'Arabic') {
$document_language = 'ara';
}

if (empty($nid)) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,14 @@ public function processItem($data) {
$nid = $data->nid;
$num_paragraphs = $data->num_paragraphs;
$document_language = $data->language ?? 'eng';
if ($document_language == 'Arabic') {
$document_language = 'ara';
}

$output_language = $data->output_language ?? 'eng';
if ($output_language == 'Arabic') {
$output_language = 'ara';
}

if (empty($nid)) {
return;
Expand Down

0 comments on commit 6b90bb6

Please sign in to comment.