Skip to content

Commit

Permalink
Implemented fixes for supporting webpack; fixed bug in previous commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Aug 27, 2024
1 parent dd99124 commit 3051366
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 34 deletions.
14 changes: 8 additions & 6 deletions js/fontContainerMain.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@ import { gs } from './generalWorkerMain.js';
* @param {string} fileName
*/
async function fontPathToArrayBuffer(fileName) {
const browserMode = typeof process === 'undefined';
const absPath = getFontAbsPath(fileName);

if (browserMode) {
if (typeof process === 'undefined') {
const res = await fetch(absPath);
return res.arrayBuffer();
// Important: Do not remove `else` statement.
// Some build tools (Webpack) need it to avoid trying to import `node:fs` in the browser.
// eslint-disable-next-line no-else-return
} else {
const { readFileSync } = await import('node:fs');
const res = readFileSync(absPath);
return res.buffer;
}
const { readFileSync } = await import('node:fs');
const res = readFileSync(absPath);
return res.buffer;
}

async function fontPathToArrayBufferAll(fileNameObj) {
Expand Down Expand Up @@ -117,7 +120,6 @@ export async function loadChiSimFont() {
* This should be used when switching from unvalidated to validated optimized fonts.
*/
export async function enableFontOpt(enable, useInitial = false, forceWorkerUpdate = false) {

// Enable/disable optimized font
if (enable && useInitial && fontAll.optInitial) {
fontAll.active = fontAll.optInitial;
Expand Down
12 changes: 5 additions & 7 deletions js/fontEval.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ import { DebugData, fontMetricsObj, pageMetricsArr } from './containers/dataCont
import { fontAll } from './containers/fontContainer.js';
import { ImageCache } from './containers/imageContainer.js';
import {
enableFontOpt, optimizeFontContainerAll, setDefaultFontAuto, loadBuiltInFontsRaw,
enableFontOpt,
loadBuiltInFontsRaw,
optimizeFontContainerAll, setDefaultFontAuto,
} from './fontContainerMain.js';
import { gs } from './generalWorkerMain.js';

Expand All @@ -15,8 +17,6 @@ import { gs } from './generalWorkerMain.js';
export async function evalPageFonts(font, pageArr, n = 500) {
if (!gs.scheduler) throw new Error('GeneralScheduler must be defined before this function can run.');

const browserMode = typeof process === 'undefined';

let metricTotal = 0;
let wordsTotal = 0;

Expand All @@ -28,7 +28,7 @@ export async function evalPageFonts(font, pageArr, n = 500) {
// The Node.js canvas package does not currently support worker threads
// https://github.com/Automattic/node-canvas/issues/1394
let res;
if (!browserMode) {
if (!(typeof process === 'undefined')) {
const { evalPageFont } = await import('./worker/compareOCRModule.js');

res = await evalPageFont({
Expand Down Expand Up @@ -162,8 +162,6 @@ export async function evaluateFonts(pageArr) {
* and (2) no images are provided to compare against.
*/
export async function runFontOptimization(ocrArr) {
const browserMode = typeof process === 'undefined';

await loadBuiltInFontsRaw();

const fontRaw = fontAll.getContainer('raw');
Expand Down Expand Up @@ -195,7 +193,7 @@ export async function runFontOptimization(ocrArr) {
await enableFontOpt(false);

// This step needs to happen here as all fonts must be registered before initializing the canvas.
if (!browserMode) {
if (!(typeof process === 'undefined')) {
const { initCanvasNode } = await import('./worker/compareOCRModule.js');
await initCanvasNode();
}
Expand Down
6 changes: 2 additions & 4 deletions js/recognizeConvert.js
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,10 @@ export function checkCharWarn(warnArr) {
const charWarnCt = warnArr.filter((x) => x?.char === 'char_warning').length;
const charGoodCt = warnArr.length - charErrorCt - charWarnCt;

const browserMode = typeof process === 'undefined';

// The UI warning/error messages cannot be thrown within this function,
// as that would make this file break when imported into contexts that do not have the main UI.
if (charGoodCt === 0 && charErrorCt > 0) {
if (browserMode) {
if (typeof process === 'undefined') {
const errorHTML = `No character-level OCR data detected. Abbyy XML is only supported with character-level data.
<a href="https://docs.scribeocr.com/faq.html#is-character-level-ocr-data-required--why" target="_blank" class="alert-link">Learn more.</a>`;
opt.errorHandler(errorHTML);
Expand All @@ -281,7 +279,7 @@ export function checkCharWarn(warnArr) {
opt.errorHandler(errorText);
}
} if (charGoodCt === 0 && charWarnCt > 0) {
if (browserMode) {
if (typeof process === 'undefined') {
const warningHTML = `No character-level OCR data detected. Font optimization features will be disabled.
<a href="https://docs.scribeocr.com/faq.html#is-character-level-ocr-data-required--why" target="_blank" class="alert-link">Learn more.</a>`;
opt.warningHandler(warningHTML);
Expand Down
6 changes: 2 additions & 4 deletions js/utils/imageUtils.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
/* eslint-disable no-bitwise */

const browserMode = typeof process === 'undefined';
/**
* Handles various image formats, always returns a ImageBitmap.
*
Expand All @@ -13,7 +11,7 @@ export async function getImageBitmap(img) {
if (img === null) throw new Error('Input is null');

if (typeof img === 'string') {
if (browserMode) {
if (typeof process === 'undefined') {
const imgBlob = imageStrToBlob(img);
const imgBit = await createImageBitmap(imgBlob);
return imgBit;
Expand All @@ -26,7 +24,7 @@ export async function getImageBitmap(img) {
// In Node.js the input is assumed to be already compatible with the `canvas.drawImage` method.
// Additionally, `ImageBitmap` does not exist within the Node canvas package.
// Second condition exists for type detection purposes.
if (!browserMode && (typeof img !== 'string') && (typeof img !== 'number')) return img;
if (!(typeof process === 'undefined') && (typeof img !== 'string') && (typeof img !== 'number')) return img;

return img;
}
Expand Down
8 changes: 3 additions & 5 deletions js/worker/compareOCRModule.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ import { drawWordActual, drawWordRender } from './renderWordCanvas.js';
import { fontAll } from '../containers/fontContainer.js';
// import { CompDebug } from '../objects/imageObjects.js';

const browserMode = typeof process === 'undefined';

/** @type {OffscreenCanvasRenderingContext2D} */
let calcCtx;
/** @type {OffscreenCanvasRenderingContext2D} */
Expand All @@ -22,7 +20,7 @@ let viewCtx1;
let viewCtx2;

// Browser case
if (browserMode) {
if (typeof process === 'undefined') {
// For whatever reason, this can fail silently in some browsers that do not support OffscreenCanvas, where the worker simply stops running.
// Therefore, an explicit error message is added here to make the issue evident. Features will still fail, so this is not a fix.
try {
Expand Down Expand Up @@ -327,7 +325,7 @@ export async function evalWords({
/** @type {?CompDebugBrowser|CompDebugNode} */
let debugImg = null;
if (view) {
if (browserMode) {
if (typeof process === 'undefined') {
const imageRaw = await viewCtx0.canvas.convertToBlob();
const imageA = await viewCtx1.canvas.convertToBlob();
const imageB = await viewCtx2.canvas.convertToBlob();
Expand Down Expand Up @@ -979,7 +977,7 @@ export async function checkWords(wordsA, binaryImage, imageRotated, pageMetricsO
tessedit_pageseg_mode: '6', // "Single block"
};

const inputImage = browserMode ? await calcCtx.canvas.convertToBlob() : await calcCtx.canvas.toBuffer('image/png');
const inputImage = typeof process === 'undefined' ? await calcCtx.canvas.convertToBlob() : await calcCtx.canvas.toBuffer('image/png');

let res;
if (options.tessScheduler) {
Expand Down
5 changes: 2 additions & 3 deletions js/worker/generalWorker.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ import {
import { optimizeFont } from './optimizeFontModule.js';

// import Tesseract from "../../tess/tesseract.esm.min.js";
const browserMode = typeof process === 'undefined';

const Tesseract = browserMode ? (await import('../../tess/tesseract.esm.min.js')).default : await import('@scribe.js/tesseract.js');
const Tesseract = typeof process === 'undefined' ? (await import('../../tess/tesseract.esm.min.js')).default : await import('@scribe.js/tesseract.js');

const defaultConfigs = {
// TODO: Add back support for multiple PSM modes.
Expand Down Expand Up @@ -64,7 +63,7 @@ const corePath = vanillaMode_ ? '../tess/core_vanilla/' : '../tess/core/';

// Custom build is currently only used for browser version, while the Node.js version uses the published npm package.
// If recognition capabilities are ever added for the Node.js version, then we should use the same build for consistency.
const tessConfig = browserMode ? {
const tessConfig = typeof process === 'undefined' ? {
corePath,
workerPath: '../../tess/worker.min.js',
// langPath: '/tess/tessdata_dist',
Expand Down
9 changes: 5 additions & 4 deletions scribe.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ import { ImageCache } from './js/containers/imageContainer.js';
import coords from './js/coordinates.js';
import { drawDebugImages } from './js/debug.js';
import { download, exportData } from './js/export/export.js';
import { writeDebugCsv, convertToCSV } from './js/export/exportDebugCsv.js';
import { convertToCSV, writeDebugCsv } from './js/export/exportDebugCsv.js';
import { extractSingleTableContent } from './js/export/exportWriteTabular.js';
import { loadBuiltInFontsRaw, enableFontOpt } from './js/fontContainerMain.js';
import { extractInternalPDFText } from './js/extractPDFText.js';
import { enableFontOpt, loadBuiltInFontsRaw } from './js/fontContainerMain.js';
import { gs } from './js/generalWorkerMain.js';
import { importFiles, importFilesSupp } from './js/import/import.js';
import { calcBoxOverlap, combineOCRPage } from './js/modifyOCR.js';
Expand All @@ -30,7 +31,6 @@ import { imageStrToBlob } from './js/utils/imageUtils.js';
import { countSubstringOccurrences, getRandomAlphanum, replaceSmartQuotes } from './js/utils/miscUtils.js';
import { calcConf, mergeOcrWords, splitOcrWord } from './js/utils/ocrUtils.js';
import { assignParagraphs } from './js/utils/reflowPars.js';
import { extractInternalPDFText } from './js/extractPDFText.js';

/**
* Initialize the program and optionally pre-load resources.
Expand Down Expand Up @@ -83,7 +83,8 @@ const extractText = async (files, langs = ['eng'], outputFormat = 'txt', options
const skipRecPDFTextOCR = options?.skipRecPDFTextOCR ?? false;
init({ ocr: true, font: true });
await importFiles(files, { extractPDFTextNative: skipRecPDFTextNative, extractPDFTextOCR: skipRecPDFTextOCR });
if (!(ImageCache.pdfType === 'text' && skipRecPDFTextNative || ImageCache.pdfType === 'ocr' && skipRecPDFTextOCR)) await recognize({ langs });
const skipRecPDF = inputData.pdfMode && (ImageCache.pdfType === 'text' && skipRecPDFTextNative || ImageCache.pdfType === 'ocr' && skipRecPDFTextOCR);
if (!skipRecPDF) await recognize({ langs });
return exportData(outputFormat);
};

Expand Down
2 changes: 1 addition & 1 deletion scrollview-web

0 comments on commit 3051366

Please sign in to comment.