From b835d5b852b41df0b552e79b662e7f0e59fa9660 Mon Sep 17 00:00:00 2001 From: Oskari Noppa Date: Tue, 19 Mar 2024 00:47:11 +0200 Subject: [PATCH] Optimize input reading and output writing This commit aims to fix issue #20. Use the Emscripten FS.writeFile API for accepting XML input files, instead of the createDataFile and especially the intArrayFromString function. Those were inherited from the parent upstream project, but this writeFile API seems to be simpler to use and performs better. The bigger fix, though, is on the output side: pushing stdout onto an array one piece at a time (I guess it was one byte at a time?) caused the stdoutBuffer array to eventually grow so large that it'd throw > RangeError [Error]: Invalid array length when the output was very big, like when normalizing a big input XML, as described in #20. Here, too, we can switch to the print/printErr APIs, which seem to be not only simpler but also more resilient to the input size growing. --- src/worker-post.js | 25 +++++++++++++++---------- src/worker-pre.js | 2 +- test/test-valid-c14n.xml | 2 +- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/worker-post.js b/src/worker-post.js index 9362405..216811b 100644 --- a/src/worker-post.js +++ b/src/worker-post.js @@ -3,18 +3,14 @@ const {parentPort} = require('worker_threads'); // #endif - function bytesToUtf8(buffer) { - return new TextDecoder().decode(Uint8Array.from(buffer)); - } - - const stdoutBuffer = []; - const stderrBuffer = []; + let stdout = ''; + let stderr = ''; function onExit(exitCode) { const message = { exitCode, - stdout: bytesToUtf8(stdoutBuffer), - stderr: bytesToUtf8(stderrBuffer), + stdout, + stderr, }; // #ifdef node parentPort.postMessage(message); @@ -37,8 +33,17 @@ Module({ inputFiles: data.inputFiles, arguments: data.args, - stderr: stderrBuffer.push.bind(stderrBuffer), - stdout: stdoutBuffer.push.bind(stdoutBuffer), + // TODO: We could eagerly start sending stdout to the parent thread while + // waiting for more. 
Or we could probably use some other, more efficient + // Emscripten API for output communication in the first place. + // But this seems to work fine for now, better than pushing the stdout + // values to an array. + print(text) { + stdout += text + '\n'; + }, + printErr(text) { + stderr += text + '\n'; + }, onExit, wasmMemory, // #ifdef browser diff --git a/src/worker-pre.js b/src/worker-pre.js index e36995c..3f38150 100644 --- a/src/worker-pre.js +++ b/src/worker-pre.js @@ -1,5 +1,5 @@ Module['preRun'] = function () { Module['inputFiles'].forEach(function(inputFile) { - FS.createDataFile('/', inputFile['fileName'], intArrayFromString(inputFile['contents']), true, true); + FS.writeFile('/' + inputFile['fileName'], inputFile['contents']); }); }; diff --git a/test/test-valid-c14n.xml b/test/test-valid-c14n.xml index d282bfc..d86db9c 100644 --- a/test/test-valid-c14n.xml +++ b/test/test-valid-c14n.xml @@ -28,4 +28,4 @@ 1999-05-21 - \ No newline at end of file +