From 6574f502e0c36db3901eaa3b9fcfa873a6623b35 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Wed, 11 Sep 2024 16:45:59 +0000 Subject: [PATCH 1/4] Add test for decoding non utf-8 output --- .../test_execute_functions.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/archivematicaCommon/test_execute_functions.py b/tests/archivematicaCommon/test_execute_functions.py index 0effb27190..2273832fa3 100644 --- a/tests/archivematicaCommon/test_execute_functions.py +++ b/tests/archivematicaCommon/test_execute_functions.py @@ -1,6 +1,7 @@ import shlex import tempfile from unittest.mock import ANY +from unittest.mock import Mock from unittest.mock import patch import executeOrRunSubProcess as execsub @@ -89,3 +90,20 @@ def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp ) args, _ = launchSubProcess.call_args assert args[0][0].startswith(temp_path) + + +@patch("subprocess.Popen") +def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(Popen): + communicate_return_code = 0 + communicate_output = b"Output \xae" + communicate_error = b"Error \xae" + Popen.return_value = Mock( + returncode=communicate_return_code, + **{"communicate.return_value": (communicate_output, communicate_error)}, + ) + + code, stdout, stderr = execsub.launchSubProcess("mycommand", capture_output=True) + + assert code == communicate_return_code + assert stdout == communicate_output.decode(errors="replace") + assert stderr == communicate_error.decode(errors="replace") From c42a26939e4f2bd1a87838fb262072ba6f7dcab5 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Wed, 11 Sep 2024 16:55:15 +0000 Subject: [PATCH 2/4] Escape output and error in launchSubProcess --- src/archivematicaCommon/lib/executeOrRunSubProcess.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/archivematicaCommon/lib/executeOrRunSubProcess.py b/src/archivematicaCommon/lib/executeOrRunSubProcess.py index 86a3778906..b23bcfaedc 100644 --- a/src/archivematicaCommon/lib/executeOrRunSubProcess.py +++ b/src/archivematicaCommon/lib/executeOrRunSubProcess.py @@ -22,6 +22,8 @@ import sys import tempfile +from archivematicaFunctions import escape + def launchSubProcess( command, @@ -103,8 +105,8 @@ def launchSubProcess( env=my_env, ) std_out, std_error = p.communicate(input=communicate_input) - stdOut = std_out.decode() - stdError = std_error.decode() + stdOut = escape(std_out) + stdError = escape(std_error) else: # Ignore the stdout of the subprocess, capturing only stderr with open(os.devnull, "w") as devnull: @@ -116,7 +118,7 @@ def launchSubProcess( stderr=subprocess.PIPE, ) __, std_error = p.communicate(input=communicate_input) - stdError = std_error.decode() + stdError = escape(std_error) retcode = p.returncode # If we are not capturing output and the subprocess has succeeded, set # its stderr to the empty string. From 39fb35df96c0ccd3bd23a35805a8bea9cb18c09c Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Wed, 11 Sep 2024 17:29:46 +0000 Subject: [PATCH 3/4] Annotate tests --- pyproject.toml | 1 + .../test_execute_functions.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86d11bc484..926b62045c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ module = [ "src.MCPClient.lib.clientScripts.policy_check", "src.MCPClient.lib.clientScripts.transcribe_file", "src.MCPClient.lib.clientScripts.validate_file", + "tests.archivematicaCommon.test_execute_functions", "tests.dashboard.fpr.test_views", "tests.MCPClient.conftest", "tests.MCPClient.test_characterize_file", diff --git a/tests/archivematicaCommon/test_execute_functions.py b/tests/archivematicaCommon/test_execute_functions.py index 2273832fa3..0f1fa41b91 100644 --- a/tests/archivematicaCommon/test_execute_functions.py +++ b/tests/archivematicaCommon/test_execute_functions.py @@ -1,5 +1,7 @@ +import pathlib import shlex import tempfile +from typing import Generator from unittest.mock import ANY from unittest.mock import Mock from unittest.mock import patch @@ -8,7 +10,7 @@ import pytest -def test_capture_output(): +def test_capture_output() -> None: """Tests behaviour of capture_output when executing sub processes.""" # Test that stdout and stderr are not captured by default @@ -62,7 +64,7 @@ def test_capture_output(): @pytest.fixture -def temp_path(tmp_path): +def temp_path(tmp_path: pathlib.Path) -> Generator[str, None, None]: """Creates custom temp path, yields the value, and resets to original value.""" original_tempdir = tempfile.tempdir @@ -74,7 +76,9 @@ def temp_path(tmp_path): @patch("executeOrRunSubProcess.launchSubProcess") -def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp_path): +def test_createAndRunScript_creates_tmpfile_in_custom_dir( + launchSubProcess: Mock, temp_path: str +) -> None: """Tests execution of launchSubProcess when executing createAndRunScript.""" script_content = "#!/bin/bash\necho 'Script output'\nexit 0" @@ -93,11 +97,13 @@ def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp @patch("subprocess.Popen") -def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(Popen): +def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters( + popen: Mock, +) -> None: communicate_return_code = 0 communicate_output = b"Output \xae" communicate_error = b"Error \xae" - Popen.return_value = Mock( + popen.return_value = Mock( returncode=communicate_return_code, **{"communicate.return_value": (communicate_output, communicate_error)}, ) From f279191ecd65d6d1841dbfe52097c4fd489eb46e Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Wed, 11 Sep 2024 18:58:53 +0000 Subject: [PATCH 4/4] Annotate executeOrRunSubProcess module --- pyproject.toml | 1 + .../lib/executeOrRunSubProcess.py | 66 +++++++++++++------ 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 926b62045c..37e4161032 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ warn_unused_configs = true [[tool.mypy.overrides]] module = [ + "src.archivematicaCommon.lib.executeOrRunSubProcess", "src.MCPClient.lib.client.*", "src.MCPClient.lib.clientScripts.characterize_file", "src.MCPClient.lib.clientScripts.has_packages", diff --git a/src/archivematicaCommon/lib/executeOrRunSubProcess.py b/src/archivematicaCommon/lib/executeOrRunSubProcess.py index b23bcfaedc..0b8ada6cf5 100644 --- a/src/archivematicaCommon/lib/executeOrRunSubProcess.py +++ b/src/archivematicaCommon/lib/executeOrRunSubProcess.py @@ -21,18 +21,29 @@ import subprocess import sys import tempfile +from typing import Dict +from typing import List +from typing import Optional +from typing import Tuple +from typing import Union from archivematicaFunctions import escape +Arguments = List[str] +Input = Union[str, bytes, io.IOBase] +Environment = Dict[str, str] +Command = Union[str, List[str]] +Result = Tuple[int, str, str] + def launchSubProcess( - command, - stdIn="", - printing=True, - arguments=None, - env_updates=None, - capture_output=False, -): + command: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = False, +) -> Result: """ Launches a subprocess using ``command``, where ``command`` is either: a) a single string containing a commandline statement, or @@ -91,7 +102,7 @@ def launchSubProcess( stdin_pipe = subprocess.PIPE communicate_input = stdIn elif isinstance(stdIn, io.IOBase): - stdin_pipe = stdIn + stdin_pipe = stdIn.fileno() communicate_input = None else: raise Exception("stdIn must be a string or a file object") @@ -141,8 +152,13 @@ def launchSubProcess( def createAndRunScript( - text, stdIn="", printing=True, arguments=None, env_updates=None, capture_output=True -): + text: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = True, +) -> Result: if arguments is None: arguments = [] if env_updates is None: @@ -152,7 +168,10 @@ def createAndRunScript( encoding="utf-8", mode="wt", delete=False ) as tmpfile: os.chmod(tmpfile.name, 0o770) - tmpfile.write(text) + if isinstance(text, str): + tmpfile.write(text) + else: + tmpfile.write(" ".join(text)) tmpfile.close() cmd = [tmpfile.name] cmd.extend(arguments) @@ -170,14 +189,14 @@ def createAndRunScript( def executeOrRun( - type, - text, - stdIn="", - printing=True, - arguments=None, - env_updates=None, - capture_output=True, -): + type: str, + text: Command, + stdIn: Input = "", + printing: bool = True, + arguments: Optional[Arguments] = None, + env_updates: Optional[Environment] = None, + capture_output: bool = True, +) -> Result: """ Attempts to run the provided command on the shell, with the text of "stdIn" passed as standard input if provided. The type parameter @@ -222,7 +241,9 @@ def executeOrRun( capture_output=capture_output, ) if type == "bashScript": - text = "#!/bin/bash\n" + text + if not isinstance(text, str): + raise ValueError("command must be a str") + text = f"#!/bin/bash\n{text}" return createAndRunScript( text, stdIn=stdIn, @@ -232,7 +253,9 @@ def executeOrRun( capture_output=capture_output, ) if type == "pythonScript": - text = "#!/usr/bin/env python\n" + text + if not isinstance(text, str): + raise ValueError("command must be a str") + text = f"#!/usr/bin/env python\n{text}" return createAndRunScript( text, stdIn=stdIn, @@ -250,3 +273,4 @@ def executeOrRun( env_updates=env_updates, capture_output=capture_output, ) + raise ValueError(f"unknown type {type}")