From 6574f502e0c36db3901eaa3b9fcfa873a6623b35 Mon Sep 17 00:00:00 2001
From: "Douglas Cerna (Soy Douglas)" <douglascerna@yahoo.com>
Date: Wed, 11 Sep 2024 16:45:59 +0000
Subject: [PATCH 1/4] Add test for decoding non utf-8 output

---
 .../test_execute_functions.py                  | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/archivematicaCommon/test_execute_functions.py b/tests/archivematicaCommon/test_execute_functions.py
index 0effb27190..2273832fa3 100644
--- a/tests/archivematicaCommon/test_execute_functions.py
+++ b/tests/archivematicaCommon/test_execute_functions.py
@@ -1,6 +1,7 @@
 import shlex
 import tempfile
 from unittest.mock import ANY
+from unittest.mock import Mock
 from unittest.mock import patch
 
 import executeOrRunSubProcess as execsub
@@ -89,3 +90,20 @@ def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp
     )
     args, _ = launchSubProcess.call_args
     assert args[0][0].startswith(temp_path)
+
+
+@patch("subprocess.Popen")
+def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(Popen):
+    communicate_return_code = 0
+    communicate_output = b"Output \xae"
+    communicate_error = b"Error \xae"
+    Popen.return_value = Mock(
+        returncode=communicate_return_code,
+        **{"communicate.return_value": (communicate_output, communicate_error)},
+    )
+
+    code, stdout, stderr = execsub.launchSubProcess("mycommand", capture_output=True)
+
+    assert code == communicate_return_code
+    assert stdout == communicate_output.decode(errors="replace")
+    assert stderr == communicate_error.decode(errors="replace")

From c42a26939e4f2bd1a87838fb262072ba6f7dcab5 Mon Sep 17 00:00:00 2001
From: "Douglas Cerna (Soy Douglas)" <douglascerna@yahoo.com>
Date: Wed, 11 Sep 2024 16:55:15 +0000
Subject: [PATCH 2/4] Escape output and error in launchSubProcess

---
 src/archivematicaCommon/lib/executeOrRunSubProcess.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/archivematicaCommon/lib/executeOrRunSubProcess.py b/src/archivematicaCommon/lib/executeOrRunSubProcess.py
index 86a3778906..b23bcfaedc 100644
--- a/src/archivematicaCommon/lib/executeOrRunSubProcess.py
+++ b/src/archivematicaCommon/lib/executeOrRunSubProcess.py
@@ -22,6 +22,8 @@
 import sys
 import tempfile
 
+from archivematicaFunctions import escape
+
 
 def launchSubProcess(
     command,
@@ -103,8 +105,8 @@ def launchSubProcess(
                 env=my_env,
             )
             std_out, std_error = p.communicate(input=communicate_input)
-            stdOut = std_out.decode()
-            stdError = std_error.decode()
+            stdOut = escape(std_out)
+            stdError = escape(std_error)
         else:
             # Ignore the stdout of the subprocess, capturing only stderr
             with open(os.devnull, "w") as devnull:
@@ -116,7 +118,7 @@ def launchSubProcess(
                     stderr=subprocess.PIPE,
                 )
                 __, std_error = p.communicate(input=communicate_input)
-                stdError = std_error.decode()
+                stdError = escape(std_error)
         retcode = p.returncode
         # If we are not capturing output and the subprocess has succeeded, set
         # its stderr to the empty string.

From 39fb35df96c0ccd3bd23a35805a8bea9cb18c09c Mon Sep 17 00:00:00 2001
From: "Douglas Cerna (Soy Douglas)" <douglascerna@yahoo.com>
Date: Wed, 11 Sep 2024 17:29:46 +0000
Subject: [PATCH 3/4] Annotate tests

---
 pyproject.toml                                   |  1 +
 .../test_execute_functions.py                    | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 86d11bc484..926b62045c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,6 +64,7 @@ module = [
     "src.MCPClient.lib.clientScripts.policy_check",
     "src.MCPClient.lib.clientScripts.transcribe_file",
     "src.MCPClient.lib.clientScripts.validate_file",
+    "tests.archivematicaCommon.test_execute_functions",
     "tests.dashboard.fpr.test_views",
     "tests.MCPClient.conftest",
     "tests.MCPClient.test_characterize_file",
diff --git a/tests/archivematicaCommon/test_execute_functions.py b/tests/archivematicaCommon/test_execute_functions.py
index 2273832fa3..0f1fa41b91 100644
--- a/tests/archivematicaCommon/test_execute_functions.py
+++ b/tests/archivematicaCommon/test_execute_functions.py
@@ -1,5 +1,7 @@
+import pathlib
 import shlex
 import tempfile
+from typing import Generator
 from unittest.mock import ANY
 from unittest.mock import Mock
 from unittest.mock import patch
@@ -8,7 +10,7 @@
 import pytest
 
 
-def test_capture_output():
+def test_capture_output() -> None:
     """Tests behaviour of capture_output when executing sub processes."""
 
     # Test that stdout and stderr are not captured by default
@@ -62,7 +64,7 @@ def test_capture_output():
 
 
 @pytest.fixture
-def temp_path(tmp_path):
+def temp_path(tmp_path: pathlib.Path) -> Generator[str, None, None]:
     """Creates custom temp path, yields the value, and resets to original value."""
 
     original_tempdir = tempfile.tempdir
@@ -74,7 +76,9 @@ def temp_path(tmp_path):
 
 
 @patch("executeOrRunSubProcess.launchSubProcess")
-def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp_path):
+def test_createAndRunScript_creates_tmpfile_in_custom_dir(
+    launchSubProcess: Mock, temp_path: str
+) -> None:
     """Tests execution of launchSubProcess when executing createAndRunScript."""
 
     script_content = "#!/bin/bash\necho 'Script output'\nexit 0"
@@ -93,11 +97,13 @@ def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp
 
 
 @patch("subprocess.Popen")
-def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(Popen):
+def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(
+    popen: Mock,
+) -> None:
     communicate_return_code = 0
     communicate_output = b"Output \xae"
     communicate_error = b"Error \xae"
-    Popen.return_value = Mock(
+    popen.return_value = Mock(
         returncode=communicate_return_code,
         **{"communicate.return_value": (communicate_output, communicate_error)},
     )

From f279191ecd65d6d1841dbfe52097c4fd489eb46e Mon Sep 17 00:00:00 2001
From: "Douglas Cerna (Soy Douglas)" <douglascerna@yahoo.com>
Date: Wed, 11 Sep 2024 18:58:53 +0000
Subject: [PATCH 4/4] Annotate executeOrRunSubProcess module

---
 pyproject.toml                                |  1 +
 .../lib/executeOrRunSubProcess.py             | 66 +++++++++++++------
 2 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 926b62045c..37e4161032 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ warn_unused_configs = true
 
 [[tool.mypy.overrides]]
 module = [
+    "src.archivematicaCommon.lib.executeOrRunSubProcess",
     "src.MCPClient.lib.client.*",
     "src.MCPClient.lib.clientScripts.characterize_file",
     "src.MCPClient.lib.clientScripts.has_packages",
diff --git a/src/archivematicaCommon/lib/executeOrRunSubProcess.py b/src/archivematicaCommon/lib/executeOrRunSubProcess.py
index b23bcfaedc..0b8ada6cf5 100644
--- a/src/archivematicaCommon/lib/executeOrRunSubProcess.py
+++ b/src/archivematicaCommon/lib/executeOrRunSubProcess.py
@@ -21,18 +21,29 @@
 import subprocess
 import sys
 import tempfile
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Tuple
+from typing import Union
 
 from archivematicaFunctions import escape
 
+Arguments = List[str]
+Input = Union[str, bytes, io.IOBase]
+Environment = Dict[str, str]
+Command = Union[str, List[str]]
+Result = Tuple[int, str, str]
+
 
 def launchSubProcess(
-    command,
-    stdIn="",
-    printing=True,
-    arguments=None,
-    env_updates=None,
-    capture_output=False,
-):
+    command: Command,
+    stdIn: Input = "",
+    printing: bool = True,
+    arguments: Optional[Arguments] = None,
+    env_updates: Optional[Environment] = None,
+    capture_output: bool = False,
+) -> Result:
     """
     Launches a subprocess using ``command``, where ``command`` is either:
     a) a single string containing a commandline statement, or
@@ -91,7 +102,7 @@ def launchSubProcess(
             stdin_pipe = subprocess.PIPE
             communicate_input = stdIn
         elif isinstance(stdIn, io.IOBase):
-            stdin_pipe = stdIn
+            stdin_pipe = stdIn.fileno()
             communicate_input = None
         else:
             raise Exception("stdIn must be a string or a file object")
@@ -141,8 +152,13 @@ def launchSubProcess(
 
 
 def createAndRunScript(
-    text, stdIn="", printing=True, arguments=None, env_updates=None, capture_output=True
-):
+    text: Command,
+    stdIn: Input = "",
+    printing: bool = True,
+    arguments: Optional[Arguments] = None,
+    env_updates: Optional[Environment] = None,
+    capture_output: bool = True,
+) -> Result:
     if arguments is None:
         arguments = []
     if env_updates is None:
@@ -152,7 +168,10 @@ def createAndRunScript(
         encoding="utf-8", mode="wt", delete=False
     ) as tmpfile:
         os.chmod(tmpfile.name, 0o770)
-        tmpfile.write(text)
+        if isinstance(text, str):
+            tmpfile.write(text)
+        else:
+            tmpfile.write(" ".join(text))
         tmpfile.close()
         cmd = [tmpfile.name]
         cmd.extend(arguments)
@@ -170,14 +189,14 @@ def createAndRunScript(
 
 
 def executeOrRun(
-    type,
-    text,
-    stdIn="",
-    printing=True,
-    arguments=None,
-    env_updates=None,
-    capture_output=True,
-):
+    type: str,
+    text: Command,
+    stdIn: Input = "",
+    printing: bool = True,
+    arguments: Optional[Arguments] = None,
+    env_updates: Optional[Environment] = None,
+    capture_output: bool = True,
+) -> Result:
     """
     Attempts to run the provided command on the shell, with the text of
     "stdIn" passed as standard input if provided. The type parameter
@@ -222,7 +241,9 @@ def executeOrRun(
             capture_output=capture_output,
         )
     if type == "bashScript":
-        text = "#!/bin/bash\n" + text
+        if not isinstance(text, str):
+            raise ValueError("command must be a str")
+        text = f"#!/bin/bash\n{text}"
         return createAndRunScript(
             text,
             stdIn=stdIn,
@@ -232,7 +253,9 @@ def executeOrRun(
             capture_output=capture_output,
         )
     if type == "pythonScript":
-        text = "#!/usr/bin/env python\n" + text
+        if not isinstance(text, str):
+            raise ValueError("command must be a str")
+        text = f"#!/usr/bin/env python\n{text}"
         return createAndRunScript(
             text,
             stdIn=stdIn,
@@ -250,3 +273,4 @@ def executeOrRun(
             env_updates=env_updates,
             capture_output=capture_output,
         )
+    raise ValueError(f"unknown type {type}")