From 6a92d55ac2751e3885d58a7b360bbbebee663da3 Mon Sep 17 00:00:00 2001
From: Bernhard Kaindl <bernhard.kaindl@cloud.com>
Date: Mon, 12 Feb 2024 19:23:55 +0100
Subject: [PATCH] CA-388587: Fix filtering the `xapi-clusterd` `db` and log any
 errors (#67)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implemented using TDD for the two main fixes for CA-388587

- also add tests that to make sure that the fix works and keeps working.
- refactor to log errors into the xen-bugtool log instead (don't fully ignore them)
  - Also this latter case is unit-test

Signed-off-by: Bernhard Kaindl <bernhard.kaindl@cloud.com>
Signed-off-by: Edwin Török <edvin.torok@cloud.com>
---
 pytest.ini                                 |  22 ++
 tests/unit/conftest.py                     |  89 ++++++
 tests/unit/test_filter_xapi_clusterd_db.py | 339 +++++++++++++++++++++
 xen-bugtool                                |  31 +-
 4 files changed, 474 insertions(+), 7 deletions(-)
 create mode 100644 tests/unit/test_filter_xapi_clusterd_db.py

diff --git a/pytest.ini b/pytest.ini
index 6197705e..25a9f82c 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -8,3 +8,25 @@ log_cli=True
 log_cli_level=INFO
 # By default, run the tests in the tests directory:
 testpaths=tests/
+
+#
+# Make @pytest.mark.xfail(strict=True) the default:
+#
+# pytest.mark.xfail(strict=True) will fail if the test passes, but the default
+# is to not fail if the test passes.
+#
+# This is a problem because it can lead to tests that are marked as XFAIL, but
+# are actually passing, and we don't notice because they are marked as XFAIL.
+#
+# For example, if we have a test that is marked as XFAIL, but it passes, we
+# will see a XPASS instead of a FAILED. This is a problem because we don't
+# notice that the test is actually passing, and we don't update the test to
+# now be fixed and require it to pass instead from now on.
+#
+# To fix this, we can set the strict parameter to True, so that if a test is
+# marked as XFAIL, but passes, it will fail. This way, we will see a FAILED
+# instead of a PASSED (or XPASS).
+#
+# Make @pytest.mark.xfail(strict=True) the default:
+#
+xfail_strict=True
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index f0ca6068..53c5e896 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -1,5 +1,8 @@
 """tests/unit/conftest.py: pytest fixtures for unit-testing functions in the xen-bugtool script"""
+from __future__ import print_function
+
 import os
+import shutil
 import sys
 
 import pytest
@@ -72,3 +75,89 @@ def bugtool(imported_bugtool):
     imported_bugtool.data = {}
     sys.argv = ["xen-bugtool", "--unlimited"]
     return imported_bugtool
+
+
+@pytest.fixture(scope="function")
+def in_tmpdir(tmpdir):
+    """
+    Run each test function in it's own temporary directory
+
+    This fixture warps pytest's built-in tmpdir fixture with a chdir()
+    to the tmpdir and a check for leftover files after the test returns.
+
+    Usage in a test function:
+
+    @pytest.usefixtures("in_tmpdir")
+    def test_foo(other_fixtures):
+        # ... test code that runs with the tmpdir as its working directory ...
+
+    # If the test function wants to use the in_tmpdir variable:
+    def test_foo(in_tmpdir):
+        in_tmpdir.mkdir("subdir").join("filename").write("content_for_testing")
+        # code under test, that runs with the tmpdir as its working directory:
+        with open("subdir/filename") as f:
+            assert f.read() == "content_for_testing"
+
+    Documentation on the wrapped tmpdir fixture:
+    https://docs.pytest.org/en/6.2.x/tmpdir.html#the-tmpdir-fixture
+    """
+
+    # Get the current directory:
+    curdir = os.getcwd()
+
+    # Change to the temporary directory:
+    os.chdir(str(tmpdir))
+
+    # Run the test:
+    yield tmpdir  # provide the fixture value to the pytest test function
+
+    # Change back to the original directory:
+    os.chdir(curdir)
+
+    # upon return, the tmpdir fixture will cleanup the temporary directory
+
+
+@pytest.fixture(scope="function")
+def bugtool_log(in_tmpdir, bugtool):
+    """Like in_tmpdir and check that no logs are left in XEN_BUGTOOL_LOG"""
+
+    in_tmpdir.mkdir("tmp")  # Create a tmp directory for use by test cases
+
+    in_tmpdir.join(bugtool.XEN_BUGTOOL_LOG).write("")  # create the log file
+
+    # Run the test:
+    yield bugtool  # provide the bugtool to the test function
+
+    log = in_tmpdir.join(bugtool.XEN_BUGTOOL_LOG).read()  # read the log file
+    if log:  # pragma: no cover
+        print("Content of " + bugtool.XEN_BUGTOOL_LOG + ":" + log, file=sys.stderr)
+        pytest.fail("Code under test left logs in " + bugtool.XEN_BUGTOOL_LOG)
+
+    # Cleanup the temporary directory to prepare to check leftover files:
+    os.remove(bugtool.XEN_BUGTOOL_LOG)
+    shutil.rmtree("tmp")
+
+    # Check for files that the code under test might have left:
+    files = in_tmpdir.listdir()
+    if files:  # pragma: no cover
+        print("Files left in temporary working dir:", files, file=sys.stderr)
+        pytest.fail("Code under test left files in the its working directory")
+
+    # upon return, the in_tmpdir switches back to the original directory
+
+
+@pytest.fixture(scope="function")
+def isolated_bugtool(bugtool_log):
+    """
+    Like `bugtool_log` and make the temporary working directory read-only
+    to prevent creating files in it
+    """
+
+    # Make the current cwd (a temporary directory) read-only:
+    os.chmod(".", 0o555)
+
+    yield bugtool_log  # runs the test function in the read-only directory
+
+    os.chmod(".", 0o777)  # restore write permissions (for cleanup)
+
+    # upon return, bugtool_log continues with its cleanup
diff --git a/tests/unit/test_filter_xapi_clusterd_db.py b/tests/unit/test_filter_xapi_clusterd_db.py
new file mode 100644
index 00000000..16511188
--- /dev/null
+++ b/tests/unit/test_filter_xapi_clusterd_db.py
@@ -0,0 +1,339 @@
+"""tests/unit/test_filter_xapi_clusterd_db.py: Test filter_xapi_clusterd_db()
+
+Steps taken for TDD - Test-Driven Develpment - for fixing CA-388587:
+
+To write reliable unit tests, always start writing a failing test. And make
+sure it fails for the right reasons.
+
+Follow the Red, Green, Refactor principle of Test-Driven Development (TDD).
+
+- Write a failing test (Red)
+- make it pass, and    (Green)
+- refactor the code.   (Refactor)
+
+This plan was used for TDD of these test cases:
+-----------------------------------------------
+
+These tests were made to initially fail to cover two current bugtool bugs
+because the filter_xapi_clusterd_db() function is not implemented correctly.
+
+Now fixed bugs in filter_xapi_clusterd_db():
+
+1.  The filter must replace "token": "secret-token", with "token": "REMOVED"
+2.  The filter must also work if the "pems" key is missing
+3.  The filter must also work if "pems.blobs" is missing
+4.  The filter must also work if "authkey" is missing
+    Points 2-4 above apply to "cluster_config" and "old_cluster_config".
+
+Explanation for PASS tests: Those are the cases that are currently passing.
+Explanation for XFAIL tests: Those are the cases that are currently failing
+and must be fixed:
+- They use the pytest.mark.xfail decorator to mark the tests as expected to fail.
+- They are marked with "XFAIL" in the test output.
+- The pytest.mark.xfail decorator must be removed after the bug is fixed
+  to ensure the test is run and passes in the future.
+
+DONE START:
+
+1.  Add additional XFAIL tests, one for each, which fail on the cases above
+[DONE]
+
+2.  Add additional PASS test:
+    The filter must also work if "cluster_config" or/and "old_cluster_config"
+    are missing.
+[DONE]
+
+3.  Add additional PASS test:
+    The filter must also work if the "token" key is missing.
+[DONE]
+
+4.  The changes are added as additional commits and pushed.
+    The individual sub-tests must be shown failing on each the cases above.
+[DONE] (done for each case)
+
+5.  The filter must be updated to handle these cases.
+[DONE] (done for each case)
+
+6.  The PR is then pushed again and then pass.
+[DONE]
+
+    Changes to the test should only be needed if e.g. logging was added
+    to test the logging output.
+
+Ideally, if there is capacity for it, pre-commit hooks should run on every
+commit to ensure that each works without any errors
+
+This can be done by running:
+    git rebase -x 'pre-commit run --from-ref HEAD~ --to-ref HEAD' HEAD~1
+[DONE] (for all commits in this PR)
+
+Replace the `1` with the number of commits to run the pre-commit hooks on.
+This opens an editor with the commits and checks to run on them. When it,
+by mistake contains one or more commits that are not only the ones that
+are in the current PR, remove them all commits from the editor and save
+the file. In this case `git rebase` will do nothing and no commits are changed.
+
+With all of these TODO done, step-by-step, in order, work will be complete.
+
+Tip:
+
+Run `pip install pre-commit` and then use `pre-commit run`
+to check the code and run all unit tests and checks before committing
+and pushing the code.
+
+pre-commit run will:
+- remove trailing whitespace from the code for uniformity
+- fix the code style of the tests with black and of xen-bugtool with darker
+- run unit tests with coverage and check the coverage on the changed lines
+
+If you run `pre-commit install`, it will run on every commit in your own clone.
+
+DONE END.
+
+"""
+
+import json
+import os
+import re
+
+# Minimal example of a xapi-clusterd/db file, with sensitive data
+# BUG: The new cluster_config has no "pems" key, so the filter has to be updated
+# to handle this case, pass data and still filter the "old_cluster_config" key.
+ORIGINAL = r"""
+{
+    "token": "secret-token",
+    "cluster_config": {
+        "pems": {
+            "blobs": "secret-blob"
+        },
+        "authkey": "secret-key"
+    },
+    "old_cluster_config": {
+        "pems": {
+            "blobs": "secret-blob"
+        },
+        "authkey": "secret-key"
+    },
+    "max_config_version": 1
+}"""
+
+# Same as original, but with passwords and private data replaced by: "REMOVED"
+# BUG: "secret-token" has to be replaced by "REMOVED"
+EXPECTED = r"""{
+    "token": "REMOVED",
+    "cluster_config": {
+        "pems": {
+            "blobs": "REMOVED"
+        },
+        "authkey": "REMOVED"
+    },
+    "old_cluster_config": {
+        "pems": {
+            "blobs": "REMOVED"
+        },
+        "authkey": "REMOVED"
+    },
+    "max_config_version": 1
+}"""
+
+
+def assert_filter_xapi_clusterd_db(bugtool, original, expected):
+    """Assert that filter_xapi_clusterd_db() replaces sensitive strings"""
+
+    # -------------------------------------------------------------------------
+    # Common setup:
+    # -------------
+    # - Define the file name of the temporary xAPI clusterd db.
+    #
+    # Notes:
+    # ------
+    # - To allow for checking of logged errors, the bugtool_log fixture will be used.
+    #
+    # - After the test function returns, the bugtool_log fixture checks the log:
+    #
+    #   The bugtool_log fixture will raise an error if the xen-bugtool.log file
+    #   as unexpected content, and the error message will be printed to stderr.
+    #
+    #   This way, cases where errors logged by the code under test can be checked.
+    #
+    temporary_test_clusterd_db = "tmp/xapi_clusterd_db.json"
+    # -------------------------------------------------------------------------
+
+    # -------------------------------------------------------------------------
+    # Prepare:
+    # --------
+    #
+    # 1. Write the original xapi-clusterd/db contents to the temporary file
+    # 2. Patch the bugtool module to use the temporary file for reading the db
+    #
+    with open(temporary_test_clusterd_db, "w") as f:
+        f.write(original)
+
+    bugtool.XAPI_CLUSTERD = temporary_test_clusterd_db
+    # -------------------------------------------------------------------------
+
+    # -------------------------------------------------------------------------
+    # Act:
+    # ----
+    #
+    # -> Call the  filter function
+    #
+    filtered = bugtool.filter_xapi_clusterd_db("_")
+    # -------------------------------------------------------------------------
+
+    # -------------------------------------------------------------------------
+    # Assert:
+    # -------
+    #
+    # -> Compare the filtered output with the expected output
+    #
+    try:
+        assert json.loads(filtered) == json.loads(expected)
+    except ValueError:  # For invalid json (to test handing of corrupted db)
+        assert filtered == expected
+
+    os.remove(temporary_test_clusterd_db)
+    # -------------------------------------------------------------------------
+
+
+def test_pems_blobs(isolated_bugtool):
+    """Assert that filter_xapi_clusterd_db() replaces pems.blobs strings"""
+    assert_filter_xapi_clusterd_db(isolated_bugtool, ORIGINAL, EXPECTED)
+
+
+# CA-358870: filter_xapi_clusterd_db: remove token from the report
+def test_remove_token(isolated_bugtool):
+    """CA-358870: Assert that filter_xapi_clusterd_db() removes the token"""
+
+    # Load the expected output and remove the token from it
+    expected = json.loads(EXPECTED)
+    expected["token"] = "REMOVED"
+    expected_json = json.dumps(expected)
+
+    assert_filter_xapi_clusterd_db(isolated_bugtool, ORIGINAL, expected_json)
+
+
+def test_no_authkey(isolated_bugtool):
+    """Assert that filter_xapi_clusterd_db() handles missing authkey"""
+
+    def remove_authkey(json_data):
+        """Remove the authkey from the clusterd db"""
+        data = json.loads(json_data)
+        # sourcery skip: no-loop-in-tests
+        for key in ["cluster_config", "old_cluster_config"]:
+            if "authkey" in data[key]:
+                data[key].pop("authkey")
+        return json.dumps(data)
+
+    assert_filter_xapi_clusterd_db(
+        isolated_bugtool,
+        remove_authkey(ORIGINAL),
+        remove_authkey(EXPECTED),
+    )
+
+
+def test_no_pems(isolated_bugtool):
+    """Assert that filter_xapi_clusterd_db() handles missing pems"""
+
+    def remove_pems(json_data):
+        """Remove the pems key from the clusterd db"""
+        data = json.loads(json_data)
+        # sourcery skip: no-loop-in-tests
+        for key in ["cluster_config", "old_cluster_config"]:
+            if "pems" in data[key]:
+                data[key].pop("pems")
+        return json.dumps(data)
+
+    assert_filter_xapi_clusterd_db(
+        isolated_bugtool,
+        remove_pems(ORIGINAL),
+        remove_pems(EXPECTED),
+    )
+
+
+def test_no_blobs(isolated_bugtool):
+    """Assert that filter_xapi_clusterd_db() handles missing blobs"""
+
+    def remove_blobs(json_data):
+        """Remove the pems.blobs key from the clusterd db"""
+        data = json.loads(json_data)
+        # sourcery skip: no-loop-in-tests
+        for key in ["cluster_config", "old_cluster_config"]:
+            if "pems" in data[key] and "blobs" in data[key]["pems"]:
+                data[key]["pems"].pop("blobs")
+        return json.dumps(data)
+
+    assert_filter_xapi_clusterd_db(
+        isolated_bugtool,
+        remove_blobs(ORIGINAL),
+        remove_blobs(EXPECTED),
+    )
+
+
+def test_no_config(isolated_bugtool):
+    """Assert that filter_xapi_clusterd_db() handles missing cluster_configs"""
+
+    def remove_cluster_configs(json_data):
+        """Remove the {,old_}cluster_configs keys from the clusterd db"""
+        data = json.loads(json_data)
+        data.pop("cluster_config")
+        data.pop("old_cluster_config")
+        return json.dumps(data)
+
+    assert_filter_xapi_clusterd_db(
+        isolated_bugtool,
+        remove_cluster_configs(ORIGINAL),
+        remove_cluster_configs(EXPECTED),
+    )
+
+
+def test_no_token(isolated_bugtool):
+    """Assert that filter_xapi_clusterd_db() handles missing token"""
+
+    def remove_cluster_token(json_data):
+        """Remove the token key from the clusterd db"""
+        data = json.loads(json_data)
+        data.pop("token")
+        return json.dumps(data)
+
+    assert_filter_xapi_clusterd_db(
+        isolated_bugtool,
+        remove_cluster_token(ORIGINAL),
+        remove_cluster_token(EXPECTED),
+    )
+
+
+def test_no_database(isolated_bugtool):
+    """Assert that filter_xapi_clusterd_db() handles missing token"""
+
+    isolated_bugtool.XAPI_CLUSTERD = "/does/not/exist"
+    assert isolated_bugtool.filter_xapi_clusterd_db("_") == ""
+
+
+def test_invalid_db(isolated_bugtool, capsys):
+    """Assert how filter_xapi_clusterd_db() handles a corrupted db"""
+
+    # The filter must handle invalid json and return the expected output,
+    # which no output, as the db is not readable and the filter should
+    # return an empty string (in an ideal world, it would return an error message)
+    assert_filter_xapi_clusterd_db(isolated_bugtool, "invalid json", "")
+
+    with capsys.disabled():
+        stdout = capsys.readouterr().out
+        assert stdout.startswith("bugtool: Internal error: ")
+        assert "filter_xapi_clusterd_db" in stdout
+        assert "JSON" in stdout
+
+    with open(isolated_bugtool.XEN_BUGTOOL_LOG, "r") as f:
+        log = f.read()
+        assert "filter_xapi_clusterd_db" in log
+        assert "JSON" in log
+
+    # Python2 error message
+    log = re.sub(r"(?s)bugtool: Internal error:.*could be decoded\n\n", "", log)
+
+    # Python3 error message is different
+    log = re.sub(r"(?s)bugtool: Internal error:.*umn 1 \(char 0\)\n\n", "", log)
+
+    with open(isolated_bugtool.XEN_BUGTOOL_LOG, "w") as f:
+        f.write(log)
diff --git a/xen-bugtool b/xen-bugtool
index 92483e21..56e8f1a9 100755
--- a/xen-bugtool
+++ b/xen-bugtool
@@ -491,6 +491,13 @@ def no_unicode(x):
         return x.encode('utf-8')
     return x
 
+
+def log_internal_error(e):
+    """Log an internal error and return an empty string as bugtool output data"""
+    log("bugtool: Internal error: " + traceback.format_exc())
+    return ""  # For now, no diagnostic output in the individual output files
+
+
 def log(x, print_output=True):
     if print_output:
         output(x)
@@ -1502,19 +1509,29 @@ def filter_xenstore_secrets(s, state):
 def filter_xapi_clusterd_db(cap):
     clusterd_data = {}
 
+    if not os.path.exists(XAPI_CLUSTERD):
+        return ""
+
     try:
         with open(XAPI_CLUSTERD, 'r') as f:
             clusterd_data = json.load(f)
-
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        return log_internal_error(e)
+    try:
         config_keys = ['cluster_config', 'old_cluster_config']
         for config_key in config_keys:
-            if config_key in clusterd_data:
-                clusterd_data[config_key]['pems']['blobs'] = "REMOVED"
-                clusterd_data[config_key]['authkey'] = "REMOVED"
-
+            if config_key in clusterd_data.keys():
+                conf = clusterd_data[config_key]
+                if "pems" in conf and "blobs" in conf["pems"]:
+                    conf["pems"]["blobs"] = "REMOVED"
+                if "authkey" in conf:
+                    conf["authkey"] = "REMOVED"
+
+        if "token" in clusterd_data:
+            clusterd_data["token"] = "REMOVED"
         output_str = json.dumps(clusterd_data)
-    except Exception:
-        output_str = ''
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        return log_internal_error(e)
 
     return output_str