diff --git a/src/matchcode_toolkit/fingerprinting.py b/src/matchcode_toolkit/fingerprinting.py
index 90b7ad3..aeef5d6 100644
--- a/src/matchcode_toolkit/fingerprinting.py
+++ b/src/matchcode_toolkit/fingerprinting.py
@@ -12,12 +12,11 @@ from matchcode_toolkit.halohash import BitAverageHaloHash
 
-
 # A collection of directory fingerprints that we want to avoid
 IGNORED_DIRECTORY_FINGERPRINTS = [
     # This is both the directory content and directory structure fingerprint for
     # an empty directory.
-    '0000000000000000000000000000000000000000',
+    "0000000000000000000000000000000000000000",
 ]
 
 
@@ -25,11 +24,11 @@ def _create_directory_fingerprint(inputs):
     """
     Return a 128-bit BitAverageHaloHash fingerprint in hex from `inputs`
     """
-    inputs = [i.encode('utf-8') for i in inputs if i]
+    inputs = [i.encode("utf-8") for i in inputs if i]
     bah128 = BitAverageHaloHash(inputs, size_in_bits=128).hexdigest()
     inputs_count = len(inputs)
-    inputs_count_hex_str = '%08x' % inputs_count
-    bah128 = bah128.decode('utf-8')
+    inputs_count_hex_str = "%08x" % inputs_count
+    bah128 = bah128.decode("utf-8")
     directory_fingerprint = inputs_count_hex_str + bah128
     return directory_fingerprint
 
@@ -55,7 +54,7 @@ def _get_resource_subpath(resource, top):
     The subpath returned would be 'baz.c'
     """
     _, _, subpath = resource.path.partition(top.path)
-    subpath = subpath.lstrip('/')
+    subpath = subpath.lstrip("/")
     return subpath
 
@@ -88,16 +87,16 @@ def _compute_directory_fingerprints(directory, codebase):
         return
 
     directory_content_fingerprint = create_content_fingerprint(children)
-    if hasattr(directory, 'directory_content_fingerprint'):
+    if hasattr(directory, "directory_content_fingerprint"):
         directory.directory_content_fingerprint = directory_content_fingerprint
     else:
-        directory.extra_data['directory_content'] = directory_content_fingerprint
+        directory.extra_data["directory_content"] = directory_content_fingerprint
 
     directory_structure_fingerprint = create_structure_fingerprint(directory, children)
-    if hasattr(directory, 'directory_structure_fingerprint'):
+    if hasattr(directory, "directory_structure_fingerprint"):
         directory.directory_structure_fingerprint = directory_structure_fingerprint
     else:
-        directory.extra_data['directory_structure'] = directory_structure_fingerprint
+        directory.extra_data["directory_structure"] = directory_structure_fingerprint
 
     directory.save(codebase)
     return directory
 
@@ -162,7 +161,7 @@ def create_halohash_chunks(bah128):
 
 
 # Split on whitespace and punctuations: keep only characters and numbers
-query_pattern = '[^_\\W]+'
+query_pattern = "[^_\\W]+"
 word_splitter = re.compile(query_pattern, re.UNICODE).findall
 
 
@@ -237,28 +236,30 @@ def create_file_fingerprints(content, ngram_length=8, window_length=64):
         "hailstorm": [],
     }
 
-    # Create fingerprint
-    words = tokenizer(content)
+    # Tokenize content into words
+    words = list(tokenizer(content))
+
+    # Create a file fingerprint from the number of elements in the content hash
+    # and the content hash digest itself.
     ngs = ngrams(words, ngram_length)
-    ngs_bytes = [[g.encode('utf-8') for g in ng] for ng in ngs]
-    ngs_bytes = [b''.join(ng) for ng in ngs_bytes]
+    ngs_bytes = [[g.encode("utf-8") for g in ng] for ng in ngs]
+    ngs_bytes = [b"".join(ng) for ng in ngs_bytes]
     content_hash, ngs_count = BitAverageHaloHash(ngs_bytes), len(ngs_bytes)
     if content_hash:
-        content_fingerprint = content_hash.hexdigest().decode('utf-8')
-        ngs_count_hex_str = '%08x' % ngs_count
+        content_fingerprint = content_hash.hexdigest().decode("utf-8")
+        ngs_count_hex_str = "%08x" % ngs_count
         file_fingerprint = ngs_count_hex_str + content_fingerprint
-        fingerprints['halo1'] = file_fingerprint
+        fingerprints["halo1"] = file_fingerprint
 
-    words = tokenizer(content)
+    # Select windows from the content to find snippet similarities
     windows = ngrams(words, window_length)
     selected_windows = select_ngrams(windows)
-    selected_windows_bytes = [[g.encode('utf-8') for g in window] for window in selected_windows]
-    selected_windows_bytes = [b''.join(window) for window in selected_windows_bytes]
+    selected_windows_bytes = [[g.encode("utf-8") for g in window] for window in selected_windows]
+    selected_windows_bytes = [b"".join(window) for window in selected_windows_bytes]
     hailstorm_hashes = [
-        BitAverageHaloHash(window).hexdigest().decode('utf-8')
-        for window in selected_windows_bytes
+        BitAverageHaloHash(window).hexdigest().decode("utf-8") for window in selected_windows_bytes
     ]
     if hailstorm_hashes:
-        fingerprints['hailstorm'] = hailstorm_hashes
+        fingerprints["hailstorm"] = hailstorm_hashes
 
     return fingerprints
diff --git a/src/matchcode_toolkit/halohash.py b/src/matchcode_toolkit/halohash.py
index 538db55..36aa00f 100644
--- a/src/matchcode_toolkit/halohash.py
+++ b/src/matchcode_toolkit/halohash.py
@@ -176,8 +176,10 @@ def __init__(self, msg=None, size_in_bits=128):
             # TODO: pick one hash algorithm
             self.hashmodule = commoncode_hash.get_hasher(size_in_bits)
         except:
-            raise Exception('No available hash module for the requested '
-                            'hash size in bits: %(size_in_bits)d' % locals())
+            raise Exception(
+                "No available hash module for the requested "
+                "hash size in bits: %(size_in_bits)d" % locals()
+            )
         self.update(msg)
 
     @property
@@ -190,7 +192,13 @@ def update(self, msg):
         """
         if not msg:
             return
-        if isinstance(msg, (list, tuple,)):
+        if isinstance(
+            msg,
+            (
+                list,
+                tuple,
+            ),
+        ):
             for m in msg:
                 self.__hashup(m)
         else:
@@ -242,7 +250,9 @@ def combine(cls, hashes):
         """
         size_in_bits = hashes[0].size_in_bits
         for h in hashes:
-            assert isinstance(hash, cls), 'all hashes should be a BitAverageHaloHash, not {}'.format(type(h))
+            assert isinstance(
+                hash, cls
+            ), "all hashes should be a BitAverageHaloHash, not {}".format(type(h))
             assert h.size_in_bits == size_in_bits
 
         all_columns = [h.columns for h in hashes]
@@ -313,7 +323,9 @@ def slices(s, size):
     ...     pass
     """
     length = len(s)
-    assert length % size == 0, 'Invalid slice size: len(%(s)r) is not a multiple of %(size)r' % locals()
+    assert length % size == 0, (
+        "Invalid slice size: len(%(s)r) is not a multiple of %(size)r" % locals()
+    )
     # TODO: time alternative
     # return [s[index:index + size] for index in range(0, length, size)]
     chunks = [iter(s)] * size
diff --git a/src/matchcode_toolkit/plugin_fingerprint.py b/src/matchcode_toolkit/plugin_fingerprint.py
index 6046fd6..ee972ef 100644
--- a/src/matchcode_toolkit/plugin_fingerprint.py
+++ b/src/matchcode_toolkit/plugin_fingerprint.py
@@ -9,13 +9,14 @@
 
 import attr
 
-from commoncode.cliutils import PluggableCommandLineOption
 from commoncode.cliutils import SCAN_GROUP
-from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
-from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes
+from commoncode.cliutils import PluggableCommandLineOption
 from plugincode.scan import ScanPlugin
 from plugincode.scan import scan_impl
 
+from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
+from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes
+
 
 @scan_impl
 class FingerprintScanner(ScanPlugin):
@@ -28,12 +29,10 @@ class FingerprintScanner(ScanPlugin):
     sort_order = 6
     options = [
         PluggableCommandLineOption(
-            (
-                '--fingerprint',
-            ),
+            ("--fingerprint",),
             is_flag=True,
             default=False,
-            help='Compute directory and resource fingerprints that are used for matching',
+            help="Compute directory and resource fingerprints that are used for matching",
             help_group=SCAN_GROUP,
             sort_order=20,
         )
diff --git a/tests/test_fingerprinting.py b/tests/test_fingerprinting.py
index 19bb9c6..dd4bc6e 100644
--- a/tests/test_fingerprinting.py
+++ b/tests/test_fingerprinting.py
@@ -23,160 +23,150 @@ from matchcode_toolkit.halohash import byte_hamming_distance
 
 
-class Resource():
-    def __init__(self, path='', size=0, sha1=''):
+class Resource:
+    def __init__(self, path="", size=0, sha1=""):
         self.path = path
         self.size = size
         self.sha1 = sha1
 
 
 class TestFingerprintingFunctions(FileBasedTesting):
-    test_data_dir = os.path.join(os.path.dirname(
-        __file__), 'testfiles/fingerprinting')
+    test_data_dir = os.path.join(os.path.dirname(__file__), "testfiles/fingerprinting")
 
     def test__create_directory_fingerprint(self):
         test_input = [
-            'package',
-            'package/readme.txt',
-            'package/index.js',
-            'package/package.json',
+            "package",
+            "package/readme.txt",
+            "package/index.js",
+            "package/package.json",
         ]
         directory_fingerprint = _create_directory_fingerprint(test_input)
-        expected_directory_fingerprint = '0000000410d24471969646cb5402032288493126'
+        expected_directory_fingerprint = "0000000410d24471969646cb5402032288493126"
         self.assertEqual(expected_directory_fingerprint, directory_fingerprint)
         indexed_elements_count, _ = split_fingerprint(directory_fingerprint)
         self.assertEqual(len(test_input), indexed_elements_count)
 
     def test_split_fingerprint(self):
-        directory_fingerprint = '0000000410d24471969646cb5402032288493126'
-        indexed_elements_count, bah128 = split_fingerprint(
-            directory_fingerprint)
+        directory_fingerprint = "0000000410d24471969646cb5402032288493126"
+        indexed_elements_count, bah128 = split_fingerprint(directory_fingerprint)
 
         expected_indexed_elements_count = 4
-        self.assertEqual(expected_indexed_elements_count,
-                         indexed_elements_count)
+        self.assertEqual(expected_indexed_elements_count, indexed_elements_count)
 
-        expected_bah128 = '10d24471969646cb5402032288493126'
+        expected_bah128 = "10d24471969646cb5402032288493126"
"10d24471969646cb5402032288493126" self.assertEqual(expected_bah128, bah128) def test_create_content_fingerprint(self): test_resources = [ - Resource(sha1='d4e4abbe8e2a8169d6a52907152c2c80ec884745'), - Resource(sha1='0c94f137f6e0536db8cb2622a9dc84253b91b90c'), - Resource(sha1='10cab45fe6f353b47b587a576c1077a96ce348f5'), - Resource(sha1='134f2b052b6e5f56b631be2eded70f89d44cf381'), + Resource(sha1="d4e4abbe8e2a8169d6a52907152c2c80ec884745"), + Resource(sha1="0c94f137f6e0536db8cb2622a9dc84253b91b90c"), + Resource(sha1="10cab45fe6f353b47b587a576c1077a96ce348f5"), + Resource(sha1="134f2b052b6e5f56b631be2eded70f89d44cf381"), ] fingerprint = create_content_fingerprint(test_resources) - expected_fingerprint = '00000004005b88c2800f0044044781ae05680419' + expected_fingerprint = "00000004005b88c2800f0044044781ae05680419" self.assertEqual(expected_fingerprint, fingerprint) def test__get_resource_subpath(self): - test_resource = Resource(path='foo/bar/baz/qux.c') - test_top_resource = Resource(path='foo/bar/') + test_resource = Resource(path="foo/bar/baz/qux.c") + test_top_resource = Resource(path="foo/bar/") subpath = _get_resource_subpath(test_resource, test_top_resource) - expected_subpath = 'baz/qux.c' + expected_subpath = "baz/qux.c" self.assertEqual(expected_subpath, subpath) def test_create_structure_fingerprint(self): - test_top_resource = Resource(path='package') + test_top_resource = Resource(path="package") test_child_resources = [ - Resource(path='package/readme.txt', size=771), - Resource(path='package/index.js', size=608), - Resource(path='package/package.json', size=677), + Resource(path="package/readme.txt", size=771), + Resource(path="package/index.js", size=608), + Resource(path="package/package.json", size=677), ] - fingerprint = create_structure_fingerprint( - test_top_resource, test_child_resources) - expected_fingerprint = '00000003ce72f4308a1bc1afb0fb47ed590b5c53' + fingerprint = create_structure_fingerprint(test_top_resource, test_child_resources) + expected_fingerprint = "00000003ce72f4308a1bc1afb0fb47ed590b5c53" self.assertEqual(expected_fingerprint, fingerprint) def test_create_halohash_chunks(self): - test_bah128 = 'ce72f4308a1bc1afb0fb47ed590b5c53' + test_bah128 = "ce72f4308a1bc1afb0fb47ed590b5c53" chunk1, chunk2, chunk3, chunk4 = create_halohash_chunks(test_bah128) - expected_chunk1 = bytearray(b'\xcer\xf40') - expected_chunk2 = bytearray(b'\x8a\x1b\xc1\xaf') - expected_chunk3 = bytearray(b'\xb0\xfbG\xed') - expected_chunk4 = bytearray(b'Y\x0b\\S') + expected_chunk1 = bytearray(b"\xcer\xf40") + expected_chunk2 = bytearray(b"\x8a\x1b\xc1\xaf") + expected_chunk3 = bytearray(b"\xb0\xfbG\xed") + expected_chunk4 = bytearray(b"Y\x0b\\S") self.assertEqual(chunk1, expected_chunk1) self.assertEqual(chunk2, expected_chunk2) self.assertEqual(chunk3, expected_chunk3) self.assertEqual(chunk4, expected_chunk4) def test_compute_codebase_directory_fingerprints(self): - scan_loc = self.get_test_loc('abbrev-1.0.3-i.json') + scan_loc = self.get_test_loc("abbrev-1.0.3-i.json") vc = VirtualCodebase(location=scan_loc) vc = compute_codebase_directory_fingerprints(vc) - directory_content = vc.root.extra_data['directory_content'] - directory_structure = vc.root.extra_data['directory_structure'] - expected_directory_content = '0000000346ce04751a3c98f00086f16a91d9790b' - expected_directory_structure = '000000034f9bf110673bdf06197cd514a799a66c' + directory_content = vc.root.extra_data["directory_content"] + directory_structure = vc.root.extra_data["directory_structure"] + expected_directory_content = 
"0000000346ce04751a3c98f00086f16a91d9790b" + expected_directory_structure = "000000034f9bf110673bdf06197cd514a799a66c" self.assertEqual(expected_directory_content, directory_content) self.assertEqual(expected_directory_structure, directory_structure) def test_do_not_compute_fingerprint_for_empty_dirs(self): - scan_loc = self.get_test_loc('test.json') + scan_loc = self.get_test_loc("test.json") vc = VirtualCodebase(location=scan_loc) vc = compute_codebase_directory_fingerprints(vc) - directory_content = vc.root.extra_data['directory_content'] - directory_structure = vc.root.extra_data['directory_structure'] - expected_directory_content = '000000032a5fa8d01922536b53e8fc6e3d43766f' - expected_directory_structure = '000000030a399ce2b947a6f611821965a4fcc577' + directory_content = vc.root.extra_data["directory_content"] + directory_structure = vc.root.extra_data["directory_structure"] + expected_directory_content = "000000032a5fa8d01922536b53e8fc6e3d43766f" + expected_directory_structure = "000000030a399ce2b947a6f611821965a4fcc577" self.assertEqual(expected_directory_content, directory_content) self.assertEqual(expected_directory_structure, directory_structure) # These directories should not have fingerprints generated or stored in # extra_data - empty_dir_1 = vc.get_resource('test/test') - empty_dir_2 = vc.get_resource('test/test/test2') + empty_dir_1 = vc.get_resource("test/test") + empty_dir_2 = vc.get_resource("test/test/test2") self.assertEqual({}, empty_dir_1.extra_data) self.assertEqual({}, empty_dir_1.extra_data) self.assertEqual({}, empty_dir_2.extra_data) self.assertEqual({}, empty_dir_2.extra_data) def test_get_file_fingerprint_hashes_one_line_removed(self): - test_file1 = self.get_test_loc('inflate.c') - test_file2 = self.get_test_loc('inflate-mod.c') + test_file1 = self.get_test_loc("inflate.c") + test_file2 = self.get_test_loc("inflate-mod.c") result1 = get_file_fingerprint_hashes(test_file1) result2 = get_file_fingerprint_hashes(test_file2) - result1 = result1.get('halo1') - result2 = result2.get('halo1') - result1_indexed_elements_count, result1_fingerprint = split_fingerprint( - result1) - result2_indexed_elements_count, result2_fingerprint = split_fingerprint( - result2) + result1 = result1.get("halo1") + result2 = result2.get("halo1") + result1_indexed_elements_count, result1_fingerprint = split_fingerprint(result1) + result2_indexed_elements_count, result2_fingerprint = split_fingerprint(result2) expected_result1_indexed_elements_count = 6395 expected_result2_indexed_elements_count = 6388 assert result1_indexed_elements_count == expected_result1_indexed_elements_count assert result2_indexed_elements_count == expected_result2_indexed_elements_count - expected_result1_fingerprint = 'a23a49e4cd40718d1297be719e6564a4' - expected_result2_fingerprint = 'aa3a49e4cd40718d1297be519e6564a4' + expected_result1_fingerprint = "a23a49e4cd40718d1297be719e6564a4" + expected_result2_fingerprint = "aa3a49e4cd40718d1297be519e6564a4" assert result1_fingerprint == expected_result1_fingerprint assert result2_fingerprint == expected_result2_fingerprint - assert byte_hamming_distance( - result1_fingerprint, result2_fingerprint) == 2 + assert byte_hamming_distance(result1_fingerprint, result2_fingerprint) == 2 def test_get_file_fingerprint_hashes_one_line_added(self): - test_file1 = self.get_test_loc('inflate.c') - test_file2 = self.get_test_loc('inflate-mod2.c') + test_file1 = self.get_test_loc("inflate.c") + test_file2 = self.get_test_loc("inflate-mod2.c") result1 = 
         result2 = get_file_fingerprint_hashes(test_file2)
-        result1 = result1.get('halo1')
-        result2 = result2.get('halo1')
-        result1_indexed_elements_count, result1_fingerprint = split_fingerprint(
-            result1)
-        result2_indexed_elements_count, result2_fingerprint = split_fingerprint(
-            result2)
+        result1 = result1.get("halo1")
+        result2 = result2.get("halo1")
+        result1_indexed_elements_count, result1_fingerprint = split_fingerprint(result1)
+        result2_indexed_elements_count, result2_fingerprint = split_fingerprint(result2)
 
         expected_result1_indexed_elements_count = 6395
         expected_result2_indexed_elements_count = 6398
         assert result1_indexed_elements_count == expected_result1_indexed_elements_count
         assert result2_indexed_elements_count == expected_result2_indexed_elements_count
 
-        expected_result1_fingerprint = 'a23a49e4cd40718d1297be719e6564a4'
-        expected_result2_fingerprint = 'a23b49e4cd40708d1297be719c6564a4'
+        expected_result1_fingerprint = "a23a49e4cd40718d1297be719e6564a4"
+        expected_result2_fingerprint = "a23b49e4cd40708d1297be719c6564a4"
         assert result1_fingerprint == expected_result1_fingerprint
         assert result2_fingerprint == expected_result2_fingerprint
-        assert byte_hamming_distance(
-            result1_fingerprint, result2_fingerprint) == 3
+        assert byte_hamming_distance(result1_fingerprint, result2_fingerprint) == 3
diff --git a/tests/test_halohash.py b/tests/test_halohash.py
index cb45ae2..5bfe7a5 100644
--- a/tests/test_halohash.py
+++ b/tests/test_halohash.py
@@ -7,20 +7,18 @@
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
 
-from collections import defaultdict
-
+import copy
 import csv
 import math
-import copy
 import os
 import random
 import subprocess
+from collections import defaultdict
 
 from commoncode.testcase import FileBasedTesting
 
 from matchcode_toolkit import halohash
 
-
 SIZE_IN_BITS = 128
@@ -30,10 +28,7 @@ def load_csv(location):
     mappings field->value).
     """
     with open(location) as csvin:
-        reader = csv.DictReader(
-            csvin,
-            quoting=csv.QUOTE_NONNUMERIC
-        )
+        reader = csv.DictReader(csvin, quoting=csv.QUOTE_NONNUMERIC)
         fields = reader.fieldnames
         values = sorted(reader, key=lambda d: d.items())
         return fields, values
@@ -49,12 +44,8 @@ def check_results(
 
     Load and compare the CSV at `expected_file` against `results`.
     """
    if regen:
-        with open(expected_file, 'w') as f:
-            writer = csv.DictWriter(
-                f,
-                fieldnames=fieldnames,
-                quoting=csv.QUOTE_NONNUMERIC
-            )
+        with open(expected_file, "w") as f:
+            writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
             writer.writeheader()
             writer.writerows(results)
@@ -64,11 +55,11 @@ def check_results(
     # check results line by line for more compact results
     for exp, res in zip(expected, results):
         assert exp[column1_name] == res[column1_name]
-        expected_mean_hamming_distance = exp['mean hamming distance']
-        expected_standard_deviation = exp['standard deviation']
+        expected_mean_hamming_distance = exp["mean hamming distance"]
+        expected_standard_deviation = exp["standard deviation"]
         exp_min = expected_mean_hamming_distance - expected_standard_deviation
         exp_max = expected_mean_hamming_distance + expected_standard_deviation
-        assert exp_min <= res['mean hamming distance'] <= exp_max
+        assert exp_min <= res["mean hamming distance"] <= exp_max
 
 
 def calculate_hamming_distance(content_hash, modified_content):
@@ -85,13 +76,11 @@ def calculate_mean_and_standard_deviation(hamming_distances):
     number_of_hamming_distances = len(hamming_distances)
 
     # 1: Find the mean.
-    mean_hamming_distance = sum(hamming_distances) / \
-        number_of_hamming_distances
+    mean_hamming_distance = sum(hamming_distances) / number_of_hamming_distances
 
     # 2: For each data point, find the square of its distance to the mean, then sum the values.
     s0 = sum(
-        (hamming_distance - mean_hamming_distance) ** 2
-        for hamming_distance in hamming_distances
+        (hamming_distance - mean_hamming_distance) ** 2 for hamming_distance in hamming_distances
     )
 
     # 3: Divide by the number of data points.
@@ -104,16 +93,17 @@
 
 
 class TestHalohash(FileBasedTesting):
-    test_data_dir = os.path.join(
-        os.path.dirname(__file__), 'testfiles/halohash')
+    test_data_dir = os.path.join(os.path.dirname(__file__), "testfiles/halohash")
 
     def setUp(self):
-        words_loc = self.get_test_loc('words.txt')
+        words_loc = self.get_test_loc("words.txt")
         with open(words_loc) as f:
-            self.original_content = [bytes(x.strip(), 'utf-8') for x in f]
+            self.original_content = [bytes(x.strip(), "utf-8") for x in f]
 
     def test_halohash_random_delete(self, regen=False):
-        for number_of_words in [500,]:
+        for number_of_words in [
+            500,
+        ]:
             content = copy.copy(self.original_content[:number_of_words])
             original_hash = halohash.BitAverageHaloHash(content)
@@ -125,36 +115,44 @@ def test_halohash_random_delete(self, regen=False):
             # we are moving towards unrelated files past that
             n = int(math.floor(len(modified_content) * 0.10))
             for _ in range(n):
-                hamming_distance = calculate_hamming_distance(
-                    original_hash,
-                    modified_content
-                )
+                hamming_distance = calculate_hamming_distance(original_hash, modified_content)
                 number_of_elements = len(modified_content)
                 hamming_distance_by_number_of_elements[number_of_elements].append(
-                    hamming_distance)
-                modified_content.pop(random.randint(
-                    0, len(modified_content) - 1))
+                    hamming_distance
+                )
+                modified_content.pop(random.randint(0, len(modified_content) - 1))
 
             # Take mean and standard deviation
             results = []
-            for number_of_elements, hamming_distances in hamming_distance_by_number_of_elements.items():
+            for (
+                number_of_elements,
+                hamming_distances,
+            ) in hamming_distance_by_number_of_elements.items():
                 mean_hamming_distance, standard_deviation = calculate_mean_and_standard_deviation(
-                    hamming_distances)
+                    hamming_distances
+                )
                 results.append(
                     {
-                        'number of hashed elements': int(number_of_elements),
-                        'mean hamming distance': round(mean_hamming_distance, 1),
-                        'standard deviation': round(standard_deviation, 1)
+                        "number of hashed elements": int(number_of_elements),
+                        "mean hamming distance": round(mean_hamming_distance, 1),
+                        "standard deviation": round(standard_deviation, 1),
                     }
                 )
 
             expected_results_loc = self.get_test_loc(
-                f'{number_of_words}-delete-expected-results.csv')
-            check_results(results, expected_results_loc, [
-                'number of hashed elements', 'mean hamming distance', 'standard deviation'], regen=regen)
+                f"{number_of_words}-delete-expected-results.csv"
+            )
+            check_results(
+                results,
+                expected_results_loc,
+                ["number of hashed elements", "mean hamming distance", "standard deviation"],
+                regen=regen,
+            )
 
     def test_halohash_random_replace(self, regen=False):
-        for number_of_words in [500,]:
+        for number_of_words in [
+            500,
+        ]:
             content = copy.copy(self.original_content[:number_of_words])
             original_hash = halohash.BitAverageHaloHash(content)
@@ -168,43 +166,49 @@ def test_halohash_random_replace(self, regen=False):
             # we are moving towards unrelated files past that
             n = int(math.floor(len(modified_content) * 0.10))
             for _ in range(n):
-                hamming_distance = calculate_hamming_distance(
-                    original_hash,
-                    modified_content
-                )
+                hamming_distance = calculate_hamming_distance(original_hash, modified_content)
                 hamming_distance_by_number_of_words_replaced[words_replaced].append(
-                    hamming_distance)
+                    hamming_distance
+                )
 
-                modified_content.pop(random.randint(
-                    0, len(modified_content) - 1))
+                modified_content.pop(random.randint(0, len(modified_content) - 1))
                 new_word = (
                     subprocess.run(
-                        ['shuf', '-n', '1', '/usr/share/dict/american-english'],
-                        stdout=subprocess.PIPE
+                        ["shuf", "-n", "1", "/usr/share/dict/american-english"],
+                        stdout=subprocess.PIPE,
                     )
-                    .stdout
-                    .decode('utf-8')
+                    .stdout.decode("utf-8")
                     .strip()
-                    .replace('"', '')
+                    .replace('"', "")
+                )
+                modified_content[random.randint(0, len(modified_content) - 1)] = bytes(
+                    new_word, "utf-8"
                 )
-                modified_content[random.randint(
-                    0, len(modified_content) - 1)] = bytes(new_word, 'utf-8')
                 words_replaced += 1
 
             # Take mean and standard deviation
             results = []
-            for words_replaced, hamming_distances in hamming_distance_by_number_of_words_replaced.items():
+            for (
+                words_replaced,
+                hamming_distances,
+            ) in hamming_distance_by_number_of_words_replaced.items():
                 mean_hamming_distance, standard_deviation = calculate_mean_and_standard_deviation(
-                    hamming_distances)
+                    hamming_distances
+                )
                 results.append(
                     {
-                        'words replaced': int(words_replaced),
-                        'mean hamming distance': round(mean_hamming_distance, 1),
-                        'standard deviation': round(standard_deviation, 1)
+                        "words replaced": int(words_replaced),
+                        "mean hamming distance": round(mean_hamming_distance, 1),
+                        "standard deviation": round(standard_deviation, 1),
                     }
                 )
 
             expected_results_loc = self.get_test_loc(
-                f'{number_of_words}-replaced-expected-results.csv')
-            check_results(results, expected_results_loc, [
-                'words replaced', 'mean hamming distance', 'standard deviation'], regen=regen)
+                f"{number_of_words}-replaced-expected-results.csv"
+            )
+            check_results(
+                results,
+                expected_results_loc,
+                ["words replaced", "mean hamming distance", "standard deviation"],
+                regen=regen,
+            )
diff --git a/tests/test_plugin_fingerprinting.py b/tests/test_plugin_fingerprinting.py
index 1f9b35a..65c4510 100644
--- a/tests/test_plugin_fingerprinting.py
+++ b/tests/test_plugin_fingerprinting.py
@@ -16,36 +16,36 @@
 
 from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes
 
-
 """
 These tests spawn new process as if launched from the command line.
""" class TestPluginFingerprinting(FileBasedTesting): - test_data_dir = os.path.join(os.path.dirname(__file__), 'testfiles') + test_data_dir = os.path.join(os.path.dirname(__file__), "testfiles") def test_plugin_fingerprinting_api_works(self): - test_loc = self.get_test_loc('fingerprinting/inflate.c') + test_loc = self.get_test_loc("fingerprinting/inflate.c") detections = list(get_file_fingerprint_hashes(location=test_loc)) assert detections def test_fingerprinting_plugin_works(self): - test_dir = self.get_test_loc('fingerprinting', copy=True) - result_file = self.get_temp_file('json') + test_dir = self.get_test_loc("fingerprinting", copy=True) + result_file = self.get_temp_file("json") args = [ - '--info', - '--fingerprint', - '--verbose', - '--json', result_file, + "--info", + "--fingerprint", + "--verbose", + "--json", + result_file, test_dir, ] run_scan_click(args) - test_loc = self.get_test_loc('fingerprinting-expected.json') + test_loc = self.get_test_loc("fingerprinting-expected.json") check_json_scan( test_loc, result_file, remove_file_date=True, check_headers=False, - regen=REGEN_TEST_FIXTURES + regen=REGEN_TEST_FIXTURES, )