From ff3387a3729ef67dbfee8fd47b37039414a35859 Mon Sep 17 00:00:00 2001 From: Jingchao Zhong Date: Thu, 20 Apr 2023 08:59:27 -0700 Subject: [PATCH 1/4] Add customized plug-ins to detect-secrets --- .../detect-secrets/plug-ins/.DS_Store | Bin 0 -> 6148 bytes .../detect-secrets/plug-ins/README.md | 40 +++ .../plug-ins/absolute_filepaths.py | 17 ++ .../plug-ins/aws_sensitive_info.py | 128 ++++++++ .../detect-secrets/plug-ins/email_address.py | 11 + .../detect-secrets/plug-ins/ip_address.py | 13 + .../detect-secrets/plug-ins/tests/__init__.py | 0 .../plug-ins/tests/test_absolute_filepaths.py | 103 +++++++ .../plug-ins/tests/test_aws_sensitive_info.py | 284 ++++++++++++++++++ .../plug-ins/tests/test_email_address.py | 45 +++ .../plug-ins/tests/test_ip_address.py | 71 +++++ 11 files changed, 712 insertions(+) create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/.DS_Store create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/README.md create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/absolute_filepaths.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/aws_sensitive_info.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/email_address.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/ip_address.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/tests/__init__.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_absolute_filepaths.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_aws_sensitive_info.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_email_address.py create mode 100644 continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_ip_address.py diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/.DS_Store 
b/continuous-integration/starter-kits/detect-secrets/plug-ins/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..37b24df502c53a45d00ac080459ec07562370413 GIT binary patch literal 6148 zcmeHK&2G~`5S~o~bpnV;RL~2^7j9LRpQ@qoKRkVt?im#kLA3Ld=j*^BuJQb~DH|={`f6jUMS9iS^L3i9P?L6gC>IPAFD1!sP z3zwHCLFDssgO4IVk>fbZU|9vL&@Syxr-!wJJ^SF%(QMD29#zZGYlpL0!MgXb^0;+2 zm|RTX&E9`RA`pg+S;}pV3-|;@CKvW9PKOWTg<;V@;LWB^+B*G_uRZwnRlIB&GvNNE zwz!`?-nlc3LLT)n#$ruNQyTT^8PKo65_^CV!+>GHFtE&k*dJ^Zfpv{@g>vgaC9eR$ zJi4Vo7rzA}#@1NZI9CV@gsD)V3T66=!BjZGnzhZ#pn@+QVl=RuUk(@YdZIm+= 8 characters. + + Reference: https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html + """ + self.denylist.append(re.compile(r'vpc-\w{8,}')) + + def _add_deny_subnet_id(self): + """Add AWS subnet id pattern to denylist. + AWS subnet id is a string starts with `subnet-` and followed by 8 or 17 characters. + For example, subnet-12345678 + + Pattern Strategy: + 1. Only find the string that starts with `subnet-` and followed by >= 8 characters. + + Reference: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ec2-subnet.html + """ + self.denylist.append(re.compile(r'subnet-\w{8,}')) + + def _add_deny_bucket_name(self): + """Add AWS bucket name pattern to denylist. + Note: this function is not implemented yet due to the complexity of the pattern. + Check the reference for more details. + + Reference: https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html + """ + pass + + def _add_deny_aws_hostname(self): + """Add AWS hostname pattern to denylist. + + Example IP name: + e.g. 1: ip-10-24-34-0.ec2.internal + e.g. 2: ip-10-24-34-0.us-west-2.compute.internal + + Example Resource name: + e.g. 1: i-0123456789abcdef.ec2.internal + e.g. 2: i-0123456789abcdef.us-west-2.compute.internal + + Pattern Strategy: + 1. For IP name, check the content of this format: `ip-ip_digit-ip_digit-ip_digit-ip_digit` + 2. 
For Resource name, check the content of this format: `i-16character` + + The denylist is a list of regular expressions that will be used to match and deny certain patterns. + + Reference: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-naming.html + """ + self.denylist.append(re.compile(r'ip-\d{1,3}-\d{1,3}-\d{1,3}-\d{1,3}')) + self.denylist.append(re.compile(r'i-\w{16}')) + \ No newline at end of file diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/email_address.py b/continuous-integration/starter-kits/detect-secrets/plug-ins/email_address.py new file mode 100644 index 000000000..81da105b0 --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/plug-ins/email_address.py @@ -0,0 +1,11 @@ +import re +from detect_secrets.plugins.base import RegexBasedDetector + + +class EmailAddressDetector(RegexBasedDetector): + """Scans for email addresses.""" + secret_type = 'Email Address' + + denylist = [ + re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'), + ] diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/ip_address.py b/continuous-integration/starter-kits/detect-secrets/plug-ins/ip_address.py new file mode 100644 index 000000000..c67a8e0b2 --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/plug-ins/ip_address.py @@ -0,0 +1,13 @@ +import re +from detect_secrets.plugins.base import RegexBasedDetector + + +class IPAddressDetector(RegexBasedDetector): + """Scans for IP addresses (ipv4 and ipv6).""" + secret_type = 'IP Address' + + denylist = [ + re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b'), # ipv4 + 
re.compile(r'\b(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))\b') + # reference: https://ihateregex.io/expr/ipv6/ + ] diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/__init__.py b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_absolute_filepaths.py b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_absolute_filepaths.py new file mode 100644 index 000000000..34c36d911 --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_absolute_filepaths.py @@ -0,0 +1,103 @@ +import os +import sys +import unittest + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) + +from detect_secrets.plugins.absolute_filepath import AbsolutePathDetector + + +class TestAbsolutePathDetector(unittest.TestCase): + """ + Testing strategy + + Cover the cartesian product of these partitions: + + 1. Partition on system type: + a. Unix-like + b. Windows + + 2. Partition on path type: + a. Absolute + b. Relative + + 3. Partition on special characters in path: + a. Presence of special characters (e.g., '.', '..', '#') + b. 
Absence of special characters + """ + + def setUp(self): + self.detector = AbsolutePathDetector() + + def test_unix_absolute_no_special(self): + path = '/home/user/myfile.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), True) + + def test_unix_relative_no_special(self): + path = './myfile.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + def test_unix_relative_with_special(self): + path = 'my..file.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + def test_unix_absolute_with_dotdot(self): + path = '/../etc/passwd' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), True) + + def test_unix_absolute_with_hash(self): + path = '/home/user/dir/myfile.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), True) + + def test_windows_absolute_no_special(self): + path = 'C:\\Users\\user\\myfile.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), True) + + def test_windows_relative_no_special(self): + path = '.\\myfile.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + def test_windows_relative_with_special(self): + path = 'myfile.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + def test_windows_absolute_with_special(self): + path = 'C:\\Users\\user\\dir.#2\\myfile.txt' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), True) + + # More tests + def test_common_used_absolute_path(self): + """Common used absolute path shoud not be detected as secret + """ + path = 
'/usr/bin/python' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + # **/.terraform/* should not be detected as secret because it is not a home directory essentially + def test_path_in_pattern(self): + path = 'regex = **/.terraform/*' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + def test_other_path(self): + path = '157:# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + def test_other_path2(self): + path = '187:/site' + results = self.detector.analyze_line(filename='mock_filename', line=path) + self.assertEqual(bool(results), False) + + +if __name__ == '__main__': + unittest.main() diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_aws_sensitive_info.py b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_aws_sensitive_info.py new file mode 100644 index 000000000..6c4b5e106 --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_aws_sensitive_info.py @@ -0,0 +1,284 @@ +import os +import sys +import unittest + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) + +from detect_secrets.plugins.aws_sensitive_info import AWSSensitiveInfoDetector + + +class TestAWSSensitiveInfoDetector(unittest.TestCase): + """ + Testing strategy + + 1. Partition on AWS resource type: + 1. AWS account id + 2. AWS ARN + 3. AWS security group id + 4. AWS VPC id + 5. AWS subnet id + 6. AWS bucket name (not implemented) + 7. AWS hostname + + 2. Partition on presence or absence of keyword (if applicable): + a. With keyword + b. Without keyword (Wrong keyword) + + 3. Parition by changing order of keyword (if applicable) + + 4. 
Parition by changing length of id number (if applicable) + """ + + def setUp(self): + self.detector = AWSSensitiveInfoDetector() + + + # test account id + + def test_aws_account_id_digit_length_12(self): + results = self.detector.analyze_line(filename='mock_filename', line='123456789012') + self.assertEqual(bool(results), True) + + def test_aws_account_id_digit_length_11(self): + results = self.detector.analyze_line(filename='mock_filename', line='12345678901') + self.assertEqual(bool(results), False) + + def test_aws_account_id_with_keyword(self): + results = self.detector.analyze_line(filename='mock_filename', line='aws_account_id: 123456789012') + self.assertEqual(bool(results), True) + + def test_aws_account_id_with_noise(self): + content = """ + 2. Partition on presence or absence of keyword (if applicable): + a. With keyword + b. Without keyword (Wrong keyword) + + 3. Parition by changing order of keyword (if applicable) + + 4. Parition by changing length of id number (if applicable) + + def setUp(self): + self.detector = AWSSensitiveInfoDetector() + + # test account id + + def test_aws_account_id_digit_length_12(self): + results = self.detector.analyze_line(filename='mock_filename', line='123456789012') + self.assertEqual(bool(results), True) + """ + results = self.detector.analyze_line(filename='mock_filename', line=content) + self.assertEqual(bool(results), True) + + + # test ARN + + def test_aws_arn_valid_end_by_account_id(self): + results = self.detector.analyze_line(filename='mock_filename', line='arn:aws:iam:us:123456789012') + self.assertEqual(bool(results), True) + + def test_aws_arn_valid_not_end_by_account_id(self): + results = self.detector.analyze_line(filename='mock_filename', line='arn:aws:iam:cn:123456789012:role') + self.assertEqual(bool(results), True) + + def test_aws_arn_valid_iam_user(self): + results = self.detector.analyze_line(filename='mock_filename', line='arn:aws:iam::123456789012:user/johndoe') + self.assertEqual(bool(results), 
True) + + def test_aws_arn_valid_sns_topic(self): + results = self.detector.analyze_line(filename='mock_filename', line='arn:aws:sns:us-east-1:123456789012:example-sns-topic-name') + self.assertEqual(bool(results), True) + + def test_aws_arn_valid_vpc(self): + results = self.detector.analyze_line(filename='mock_filename', line='arn:aws:ec2:us-east-1:123456789012:vpc/vpc-0e9801d129EXAMPLE') + self.assertEqual(bool(results), True) + + def test_aws_arn_invalid_account_id_length(self): + results = self.detector.analyze_line(filename='mock_filename', line='arn:aws:iam::12345678901') + self.assertEqual(bool(results), False) + + def test_aws_arn_invalid_account_id_wrong_format(self): + results = self.detector.analyze_line(filename='mock_filename', line='arn:aws') + self.assertEqual(bool(results), False) + + def test_aws_arn_invalid_single_aws(self): + results = self.detector.analyze_line(filename='mock_filename', line='aws') + self.assertEqual(bool(results), False) + + + # test security group id + + def test_aws_sg_id_valid_length_8_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='sg-12345678') + self.assertEqual(bool(results), True) + + def test_aws_sg_id_valid_length_8_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='sg-1234abcd') + self.assertEqual(bool(results), True) + + def test_aws_sg_id_valid_length_17_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='sg-12345678901234567') + self.assertEqual(bool(results), True) + + def test_aws_sg_id_valid_length_17_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='sg-1234abcd5678efghi czxcx') + self.assertEqual(bool(results), True) + + def test_aws_sg_id_valid_length_9_chars(self): + """Special case: valid when length of string >= 8 + """ + results = self.detector.analyze_line(filename='mock_filename', line='sg-12345678a') + self.assertEqual(bool(results), True) + + def 
test_aws_sg_id_invalid_length_7_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='sg-1234567') + self.assertEqual(bool(results), False) + + def test_aws_sg_id_invalid_length_7_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='sg-1234abc') + self.assertEqual(bool(results), False) + + def test_aws_sg_id_invalid_wrong_keyword(self): + results = self.detector.analyze_line(filename='mock_filename', line='sc-12345678') + self.assertEqual(bool(results), False) + + ## additional tests + def test_aws_sg_id_valid_other1(self): + results = self.detector.analyze_line(filename='mock_filename', line='opera-dev-cluster-sg-collinss for keypair collinss') + self.assertEqual(bool(results), True) + + def test_aws_sg_id_valid_other2(self): + results = self.detector.analyze_line(filename='mock_filename', line='cluster_security_group_id=sg-037e6de521a3f4854') + self.assertEqual(bool(results), True) + + + # test VPC id + + def test_aws_vpc_id_valid_length_17_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='vpc-02676637ea26098a7') + self.assertEqual(bool(results), True) + + def test_aws_vpc_id_valid_length_17_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='vpc-12345678901234567') + self.assertEqual(bool(results), True) + + def test_aws_vpc_id_valid_length_8_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='vpc-1234abcd') + self.assertEqual(bool(results), True) + + def test_aws_vpc_id_valid_length_8_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='vpc-12345678') + self.assertEqual(bool(results), True) + + def test_aws_vpc_id_valid_length_9_chars(self): + """Special case: valid when length of string >= 8 + """ + results = self.detector.analyze_line(filename='mock_filename', line='vpc-12345678a') + self.assertEqual(bool(results), True) + + def 
test_aws_vpc_id_invalid_length_7_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='vpc-1234567') + self.assertEqual(bool(results), False) + + def test_aws_vpc_id_invalid_length_7_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='vpc-1234abc') + self.assertEqual(bool(results), False) + + def test_aws_vpc_id_invalid_wrong_keyword(self): + results = self.detector.analyze_line(filename='mock_filename', line='vsc-12345678') + self.assertEqual(bool(results), False) + + + # test subnet id + + def test_aws_subnet_id_valid_length_8_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='subnet-12345678') + self.assertEqual(bool(results), True) + + def test_aws_subnet_id_valid_length_8_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='subnet-1234abcd') + self.assertEqual(bool(results), True) + + def test_aws_subnet_id_valid_length_17_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='subnet-12345678901234567') + self.assertEqual(bool(results), True) + + def test_aws_subnet_id_valid_length_17_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='subnet-1234abcd5678efghij') + self.assertEqual(bool(results), True) + + def test_aws_subnet_id_valid_length_9_chars(self): + """Special case: valid when length of string >= 8 + """ + results = self.detector.analyze_line(filename='mock_filename', line='subnet-12345678a') + self.assertEqual(bool(results), True) + + def test_aws_subnet_id_invalid_length_7_digits(self): + results = self.detector.analyze_line(filename='mock_filename', line='subnet-1234567') + self.assertEqual(bool(results), False) + + def test_aws_subnet_id_invalid_length_7_chars(self): + results = self.detector.analyze_line(filename='mock_filename', line='subnet-1234abc') + self.assertEqual(bool(results), False) + + def test_aws_subnet_id_invalid_wrong_keyword(self): + results = 
self.detector.analyze_line(filename='mock_filename', line='subsc-12345678') + self.assertEqual(bool(results), False) + + + # test AWS hostname + + def test_aws_hostname_ip_name_valid1(self): + results = self.detector.analyze_line(filename='mock_filename', line='ip-10-24-34-0.ec2.internal') + self.assertEqual(bool(results), True) + + def test_aws_hostname_ip_name_valid2(self): + results = self.detector.analyze_line(filename='mock_filename', line='ip-10-24-34-0.us-west-2.compute.internal') + self.assertEqual(bool(results), True) + + def test_aws_hostname_ip_name_valid3_only_ip(self): + results = self.detector.analyze_line(filename='mock_filename', line='ip-10-24-34-0') + self.assertEqual(bool(results), True) + + def test_aws_hostname_resource_name_valid1(self): + results = self.detector.analyze_line(filename='mock_filename', line='i-0123456789abcdef.ec2.internal') + self.assertEqual(bool(results), True) + + def test_aws_hostname_resource_name_valid2(self): + results = self.detector.analyze_line(filename='mock_filename', line='i-0123456789abcdef.us-west-2.compute.internal') + self.assertEqual(bool(results), True) + + def test_aws_hostname_resource_name_valid3_only_resource_id(self): + results = self.detector.analyze_line(filename='mock_filename', line='i-0123456789abcdef') + self.assertEqual(bool(results), True) + + def test_aws_hostname_ip_name_invalid_wrong_keyword(self): + results = self.detector.analyze_line(filename='mock_filename', line='io-10-24-34-0.ec2.internal') + self.assertEqual(bool(results), False) + + def test_aws_hostname_ip_name_invalid_wrong_ip_format(self): + results = self.detector.analyze_line(filename='mock_filename', line='ip.10.24.34.0.ec2.internal') + self.assertEqual(bool(results), False) + + def test_aws_hostname_ip_name_valid_wrong_ip_format(self): + """256 is not a valid IP address, but it passes the regex since length 3 is allowed + """ + results = self.detector.analyze_line(filename='mock_filename', line='ip-256-24-34-0.ec2.internal') + 
self.assertEqual(bool(results), True) + + def test_aws_hostname_ip_name_invalid_wrong_ip_format_length4(self): + results = self.detector.analyze_line(filename='mock_filename', line='ip-1024-24-34-0.ec2.internal') + self.assertEqual(bool(results), False) + + def test_aws_hostname_resource_name_invalid_wrong_keyword(self): + results = self.detector.analyze_line(filename='mock_filename', line='r-0123456789abcdef.ec2.internal') + self.assertEqual(bool(results), False) + + def test_aws_hostname_resource_name_invalid_wrong_length(self): + """Instead of 16 digits, there are 10 digits + """ + results = self.detector.analyze_line(filename='mock_filename', line='i-0123456789a.ec2.internal') + self.assertEqual(bool(results), False) + + +if __name__ == '__main__': + unittest.main() diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_email_address.py b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_email_address.py new file mode 100644 index 000000000..942b09a7e --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_email_address.py @@ -0,0 +1,45 @@ +import os +import sys +import unittest + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) + +from detect_secrets.plugins.email_address import EmailAddressDetector + + +class TestEmailAddressDetector(unittest.TestCase): + """ + Testing strategy + + Cover the cartesian product of these partitions: + + 1. Partition on email address format: + a. Valid email address + b. 
Invalid email address + """ + + def setUp(self): + self.detector = EmailAddressDetector() + + def test_valid_email(self): + email = 'john.doe@example.com' + results = self.detector.analyze_line(filename='mock_filename', line=email) + self.assertEqual(bool(results), True) + + def test_invalid_email(self): + email = 'john.doe@123123123' + results = self.detector.analyze_line(filename='mock_filename', line=email) + self.assertEqual(bool(results), False) + + def test_valid_email_within_text(self): + line = 'This is a valid email: john.doe@example.com within some text.' + results = self.detector.analyze_line(filename='mock_filename', line=line) + self.assertEqual(bool(results), True) + + def test_invalid_email_within_text(self): + line = 'This is an invalid email: john.doe@123123123 within some text.' + results = self.detector.analyze_line(filename='mock_filename', line=line) + self.assertEqual(bool(results), False) + +if __name__ == '__main__': + unittest.main() diff --git a/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_ip_address.py b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_ip_address.py new file mode 100644 index 000000000..4a4d40276 --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/plug-ins/tests/test_ip_address.py @@ -0,0 +1,71 @@ +import os +import sys +import unittest + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..'))) + +from detect_secrets.plugins.ip_address import IPAddressDetector + + +class TestIPAddressesDetector(unittest.TestCase): + """ + Testing strategy + + Cover the cartesian product of these partitions: + + 1. Partition on IP address format: + a. Valid IPv4 address + b. Invalid IPv4 address + c. Valid IPv6 address + d. Invalid IPv6 address + + 2. Partition on IP address type: + a. Public + b. 
Private + """ + + def setUp(self): + self.detector = IPAddressDetector() + + def test_valid_public_ipv4(self): + ip = '8.8.8.8' + results = self.detector.analyze_line(filename='mock_filename', line=ip) + self.assertEqual(bool(results), True) + + def test_valid_private_ipv4(self): + ip = '192.168.1.1' + results = self.detector.analyze_line(filename='mock_filename', line=ip) + self.assertEqual(bool(results), True) + + def test_invalid_ipv4_but_pass(self): + ip = '300.12.34.56' + results = self.detector.analyze_line(filename='mock_filename', line=ip) + self.assertEqual(bool(results), True) + + def test_valid_public_ipv6(self): + ip = '2001:0db8:85a3:0000:0000:8a2e:0370:7334' + results = self.detector.analyze_line(filename='mock_filename', line=ip) + self.assertEqual(bool(results), True) + + def test_valid_ipv6(self): + ip = '2001:db8::1' + results = self.detector.analyze_line(filename='mock_filename', line=ip) + self.assertEqual(bool(results), True) + + def test_invalid_ipv6(self): + ip = '2001:0db8:85a3:0000:0000:8a2e:0370:733g' + results = self.detector.analyze_line(filename='mock_filename', line=ip) + self.assertEqual(bool(results), False) + + def test_valid_ip_within_text(self): + line = 'This is a valid IP: 192.168.1.100 within some text.' + results = self.detector.analyze_line(filename='mock_filename', line=line) + self.assertEqual(bool(results), True) + + def test_invalid_ip_within_text_but_pass(self): + line = 'This is an invalid IP: 400.200.100.50 within some text.' 
+ results = self.detector.analyze_line(filename='mock_filename', line=line) + self.assertEqual(bool(results), True) + +if __name__ == '__main__': + unittest.main() From dd385595ca8963ab1c83778b894e4a41a0bb9fc6 Mon Sep 17 00:00:00 2001 From: Jingchao Zhong Date: Thu, 20 Apr 2023 09:00:12 -0700 Subject: [PATCH 2/4] Add test files for detect secrets --- continuous-integration/starter-kits/detect-secrets/test_files | 1 + 1 file changed, 1 insertion(+) create mode 160000 continuous-integration/starter-kits/detect-secrets/test_files diff --git a/continuous-integration/starter-kits/detect-secrets/test_files b/continuous-integration/starter-kits/detect-secrets/test_files new file mode 160000 index 000000000..d023dbd7e --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/test_files @@ -0,0 +1 @@ +Subproject commit d023dbd7ead34f687222ada9a2b6d1a27689f58e From 41cf55187d57c4c0e88a978be3aafb6752a9f0a9 Mon Sep 17 00:00:00 2001 From: Jingchao Zhong Date: Thu, 20 Apr 2023 09:01:05 -0700 Subject: [PATCH 3/4] Update starter kit for detect secrets and pre-commit --- continuous-integration/starter-kits/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/continuous-integration/starter-kits/README.md b/continuous-integration/starter-kits/README.md index 03c584367..2dff5d06d 100644 --- a/continuous-integration/starter-kits/README.md +++ b/continuous-integration/starter-kits/README.md @@ -145,3 +145,7 @@ To leverage this template, make sure to do the following: 4. Automation should be set up to scan for new commits, but it's a good idea to scan the history of commits starting out: `git secrets --scan-history` 1. If you receive no output, that means the tool found now problematic commits. +### Automated Checking for General Sensitive Information within Git +This tool help you get started with the detection of sensitive information across three layers of Git and GitHub repositories. 
This solution also provides customized plugins to support more types of secrets. Please see the link below for details. + +[Starter Kit](detect-secrets/README.md) From d3364183d8d34bf0765ab4ef674161a2840e2a9c Mon Sep 17 00:00:00 2001 From: Jingchao Zhong Date: Thu, 20 Apr 2023 09:01:56 -0700 Subject: [PATCH 4/4] Update starter kit for detect secrets and pre-commit --- .../starter-kits/detect-secrets/README.md | 211 ++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 continuous-integration/starter-kits/detect-secrets/README.md diff --git a/continuous-integration/starter-kits/detect-secrets/README.md b/continuous-integration/starter-kits/detect-secrets/README.md new file mode 100644 index 000000000..4e6661ce7 --- /dev/null +++ b/continuous-integration/starter-kits/detect-secrets/README.md @@ -0,0 +1,211 @@ +# Automated Checking for General Sensitive Information within Git + +This page contains starter kit information for the open source tools [detect-secrets](https://github.com/Yelp/detect-secrets) and [pre-commit](https://github.com/pre-commit-ci) to help you get started with the detection of sensitive information across three layers of Git and GitHub repositories. This solution also provides [customized plugins](plug-ins/README.md) to support more types of secrets. Please see categories and links below for details. + +## Content +* [What is detect-secrets?](#what-is-detect-secrets) +* [What is pre-commit?](#what-is-pre-commit) +* [Introduction to three layers of protection](#introduction-to-three-layers-of-protection) +* [Layer 1: client-side full scan of existing code base](#layer-1-client-side-full-scan-of-existing-code-base) +* [Layer 2: client-side scan of updated code upon Git commit](#layer-2-client-side-scan-of-updated-code-upon-git-commit) +* [Layer 3: server-side push to GitHub.com from client](#layer-3-server-side-push-to-githubcom-from-client) +* [Recommended workflow](#recommended-workflow) + +## What is detect-secrets? 
+[detect-secrets](https://github.com/Yelp/detect-secrets) is an open source tool to detect secrets in files, which is recommended by [Microsoft](https://microsoft.github.io/code-with-engineering-playbook/continuous-integration/dev-sec-ops/secret-management/recipes/detect-secrets/) and the government (**TODO: John mentioned this. I have not found the link yet**). It is designed to be used as a pre-commit hook and/or as a CI step to prevent secrets from being committed to a repository. It is also designed to be used as a standalone tool to scan a codebase for secrets. + +## What is pre-commit? +[pre-commit](https://pre-commit.com/) is a framework for managing and maintaining multi-language pre-commit hooks. It is a command line tool that is installed on the developer’s local machine. It can also be used as a service, [pre-commit.ci](https://pre-commit.ci/), which is a free service for open source projects. + +## Introduction to three layers of protection +This page proposes three layers of secret scanning to help prevent secrets from being leaked on GitHub. + +Three layers of protection are: +1. Client-side full scan of existing code base +2. Client-side scan of updated code upon Git commit +3. Server-side push to GitHub.com from client + +Each layer has its own advantages and disadvantages. The full use of these three layers of protection is recommended to minimize human negligence. + +Here is an overview diagram of the user workflow: +```mermaid +flowchart TB + User([fa:fa-user User]) + + subgraph UserWorkflow["User Workflow to Secure Secrets"] + Layer1["1. Layer 1: GitHub.com (server-side)"] + Layer2["2. Layer 2: Git commit scan (client-side)"] + Layer3["3. Layer 3: Full scan (client-side)"] + + Layer1 -->|If Secrets Detected| Clean1[Purge or Fix the commit manually] + Layer2 -->|If Secrets Detected| Clean2[Clean local file directly.
Don't need to worry about cleaning commit history] + Layer3 -->|If Secrets Detected| Clean3[Clean local file directly.] + + Secure["Only the main branch is safe.
Secrets are leaked on other branches before cleaning"] + Clean1 --> Secure + + SaveTime["It saves you time, and secrets are kept off GitHub"] + Clean2 --> SaveTime + Clean3 --> SaveTime + end + + User -->|At least use| Layer1 + User -->|Helpful to use| Layer2 + User -->|Optional to use| Layer3 + + style User fill:#F6F5F3,stroke:#333,stroke-width:1px + style UserWorkflow fill:#AF7AC5,stroke:#333,stroke-width:2px + style Layer1 fill:#F3B044,stroke:#333,stroke-width:2px,stroke-dasharray: 5 5 + style Layer2 fill:#F3B044,stroke:#333,stroke-width:2px,stroke-dasharray: 5 5 + style Layer3 fill:#F3B044,stroke:#333,stroke-width:2px,stroke-dasharray: 5 5 + style Clean1 fill:#5A88ED,stroke:#333,stroke-width:2px + style Clean2 fill:#5A88ED,stroke:#333,stroke-width:2px + style Clean3 fill:#5A88ED,stroke:#333,stroke-width:2px + style SaveTime fill:#5ABF9B,stroke:#333,stroke-width:2px + style Secure fill:#AF3034,stroke:#333,stroke-width:2px +``` + +## Layer 1: client-side full scan of existing code base +This layer allows you to scan the current local code base for secrets. It also provides a way to easily read and label the scan result ([Auditing a Baseline](https://github.com/Yelp/detect-secrets#auditing-a-baseline)). This page also provides a customized version of [detect-secrets](https://github.com/perryzjc/detect-secrets) to support more types of secrets. + +**Starter Kit**: +1. Install the default [detect-secrets](https://github.com/Yelp/detect-secrets) +```bash +pip install detect-secrets +``` +or install the enhanced [detect-secrets](https://github.com/perryzjc/detect-secrets) +```bash +pip install git+https://github.com/perryzjc/detect-secrets@v1.4.4 +``` +> **Additional Secret Types Supported** +> * [AWS sensitive information](plug-ins/README.md) (click for more information) +> * IP address +> * Absolute path +> * Email address + +2. Scan all local files from the current directory and output the result to a file +```bash +detect-secrets scan ./ --all-files > .secrets.baseline +``` + +3. 
Check result method 1: read the result directly from the file `.secrets.baseline` +```bash +cat .secrets.baseline +``` +> **Note**: If any secrets are detected, the result will be located under the `"results":` key of the file. + +For example: +```json +"results": { + "Jenkins/e2e-opera-pcm-develop.sh": [ + { + "type": "AWS Sensitive Information", + "filename": "Jenkins/e2e-opera-pcm-develop.sh", + "hashed_secret": "a2c61f79dd90429e5284ab45de3318c16756a2f0", + "is_verified": false, + "line_number": 39 +}, +``` + +Only the line number is visible through this method. To see the actual secret, you need to use method 2. + +3. Check result method 2: [auditing the result](https://github.com/perryzjc/detect-secrets#auditing-a-baseline) +```bash +detect-secrets audit .secrets.baseline +``` +> **Note**: It provides an interactive interface to view the actual secret and label it as a false positive or a true positive. + +For example: + +Screen Shot 2023-04-20 at 7 08 08 AM + + +## Layer 2: client-side scan of updated code upon Git commit +This layer allows you to easily set up a pre-commit hook that prevents a commit from being created if any secrets exist. This layer uses the recommended approach, [pre-commit](https://pre-commit.com/). + +**Starter Kit**: +1. Install [pre-commit](https://pre-commit.com/) +```bash +pip install pre-commit +``` +2. Set up `.pre-commit-config.yaml` in the root directory of your project + +For default detect-secrets: +```yaml +repos: + - repo: https://github.com/Yelp/detect-secrets + rev: v1.4.0 + hooks: + - id: detect-secrets + args: ['--baseline', '.secrets.baseline'] +``` + +For enhanced detect-secrets: +```yaml +repos: +- repo: https://github.com/perryzjc/detect-secrets + rev: v1.4.4 + hooks: + - id: detect-secrets + args: ['--baseline', '.secrets.baseline'] +``` + +3. 
Install the pre-commit hook based on `.pre-commit-config.yaml` +```bash +pre-commit install +``` +This command will create a `.git/hooks/pre-commit` file based on the configuration in `.pre-commit-config.yaml`. It will be triggered before each commit. + +4. Set up the `.secrets.baseline` file in the root directory of your project +```bash +detect-secrets scan ./ --all-files > .secrets.baseline +``` +> **Note**: The pre-commit hook blocks a commit by comparing new secrets with the results in the `.secrets.baseline` file. If you want to add new secret results, you need to update the `.secrets.baseline` file by re-running the scan command and generating a new baseline file. +> +> You can create an empty baseline file by running this command in a directory without secrets. + +5. Now, if any new secrets are detected, the commit will be blocked, and you will be able to see the error message in the terminal. +For example: +Screen Shot 2023-04-20 at 7 32 10 AM + +6. You can handle this output by either: +* Removing the new secrets from the code base +* Generating a new baseline file by running the scan command to update the baseline file. +```bash +detect-secrets scan ./ --all-files > .secrets.baseline +``` + +## Layer 3: server-side scan of updated code upon Git push +This layer serves as the final layer. As long as anyone sets up this layer, GitHub will be able to show a report of secrets in the code base. When the status check fails and branch protection is set up, it can also protect the main branch from being pushed or merged to. + +> This layer has some major issues: +> 1. It cannot prevent commits from being pushed to GitHub even though it can protect the main branch from being pushed or merged to. Secrets can still exist on other branches. +> 2. Generated reports are visible to the public if using the default setup provided by [pre-commit.ci](https://pre-commit.ci/) or GitHub Actions +> * The public is able to see the following results: 1. filename, 2. line number, 3. secret type, 4. 
hashed secret +> * Additional setup can solve the visibility issue, but it requires additional effort, such as a private repository to receive the reports from the public repository. + +**Starter Kit**: +1. Use the same `.pre-commit-config.yaml` file as layer 2 (in the root directory of your project) +2. Use the same `.secrets.baseline` file as layer 2 (in the root directory of your project) +3. Register your repository on [pre-commit.ci](https://results.pre-commit.ci/) +Screen Shot 2023-04-20 at 8 53 23 AM + +4. After registration, every time you push commits to the repository, pre-commit.ci will run detect-secrets (based on the yaml file) and generate a report. You can see the report on your project's GitHub repository or on the [pre-commit.ci](https://results.pre-commit.ci/) website. + +Screen Shot 2023-04-20 at 8 55 21 AM +Screen Shot 2023-04-20 at 8 56 05 AM + +5. If you set up the main branch as protected, you can also set up the status check to prevent pushes or merges to the main branch if any secrets are detected. + + +## Recommended workflow +1. At least use layer 3 to protect the main branch from being pushed or merged to if any secrets are detected. +2. If any secrets are detected during layer 3, you can: + - Clean the commit history of the branch + - To find out which files need cleaning, you can use layer 1's auditing feature +3. It is recommended to set up layer 2 for every developer + - It can minimize the chance of pushing secrets to the cloud + - Local files are easier to clean than GitHub commit history +4. Layer 1 is involved during each stage + - It helps you generate the baseline file for layers 2 and 3 + - Read the secret easily by using the auditing feature