Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

working character checker for good name, party name and party address #2210

Closed
wants to merge 2 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions api/support/management/commands/get_lite_specialcharacters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from api.applications.models import GoodOnApplication, PartyOnApplication, CaseStatusEnum
from django.core.management.base import BaseCommand
import csv
import re
from django.db.models import Q


class SpecialCharacterFinder:
    """Scan records for characters outside an allow-list and dump the hits to CSV.

    Instantiating the class does all of the work: every entry in ``data`` is
    checked, the formatted matches are stored on ``self.results`` and then
    written to a CSV file.

    Subclasses customise the behaviour by overriding ``match_string``,
    ``fieldnames``, ``get_value`` and ``format_results``.
    """

    # Character class matching any single character that is NOT permitted.
    match_string = r"[^a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>]"
    # CSV column names; must cover every key returned by format_results().
    fieldnames = []

    # Class-level default only; each instance rebinds this in __init__.
    results = []

    def __init__(self, filename, data):
        """Check ``data`` and immediately write the findings to ``filename``.

        Args:
            filename: Output file name, with or without a ``.csv`` suffix.
            data: Iterable of entries understood by ``get_value`` and
                ``format_results``.
        """
        self.filename = filename
        self.results = self.check_data(data)
        self.write_to_csv()

    def check_regex(self, value):
        """Return the set of disallowed characters found in ``value``.

        Returns ``None`` when ``value`` is empty/``None`` or contains only
        permitted characters, so the result can be used directly in a
        truthiness test.
        """
        # A missing value (e.g. a null database column) cannot contain special
        # characters; without this guard re.sub() raises TypeError on None.
        if not value:
            return None
        stripped = re.sub(self.match_string, "", value)
        if len(stripped) < len(value):
            # Characters present in the original but absent once the
            # disallowed ones are stripped are exactly the offenders.
            return set(value).difference(stripped)
        return None

    def get_value(self, entry):
        """Return the text to inspect for ``entry`` (the entry itself by default)."""
        return entry

    def check_data(self, data):
        """Return one formatted result row per entry containing disallowed characters."""
        results = []
        for entry in data:
            value = self.get_value(entry)
            if match := self.check_regex(value):
                results.append(self.format_results(entry, match))
        return results

    def format_results(self, data, match):
        """Build the CSV row for an entry whose good name failed the check.

        Args:
            data: Entry exposing ``application`` and ``good`` attributes.
            match: Set of offending characters returned by ``check_regex``.
        """
        return {
            "org_name": data.application.organisation.name,
            "good_id": data.good.id,
            "reference_code": data.application.reference_code,
            # NOTE(review): a reviewer asked whether this column should be
            # called "good_name" instead of "value". It is kept as "value"
            # because the finder subclasses declare "value" in `fieldnames`.
            "value": data.good.name,
            "match": match,
        }

    def write_to_csv(self):
        """Write ``self.results`` to ``<filename>.csv`` using ``self.fieldnames`` as columns."""
        # NOTE(review): a reviewer suggested passing the complete file name
        # (e.g. "good_names.csv") so it is obvious the output is CSV. Both
        # forms are accepted: the suffix is only appended when it is missing.
        target = str(self.filename)
        if not target.lower().endswith(".csv"):
            target = f"{target}.csv"
        # The rows contain, by definition, characters outside plain ASCII, so
        # the encoding is pinned instead of relying on the platform default.
        with open(target, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=self.fieldnames)
            writer.writeheader()
            writer.writerows(self.results)


class GoodSpecialCharacterFinder(SpecialCharacterFinder):
    """Finder that inspects the name of the good attached to each entry."""

    # Columns of the generated CSV, in output order.
    fieldnames = ["org_name", "reference_code", "good_id", "value", "match"]

    def get_value(self, entry):
        """Return the name of the good referenced by ``entry``."""
        good = entry.good
        return good.name


class PartyNameSpecialCharacterFinder(SpecialCharacterFinder):
    """Finder that inspects the name of the party attached to each entry."""

    # Columns of the generated CSV, in output order.
    fieldnames = ["org_name", "reference_code", "party_id", "value", "match"]

    def get_value(self, entry):
        """Return the name of the party referenced by ``entry``."""
        party = entry.party
        return party.name

    def format_results(self, data, match):
        """Build the CSV row for an entry whose party name failed the check.

        Args:
            data: Entry exposing ``application`` and ``party`` attributes.
            match: The offending characters reported for the party name.
        """
        row = dict(
            org_name=data.application.organisation.name,
            party_id=data.party.id,
            reference_code=data.application.reference_code,
            value=data.party.name,
            match=match,
        )
        return row


class PartyAddressSpecialCharacterFinder(SpecialCharacterFinder):
    """Finder that inspects the address of the party attached to each entry."""

    # Same allow-list as the base pattern plus carriage return and line feed,
    # so multi-line addresses are not reported for their line breaks alone.
    match_string = r"[^a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>\r\n]"
    # Columns of the generated CSV, in output order.
    fieldnames = ["org_name", "reference_code", "party_id", "value", "match"]

    def get_value(self, entry):
        """Return the address of the party referenced by ``entry``."""
        party = entry.party
        return party.address

    def format_results(self, data, match):
        """Build the CSV row for an entry whose party address failed the check.

        Args:
            data: Entry exposing ``application`` and ``party`` attributes.
            match: The offending characters reported for the party address.
        """
        row = dict(
            org_name=data.application.organisation.name,
            party_id=data.party.id,
            reference_code=data.application.reference_code,
            value=data.party.address,
            match=match,
        )
        return row


class Command(BaseCommand):
    """Report special characters in good names, party names and party addresses.

    Only records attached to applications that are not in a terminal status
    are considered. One CSV file is written per checked field.
    """

    help = """
    Command to check special characters within LITE

    This will generate csvs for good.name, party.name and party.address which can be retrieved using:
    cf ssh <app> -c "cat app/csvname.csv" > csvname.csv

    to be passed forward to support so that exporters can be contacted to review the fields raised
    """

    def handle(self, *args, **options):
        """Query the candidate rows and write good_names, party_names and party_address CSVs."""
        # Positive patterns for the database filter: a value "matches" when it
        # consists solely of permitted characters. The finder classes carry the
        # negated equivalents used for the per-character report.
        name_match_string = r"^[a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>]+$"
        address_match_string = r"^[a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>\r\n]+$"

        # get goods that don't match the string and are not finalised
        # NOTE(review): a reviewer asked whether this could be extended to Good
        # itself, so exporters can edit (or archive and recreate) goods before
        # they are used on an application. The author deliberately kept this
        # script focused on "in flight" applications; the query can be pointed
        # at Good later, though that would likely need a new finder subclass
        # to format the rows.
        goa = GoodOnApplication.objects.filter(
            ~Q(good__name__iregex=name_match_string)
            & ~Q(application__status__status__in=CaseStatusEnum._terminal_statuses)
        ).select_related("good", "application__organisation")
        # select_related: the finders read good, application and organisation
        # for every row; joining up front avoids several extra queries per row.

        # get parties that don't match the string and are not finalised
        party_matches = PartyOnApplication.objects.filter(
            Q(~Q(party__name__iregex=name_match_string) | ~Q(party__address__iregex=address_match_string))
            & ~Q(application__status__status__in=CaseStatusEnum._terminal_statuses)
        ).select_related("party", "application__organisation")

        # Each finder checks its rows and writes "<name>.csv" on construction.
        # party_matches holds rows that failed on name OR address, so each
        # party finder re-checks its own field and reports only real hits.
        GoodSpecialCharacterFinder("good_names", goa)
        PartyNameSpecialCharacterFinder("party_names", party_matches)
        PartyAddressSpecialCharacterFinder("party_address", party_matches)


# retrieve file:
# cf ssh lite-api-uat -c "cat app/good_names.csv" > good_names.csv
# cf ssh lite-api-uat -c "cat app/party_names.csv" > party_names.csv
# cf ssh lite-api-uat -c "cat app/party_address.csv" > party_address.csv