-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
working character checker for good name, party name and party address #2210
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
from api.applications.models import GoodOnApplication, PartyOnApplication, CaseStatusEnum | ||
from django.core.management.base import BaseCommand | ||
import csv | ||
import re | ||
from django.db.models import Q | ||
|
||
|
||
class SpecialCharacterFinder:
    """Find characters outside an allow-list in a set of records and export them.

    Constructing an instance runs the whole pipeline: every entry in ``data``
    is checked, the formatted hits are stored on ``self.results`` and then
    written to a CSV file named after ``filename``.

    Subclasses set ``fieldnames`` and override ``get_value`` (and, where the
    record is not a good, ``format_results``) for the field being checked.
    """

    # Character class matching any single character that is NOT permitted.
    match_string = r"[^a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>]"
    # CSV header columns; must cover every key returned by format_results.
    fieldnames = []

    # Class-level default only; each instance gets its own list in __init__.
    results = []

    def __init__(self, filename, data):
        """Check ``data`` and write the results to the CSV named by ``filename``.

        Args:
            filename: Output path, with or without a trailing ``.csv``.
            data: Iterable of records understood by ``get_value`` and
                ``format_results``.
        """
        self.filename = filename
        self.results = self.check_data(data)
        self.write_to_csv()

    def check_regex(self, value):
        """Return the set of disallowed characters in ``value``.

        Returns ``None`` when ``value`` is empty/``None`` or contains only
        permitted characters, so callers can rely on plain truthiness.
        """
        # A missing or empty field has nothing to report; guarding here avoids
        # the TypeError the regex functions raise when handed None.
        if not value:
            return None
        # match_string matches one character at a time, so the distinct
        # matches are exactly the offending characters.
        offending = set(re.findall(self.match_string, value))
        return offending or None

    def get_value(self, entry):
        """Return the text to check for ``entry``; the base class returns it as-is."""
        return entry

    def check_data(self, data):
        """Return a list of formatted results for entries with disallowed characters."""
        results = []
        for entry in data:
            if match := self.check_regex(self.get_value(entry)):
                results.append(self.format_results(entry, match))
        return results

    def format_results(self, data, match):
        """Build the CSV row for a good-on-application style record.

        Reads ``data.application`` and ``data.good``; subclasses working with
        other record types override this.
        """
        return {
            "org_name": data.application.organisation.name,
            "good_id": data.good.id,
            "reference_code": data.application.reference_code,
            "value": data.good.name,
            "match": match,
        }

    def write_to_csv(self):
        """Write ``self.results`` to disk as a CSV with a header row."""
        # Review note: callers may pass the complete file name; only append
        # the extension when it is not already there.
        filename = str(self.filename)
        if not filename.lower().endswith(".csv"):
            filename = f"{filename}.csv"
        # The rows exist precisely because they contain unusual characters, so
        # the encoding is pinned rather than left to the platform default.
        with open(filename, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=self.fieldnames)
            writer.writeheader()
            writer.writerows(self.results)
|
||
|
||
class GoodSpecialCharacterFinder(SpecialCharacterFinder):
    """Finder that checks the name of the good attached to each entry."""

    fieldnames = ["org_name", "reference_code", "good_id", "value", "match"]

    def get_value(self, entry):
        """Return the name of ``entry``'s good, the text to be checked."""
        good = entry.good
        return good.name
|
||
|
||
class PartyNameSpecialCharacterFinder(SpecialCharacterFinder):
    """Finder that checks the name of the party attached to each entry."""

    fieldnames = ["org_name", "reference_code", "party_id", "value", "match"]

    def get_value(self, entry):
        """Return the name of ``entry``'s party, the text to be checked."""
        party = entry.party
        return party.name

    def format_results(self, data, match):
        """Build the CSV row for a party whose name contains disallowed characters."""
        application = data.application
        party = data.party
        return dict(
            org_name=application.organisation.name,
            party_id=party.id,
            reference_code=application.reference_code,
            value=party.name,
            match=match,
        )
|
||
|
||
class PartyAddressSpecialCharacterFinder(SpecialCharacterFinder):
    """Finder that checks the address of the party attached to each entry."""

    # Same allow-list as the name check, plus carriage return and newline.
    match_string = r"[^a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>\r\n]"
    fieldnames = ["org_name", "reference_code", "party_id", "value", "match"]

    def get_value(self, entry):
        """Return the address of ``entry``'s party, the text to be checked."""
        party = entry.party
        return party.address

    def format_results(self, data, match):
        """Build the CSV row for a party whose address contains disallowed characters."""
        application = data.application
        party = data.party
        return dict(
            org_name=application.organisation.name,
            party_id=party.id,
            reference_code=application.reference_code,
            value=party.address,
            match=match,
        )
|
||
|
||
class Command(BaseCommand):
    """Export goods and parties on non-terminal applications that contain disallowed characters."""

    help = """
    Command to check special characters within LITE

    This will generate csvs for good.name, party.name and party.address which can be retrieved using:
    cf ssh <app> -c "cat app/csvname.csv" > csvname.csv

    to be passed forward to support so that exporters can be contacted to review the fields raised
    """

    def handle(self, *args, **options):
        """Query the offending records and write one CSV per checked field.

        Writes ``good_names.csv``, ``party_names.csv`` and
        ``party_address.csv`` relative to the current working directory.
        """
        # Anchored allow-list patterns: a value is acceptable only if every
        # character is permitted, so the queries below negate the match.
        name_match_string = r"^[a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>]+$"
        address_match_string = r"^[a-zA-Z0-9 .,\-\)\(\/'+:=\?\!\"%&\*;\<\>\r\n]+$"

        # get goods that don't match the string and are not finalised
        # NOTE(review): this deliberately targets "in flight" applications. It
        # could be pointed at goods directly, but that would likely need a new
        # finder subclass to format the rows.
        # select_related pulls in the rows the finders read per entry, so the
        # export does not issue extra queries for every record.
        goa = GoodOnApplication.objects.select_related("good", "application__organisation").filter(
            ~Q(good__name__iregex=name_match_string)
            & ~Q(application__status__status__in=CaseStatusEnum._terminal_statuses)
        )

        # get parties that don't match the string and are not finalised
        party_matches = PartyOnApplication.objects.select_related("party", "application__organisation").filter(
            Q(~Q(party__name__iregex=name_match_string) | ~Q(party__address__iregex=address_match_string))
            & ~Q(application__status__status__in=CaseStatusEnum._terminal_statuses)
        )

        # Each finder checks its records and writes its CSV on construction.
        GoodSpecialCharacterFinder("good_names", goa)
        PartyNameSpecialCharacterFinder("party_names", party_matches)
        PartyAddressSpecialCharacterFinder("party_address", party_matches)
|
||
|
||
# retrieve file: | ||
# cf ssh lite-api-uat -c "cat app/good_names.csv" > good_names.csv | ||
# cf ssh lite-api-uat -c "cat app/party_names.csv" > party_names.csv | ||
# cf ssh lite-api-uat -c "cat app/party_address.csv" > party_address.csv |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this column actually `good_name` instead of `value`?