Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sanitize numbers #631

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 7 additions & 18 deletions src/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from sqlalchemy.dialects.postgresql import JSONB, insert
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql.functions import coalesce
from utils import standardize_phone_number

Base = declarative_base()

Expand Down Expand Up @@ -92,21 +93,6 @@ def dedup_consecutive(table, unique_id, id, order_by, dedup_on):
return delete(table).where(unique_id == to_delete.c[0])


def normalize_phone_number(number):
    """Return *number* as a bare 10-digit string, or ``None`` if it is unusable.

    The value is stringified, punctuation is stripped, a single leading "1"
    is dropped, and the result is accepted only when exactly 10 digits remain.
    Falsy values and values whose string form is "nan" yield ``None``.
    """
    # Guard clause: nothing to normalize for empty/missing values.
    if not number or str(number) == "nan":
        return None

    # NOTE: inside the character class, " -." is a range (space through
    # period), so this strips more than just the characters spelled out.
    digits = re.sub("[() -.+]", "", str(number))

    # Drop a leading "1" before checking the length.
    if digits.startswith("1"):
        digits = digits[1:]

    if len(digits) == 10 and digits.isdigit():
        return digits
    return None


class PdpContacts(Base):
__tablename__ = "pdp_contacts"
__table_args__ = (
Expand Down Expand Up @@ -173,8 +159,10 @@ def insert_from_file_df(cls, df, conn):
df = df[column_translation.keys()]
df = df.rename(columns=column_translation)

df["phone"] = df["phone"].apply(normalize_phone_number)
df["mobile"] = df["mobile"].apply(normalize_phone_number)
phone_numbers = [standardize_phone_number(phone) for phone in df["phone"]]
mobile_numbers = [standardize_phone_number(phone) for phone in df["mobile"]]
df["phone"] = phone_numbers
df["mobile"] = mobile_numbers

dedup_on = [col for col in cls.__table__.columns if col.name in df.columns]
df["created_date"] = datetime.datetime.utcnow()
Expand Down Expand Up @@ -237,7 +225,8 @@ def insert_from_df(cls, df, conn):
df = df[column_translation.keys()]
df = df.rename(columns=column_translation)

df["phone"] = df["phone"].apply(normalize_phone_number)
phone_numbers = [standardize_phone_number(phone) for phone in df["phone"]]
df["phone"] = phone_numbers

dedup_on = [col for col in cls.__table__.columns if col.name in df.columns]
df["created_date"] = datetime.datetime.utcnow()
Expand Down
28 changes: 28 additions & 0 deletions src/server/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import re

def standardize_phone_number(phone):
    """Standardize a phone number to its 10-digit form.

    Args:
        phone: The phone number to standardize. Normally a string, but
            ``None``, integers and floats (including NaN) are tolerated
            instead of raising.

    Returns:
        str | None: The last 10 digits of the number, or ``None`` when the
        value is missing or contains fewer than 10 digits.
    """
    if phone is None:
        return None

    if isinstance(phone, float):
        # NaN/inf and fractional values are not phone numbers. Integral
        # floats are converted to int first, because str(5551234567.0)
        # would contribute a spurious trailing "0" digit.
        if not phone.is_integer():
            return None
        phone = int(phone)

    # Remove all non-numeric characters; str() keeps re.sub from raising
    # TypeError on non-string input such as ints.
    digits = re.sub(r"\D", "", str(phone))

    # Fewer than 10 digits cannot be a complete number.
    if len(digits) < 10:
        return None

    # Exactly 10 digits are returned unchanged; for longer input (e.g. a
    # leading country code) only the last 10 digits are kept.
    return digits[-10:]
12 changes: 8 additions & 4 deletions src/server/volgistics_importer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import re
from flask.globals import current_app
from datetime import datetime, timedelta
from openpyxl import load_workbook
from jellyfish import jaro_similarity

from config import engine
from utils import standardize_phone_number

import structlog

Expand Down Expand Up @@ -178,6 +178,10 @@ def volgistics_people_import(workbook):
col_email = col['Email']
time_stamp = datetime.utcnow()

home_phone = standardize_phone_number(r[col_home])
work_phone = standardize_phone_number(r[col_work])
cell_phone = standardize_phone_number(r[col_cell])

try:
for r in ws.iter_rows(min_row=2, max_col=42,values_only=True):
insert_list.append(
Expand All @@ -194,9 +198,9 @@ def volgistics_people_import(workbook):
"state": r[col_state],
"zip": r[col_zip],
"all_phone_numbers": r[col_all_phones],
"home": r[col_home],
"work": r[col_work],
"cell": r[col_cell],
"home": home_phone,
"work": work_phone,
"cell": cell_phone,
"email": r[col_email],
"created_date" : time_stamp
}
Expand Down