Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Data Sources Logic #60

Draft
wants to merge 1 commit into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 33 additions & 39 deletions database_client/constants.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,17 @@
# Agency column names: identity, jurisdiction, location, and
# approval/audit fields.
# NOTE(review): no consumer of this list is visible in this view; confirm
# callers before changing its order or contents.
AGENCY_APPROVED_COLUMNS = [
"homepage_url",
"count_data_sources",
"agency_type",
"multi_agency",
"submitted_name",
"jurisdiction_type",
"state_iso",
"municipality",
"zip_code",
"county_fips",
"county_name",
"lat",
"lng",
"data_sources",
"no_web_presence",
"airtable_agency_last_modified",
"data_sources_last_updated",
"approved",
"rejection_reason",
"last_approval_editor",
"agency_created",
"county_airtable_uid",
"defunct_year",
]
DATA_SOURCES_APPROVED_COLUMNS = [
# These columns are used when pulling from the data sources table

Check warning on line 1 in database_client/constants.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] database_client/constants.py#L1 <100>

Missing docstring in public module
Raw output
./database_client/constants.py:1:1: D100 Missing docstring in public module
# Field names as they appear on the data_sources table itself (unprefixed).
DATA_SOURCES_ORIGINAL_COLUMNS = [
    "name",
    "submitted_name",
    "last_approval_editor",
]
# Columns selected when pulling from the Agency_Data_Sources_View table.
# Each entry is the "data_source_"-prefixed counterpart of a name in
# DATA_SOURCES_ORIGINAL_COLUMNS (e.g. "name" -> "data_source_name").
DATA_SOURCES_VIEW_COLUMNS = [
"data_source_name",
"data_source_submitted_name",
"data_source_last_approval_editor",
]

# These columns are the same whether in the Agency_Data_Sources_View table or the data_sources table
DATA_SOURCES_AGNOSTIC_COLUMNS = [
"description",
"record_type",
"source_url",
Expand Down Expand Up @@ -53,7 +39,6 @@
"airtable_source_last_modified",
"url_status",
"rejection_note",
"last_approval_editor",
"agency_described_submitted",
"agency_described_not_in_database",
"approval_status",
Expand All @@ -66,30 +51,39 @@
"access_notes",
"last_cached",
]
DATA_SOURCES_OUTPUT_COLUMNS = DATA_SOURCES_APPROVED_COLUMNS + ["agency_name"]
# Shared columns plus the names as spelled on the data_sources table.
DATA_SOURCES_NEEDS_IDENTIFICATION_COLUMNS = (
    DATA_SOURCES_AGNOSTIC_COLUMNS + DATA_SOURCES_ORIGINAL_COLUMNS
)
# Shared columns plus the "data_source_"-prefixed names exposed by the view.
DATA_SOURCES_APPROVED_COLUMNS = (
    DATA_SOURCES_AGNOSTIC_COLUMNS + DATA_SOURCES_VIEW_COLUMNS
)
# Data-source column names singled out as restricted.
# NOTE(review): where this list is enforced is not visible here; presumably
# these fields are withheld from, or locked for, some callers. Confirm usage.
RESTRICTED_DATA_SOURCE_COLUMNS = [
"rejection_note",
"data_source_request",
"approval_status",
"airtable_uid",
"airtable_source_last_modified",
]
DATA_SOURCES_MAP_COLUMN = [

AGENCY_DATA_SOURCE_VIEW_COLUMNS = [
"data_source_id",
"name",
"agency_id",
"agency_name",
"homepage_url",
"count_data_sources",
"agency_type",
"agency_submitted_name",
"jurisdiction_type",
"state_iso",
"municipality",
"county_fips",
"county_name",
"record_type",
"lat",
"lng",
]
# Column names singled out as restricted.
# NOTE(review): where this list is enforced is not visible here; presumably
# these fields are withheld from, or locked for, some callers. Confirm usage.
RESTRICTED_COLUMNS = [
"rejection_note",
"data_source_request",
"approval_status",
"airtable_uid",
"airtable_source_last_modified",
]
"data_sources",
"no_web_presence",
"airtable_agency_last_modified",
"data_sources_last_updated",
"approved",
"rejection_reason",
"agency_last_approval_editor",
"agency_created",
"county_airtable_uid",
"defunct_year",
] + DATA_SOURCES_AGNOSTIC_COLUMNS + DATA_SOURCES_VIEW_COLUMNS
174 changes: 73 additions & 101 deletions database_client/database_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
from collections import namedtuple
from contextlib import contextmanager
from datetime import datetime
from typing import Optional, Any, List
from typing import Optional, Any, List, Tuple

Check warning on line 5 in database_client/database_client.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] database_client/database_client.py#L5 <401>

'typing.Tuple' imported but unused
Raw output
./database_client/database_client.py:5:1: F401 'typing.Tuple' imported but unused
import uuid

import psycopg2
from psycopg2 import sql
from psycopg2.extras import DictRow

from database_client.constants import DATA_SOURCES_APPROVED_COLUMNS
from database_client.parameter_objects import AgencyDataSourceParams
from database_client.dynamic_query_constructor import DynamicQueryConstructor
from middleware.custom_exceptions import (
UserNotFoundError,
Expand All @@ -16,49 +19,31 @@
)
from utilities.enums import RecordCategories

# Names of the ten output columns, in order, of the map SQL query: data source
# id/name/record type plus the linked agency's id, name and location fields.
# NOTE(review): no use of this constant is visible in this view; confirm it is
# still referenced.
DATA_SOURCES_MAP_COLUMN = [
"data_source_id",
"name",
"agency_id",
"agency_name",
"state_iso",
"municipality",
"county_name",
"record_type",
"lat",
"lng",
]


QUICK_SEARCH_SQL = """
SELECT
data_sources.airtable_uid,
data_sources.name AS data_source_name,
data_sources.description,
data_sources.record_type,
data_sources.source_url,
data_sources.record_format,
data_sources.coverage_start,
data_sources.coverage_end,
data_sources.agency_supplied,
agencies.name AS agency_name,
agencies.municipality,
agencies.state_iso
adsv.data_source_id as airtable_uid,
adsv.data_source_name,
adsv.description,
adsv.record_type,
adsv.source_url,
adsv.record_format,
adsv.coverage_start,
adsv.coverage_end,
adsv.agency_supplied,
adsv.agency_name,
adsv.municipality,
adsv.state_iso
FROM
agency_source_link
INNER JOIN
data_sources ON agency_source_link.airtable_uid = data_sources.airtable_uid
agency_data_source_view adsv
INNER JOIN
agencies ON agency_source_link.agency_described_linked_uid = agencies.airtable_uid
INNER JOIN
state_names ON agencies.state_iso = state_names.state_iso
state_names ON adsv.state_iso = state_names.state_iso
WHERE
(data_sources.name ILIKE '%{0}%' OR data_sources.description ILIKE '%{0}%' OR data_sources.record_type ILIKE '%{0}%' OR data_sources.tags ILIKE '%{0}%')
AND (agencies.county_name ILIKE '%{1}%' OR substr(agencies.county_name,3,length(agencies.county_name)-4) || ' County' ILIKE '%{1}%'
OR agencies.state_iso ILIKE '%{1}%' OR agencies.municipality ILIKE '%{1}%' OR agencies.agency_type ILIKE '%{1}%' OR agencies.jurisdiction_type ILIKE '%{1}%'
OR agencies.name ILIKE '%{1}%' OR state_names.state_name ILIKE '%{1}%')
AND data_sources.approval_status = 'approved'
AND data_sources.url_status not in ('broken', 'none found')
(adsv.data_source_name ILIKE '%{0}%' OR adsv.description ILIKE '%{0}%' OR adsv.record_type ILIKE '%{0}%' OR adsv.tags ILIKE '%{0}%')
AND (adsv.county_name ILIKE '%{1}%' OR substr(adsv.county_name,3,length(adsv.county_name)-4) || ' County' ILIKE '%{1}%'
OR adsv.state_iso ILIKE '%{1}%' OR adsv.municipality ILIKE '%{1}%' OR adsv.agency_type ILIKE '%{1}%' OR adsv.jurisdiction_type ILIKE '%{1}%'
OR adsv.agency_name ILIKE '%{1}%' OR state_names.state_name ILIKE '%{1}%')
AND adsv.approval_status = 'approved'
AND adsv.url_status not in ('broken', 'none found')

"""

Expand Down Expand Up @@ -219,37 +204,32 @@

self.cursor.execute(query)

# region Data Source Queries
def get_data_source_by_id(self, data_source_id: str) -> Optional[tuple[Any, ...]]:
"""
Get a data source by its ID, including related agency information from the database.
:param data_source_id: The unique identifier for the data source.
:return: A dictionary containing the data source and its related agency details. None if not found.
"""
sql_query = DynamicQueryConstructor.build_data_source_by_id_results_query()
self.cursor.execute(
sql_query,
(data_source_id,),
params = AgencyDataSourceParams(
data_source_id=data_source_id, approval_status=None
)
result = self.cursor.fetchone()
# NOTE: Very big tuple, perhaps very long NamedTuple to be implemented later
return result
return self.get_agencies_data_sources(params)[0]

def get_approved_data_sources(self) -> list[tuple[Any, ...]]:
def get_approved_data_sources(self) -> list[DictRow]:
"""
Fetches all approved data sources and their related agency information from the database.

Selects DATA_SOURCES_APPROVED_COLUMNS plus "agency_name", filtered to approval_status "approved".
:return: A list of DictRow rows, each containing details of a data source and its related agency.
"""
params = AgencyDataSourceParams(
include_columns=DATA_SOURCES_APPROVED_COLUMNS + ["agency_name"],
approval_status="approved",
)
return self.get_agencies_data_sources(params)

sql_query = DynamicQueryConstructor.build_get_approved_data_sources_query()

self.cursor.execute(sql_query)
results = self.cursor.fetchall()
# NOTE: Very big tuple, perhaps very long NamedTuple to be implemented later
return results

def get_needs_identification_data_sources(self) -> list[tuple[Any, ...]]:
def get_needs_identification_data_sources(self) -> list[DictRow]:
"""
Returns a list of data sources that need identification from the database.

Expand All @@ -262,6 +242,28 @@
self.cursor.execute(sql_query)
return self.cursor.fetchall()

def get_data_sources_for_map(self) -> list[DictRow]:
"""

Check warning on line 246 in database_client/database_client.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] database_client/database_client.py#L246 <401>

First line should be in imperative mood
Raw output
./database_client/database_client.py:246:1: D401 First line should be in imperative mood
Return a list of data sources with relevant info for the map from the database.

:return: A list of DictRow rows, each containing details of a data source.
"""
params = AgencyDataSourceParams(
include_columns=[
"data_source_id",
"data_source_name",
"agency_id",
"agency_name",
"state_iso",
"municipality",
"county_name",
"record_type",
"lat",
"lng",
],
)
return self.get_agencies_data_sources(params)

def add_new_data_source(self, data: dict) -> None:
"""
Processes a request to add a new data source.
Expand All @@ -283,53 +285,21 @@
)
self.cursor.execute(sql_query)

# Record type for one map row. Field order matters: fetched rows are unpacked
# into it positionally.
MapInfo = namedtuple(
    "MapInfo",
    "data_source_id data_source_name agency_id agency_name state "
    "municipality county record_type lat lng",
)

def get_data_sources_for_map(self) -> list[MapInfo]:
def get_agencies_data_sources(
self,
params: AgencyDataSourceParams,
) -> list[DictRow]:
"""
Returns a list of data sources with relevant info for the map from the database.
Fetches all data sources and their related agency information from the database.

:return: A list of MapInfo namedtuples, each containing details of a data source.
"""
sql_query = """
SELECT
data_sources.airtable_uid as data_source_id,
data_sources.name,
agencies.airtable_uid as agency_id,
agencies.submitted_name as agency_name,
agencies.state_iso,
agencies.municipality,
agencies.county_name,
data_sources.record_type,
agencies.lat,
agencies.lng
FROM
agency_source_link
INNER JOIN
data_sources ON agency_source_link.airtable_uid = data_sources.airtable_uid
INNER JOIN
agencies ON agency_source_link.agency_described_linked_uid = agencies.airtable_uid
WHERE
data_sources.approval_status = 'approved'
:param params: AgencyDataSourceParams describing the query, e.g. the columns to include, a data source ID, or an approval status filter.
:return: A list of DictRow rows, each containing details of a data source and its related agency.
"""
sql_query = DynamicQueryConstructor.build_agency_data_source_query(params)
self.cursor.execute(sql_query)
results = self.cursor.fetchall()
return self.cursor.fetchall()

return [self.MapInfo(*result) for result in results]
# endregion

def get_agencies_from_page(self, page: int) -> list[tuple[Any, ...]]:
"""
Expand Down Expand Up @@ -372,8 +342,7 @@
sql_query,
(offset,),
)
results = self.cursor.fetchall()
return results
return self.cursor.fetchall()

@staticmethod
def get_offset(page: int) -> int:
Expand Down Expand Up @@ -637,14 +606,17 @@
# Row type for a single typeahead suggestion, as returned by
# get_typeahead_suggestions.
TypeaheadSuggestions = namedtuple(
    "TypeaheadSuggestions", "display_name type state county locality"
)

def get_typeahead_suggestions(self, search_term: str) -> List[TypeaheadSuggestions]:
"""
Returns a list of data sources that match the search query.

:param search_term: The search query.
:return: List of data sources that match the search query.
"""
query = DynamicQueryConstructor.generate_new_typeahead_suggestion_query(search_term)
query = DynamicQueryConstructor.generate_new_typeahead_suggestion_query(
search_term
)
self.cursor.execute(query)
results = self.cursor.fetchall()

Expand Down Expand Up @@ -696,4 +668,4 @@
state=row[11],
)
for row in results
]
]
Loading
Loading