Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add importer and support for ONS data #171

Merged
merged 17 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ruby "~> 3.2.0"

gem "rails", "7.0.8"

gem "aws-sdk-s3"
gem "bootsnap", require: false
gem "gds-api-adapters"
gem "govuk_app_config"
Expand All @@ -12,6 +13,7 @@ gem "httparty"
gem "pact", require: false
gem "pact_broker-client"
gem "pg"
gem "rubyzip"
gem "sentry-sidekiq"
gem "sidekiq-scheduler"
gem "sidekiq-unique-jobs"
Expand Down
19 changes: 19 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ GEM
public_suffix (>= 2.0.2, < 6.0)
ast (2.4.2)
awesome_print (1.9.2)
aws-eventstream (1.2.0)
aws-partitions (1.781.0)
aws-sdk-core (3.175.0)
aws-eventstream (~> 1, >= 1.0.2)
aws-partitions (~> 1, >= 1.651.0)
aws-sigv4 (~> 1.5)
jmespath (~> 1, >= 1.6.1)
aws-sdk-kms (1.67.0)
aws-sdk-core (~> 3, >= 3.174.0)
aws-sigv4 (~> 1.1)
aws-sdk-s3 (1.126.0)
aws-sdk-core (~> 3, >= 3.174.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.4)
aws-sigv4 (1.5.2)
aws-eventstream (~> 1, >= 1.0.2)
bootsnap (1.16.0)
msgpack (~> 1.2)
brakeman (5.2.3)
Expand Down Expand Up @@ -161,6 +177,7 @@ GEM
multi_xml (>= 0.5.2)
i18n (1.14.1)
concurrent-ruby (~> 1.0)
jmespath (1.6.2)
json (2.6.3)
language_server-protocol (3.17.0.3)
link_header (0.0.8)
Expand Down Expand Up @@ -624,6 +641,7 @@ PLATFORMS
x86_64-linux

DEPENDENCIES
aws-sdk-s3
bootsnap
byebug
climate_control
Expand All @@ -640,6 +658,7 @@ DEPENDENCIES
rails (= 7.0.8)
rspec-rails
rubocop-govuk
rubyzip
sentry-sidekiq
sidekiq-scheduler
sidekiq-unique-jobs
Expand Down
4 changes: 1 addition & 3 deletions app/controllers/v1/locations_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ class LocationsController < ApplicationController
def index
Sentry.set_tags postcode: PostcodeHelper.normalise(params[:postcode])

token_manager = OsPlacesApi::AccessTokenManager.new
begin
locations = OsPlacesApi::Client.new(token_manager).locations_for_postcode(params[:postcode])
render json: locations
render json: PostcodeManager.new.locations_for_postcode(params[:postcode])
rescue OsPlacesApi::InvalidPostcodeProvided
render json: { errors: { "postcode": ["Invalid postcode provided"] } }, status: 400
rescue OsPlacesApi::NoResultsForPostcode
Expand Down
14 changes: 8 additions & 6 deletions app/models/location.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
class Location
include ActiveModel::Model

attr_accessor :address, :latitude, :longitude, :local_custodian_code
attr_accessor :address, :longitude, :latitude, :local_custodian_code

def ==(other)
address == other.address &&
latitude == other.latitude &&
longitude == other.longitude &&
local_custodian_code == other.local_custodian_code
def to_hash
{
"address" => address,
"longitude" => longitude,
"latitude" => latitude,
"local_custodian_code" => local_custodian_code,
}
end
end
4 changes: 4 additions & 0 deletions app/models/postcode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ class Postcode < ApplicationRecord
validates :postcode, uniqueness: true
before_validation :normalize_postcode

enum source: %i[os_places onspd].index_with(&:to_s)
scope :active, -> { where(retired: false) }
scope :retired, -> { where(retired: true) }

private

def normalize_postcode
Expand Down
40 changes: 40 additions & 0 deletions app/models/postcode_manager.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
class PostcodeManager
def locations_for_postcode(postcode)
normalised_postcode = PostcodeHelper.normalise(postcode)
unless (record = Postcode.find_by(postcode: normalised_postcode))
record = create_record_from_os_places_api(normalised_postcode)
end

LocationsPresenter.instance_for(record).to_hash
end

def update_postcode(postcode)
normalised_postcode = PostcodeHelper.normalise(postcode)
record = Postcode.os_places.find_by(postcode: normalised_postcode)
location_results = location_results_from_os_places_api(normalised_postcode)

if location_results.empty?
record.destroy if record
elsif record
record.update(results: location_results.results) && record.touch
else
Postcode.create!(postcode: normalised_postcode, source: "os_places", results: location_results.results)
end
end

private

def create_record_from_os_places_api(normalised_postcode)
location_results = location_results_from_os_places_api(normalised_postcode)
raise OsPlacesApi::NoResultsForPostcode unless location_results.any_locations?

Postcode.create_or_find_by!(postcode: normalised_postcode, source: "os_places", results: location_results.results)
end

def location_results_from_os_places_api(normalised_postcode)
token_manager = OsPlacesApi::AccessTokenManager.new
client = OsPlacesApi::Client.new(token_manager)
# Can raise various errors, which we let flow to the controller
client.retrieve_locations_for_postcode(normalised_postcode)
end
end
19 changes: 19 additions & 0 deletions app/presenters/locations_presenter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
class LocationsPresenter
class UnknownSource < StandardError; end

def self.instance_for(postcode)
case postcode.source
when "os_places"
OsPlacesLocationsPresenter.new(postcode)
when "onspd"
OnspdLocationsPresenter.new(postcode)
else
# Should be unreachable, but maybe if data is corrupted?
raise(LocationsPresenter::UnknownSource, "Unknown source #{postcode.source}")
end
end

def initialize(postcode)
@postcode = postcode
end
end
25 changes: 25 additions & 0 deletions app/presenters/onspd_locations_presenter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
class OnspdLocationsPresenter < LocationsPresenter
def to_hash
{
"source" => "Office of National Statistics",
"average_longitude" => @postcode.results.first["ONS"]["AVG_LNG"].to_f,
"average_latitude" => @postcode.results.first["ONS"]["AVG_LAT"].to_f,
"results" => [],
"extra_information" => extra_information,
}
end

def extra_information
extra_information = { source: "ONS source updated infrequently" }

if @postcode.retired?
extra_information.merge!(retired: "Postcode was retired in #{@postcode.results.first['ONS']['DOTERM']}")
end

if @postcode.results.first["ONS"]["TYPE"] == "L"
extra_information.merge!(large: "Postcode is a large user postcode")
end

extra_information
end
end
13 changes: 13 additions & 0 deletions app/presenters/os_places_locations_presenter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class OsPlacesLocationsPresenter < LocationsPresenter
def to_hash
location_results = OsPlacesApi::LocationResults.new(@postcode.results)
locations = location_results.unfiltered_locations

{
"source" => "Ordnance Survey",
"average_longitude" => locations.sum(0.0, &:longitude) / locations.size.to_f,
"average_latitude" => locations.sum(0.0, &:latitude) / locations.size.to_f,
"results" => location_results.filtered_locations.map(&:to_hash),
}
end
end
12 changes: 12 additions & 0 deletions app/workers/ons_base_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
require "aws-sdk-s3"

class OnsBaseWorker
include Sidekiq::Worker
sidekiq_options queue: :queue_ons, lock: :until_executed, lock_timeout: nil

BUCKET_NAME = "govuk-#{ENV['GOVUK_ENVIRONMENT_NAME']}-locations-api-import-csvs".freeze

def s3_client
@s3_client ||= Aws::S3::Client.new(region: "eu-west-1")
end
end
31 changes: 31 additions & 0 deletions app/workers/ons_download_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
require "open-uri"
require "zip"

class OnsDownloadWorker < OnsBaseWorker
DATAFILE_REGEX = /\AData\/multi_csv\/.*.csv\z/

def perform(url)
# 1. Download File
temp_zip_file = Tempfile.new("ONSPD.zip")
IO.copy_stream(URI.parse(url).open, temp_zip_file.path)

# 2. Unzip File/Data/multi_csv, and post to S3 bucket
Zip::File.open(temp_zip_file.path) do |zip_file|
zip_file.each do |entry|
file_details = entry.name.match(DATAFILE_REGEX)
next unless file_details

s3_client.put_object(
bucket: BUCKET_NAME,
key: entry.name,
body: entry.get_input_stream.read,
)

OnsImportWorker.perform_async(entry.name)
Rails.logger.info("ONS Download Worker: Added #{entry.name} to S3 bucket")
end
end
rescue StandardError => e
GovukError.notify("Problem downloading ONS file from #{url}: #{e.message}")
end
end
46 changes: 46 additions & 0 deletions app/workers/ons_import_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
class OnsImportWorker < OnsBaseWorker
def perform(s3_key_name)
temp_csv_file = Tempfile.new("ONSPD.csv")

s3_client.get_object(
response_target: temp_csv_file.path,
bucket: BUCKET_NAME,
key: s3_key_name,
)

CSV.foreach(temp_csv_file.path, headers: true) do |row|
postcode = PostcodeHelper.normalise(row["pcds"])
next if Postcode.os_places.where(postcode:).any?

termination_date = parse_termination_date(row["doterm"])
results = [
{
"ONS" => {
"AVG_LNG" => row["long"],
"AVG_LAT" => row["lat"],
"TYPE" => row["usertype"] == "0" ? "S" : "L",
"DOTERM" => termination_date,
},
},
]

retired = termination_date.blank? ? false : true

existing_record = Postcode.onspd.find_by(postcode:)
if existing_record
existing_record.update(retired:, results:)
else
Postcode.create(postcode:, source: "onspd", retired:, results:)
end
end
rescue StandardError => e
GovukError.notify("ONS Import Worker import problem: #{e.message}")
end

def parse_termination_date(doterm_string)
doterm_string.blank? ? "" : Time.strptime(doterm_string, "%Y%m").strftime("%B %Y")
rescue ArgumentError
Rails.logger.warn("ONS Import Worker found unparseable doterm: #{doterm_string}")
"Unknown"
end
end
2 changes: 1 addition & 1 deletion app/workers/postcodes_collection_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class PostcodesCollectionWorker

def perform
postcodes = Postcode.uncached do
Postcode.order("updated_at ASC").first(POSTCODES_PER_SECOND).pluck(:postcode)
Postcode.os_places.order("updated_at ASC").first(POSTCODES_PER_SECOND).pluck(:postcode)
end
postcodes.each { |postcode| ProcessPostcodeWorker.perform_async(postcode) }
end
Expand Down
3 changes: 1 addition & 2 deletions app/workers/process_postcode_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ class ProcessPostcodeWorker
sidekiq_options queue: :update_postcode, lock: :until_and_while_executing, lock_timeout: nil

def perform(postcode)
token_manager = OsPlacesApi::AccessTokenManager.new
OsPlacesApi::Client.new(token_manager).update_postcode(postcode)
PostcodeManager.new.update_postcode(postcode)
rescue OsPlacesApi::ClientError => e
GovukError.notify(e)
end
Expand Down
3 changes: 1 addition & 2 deletions config/sidekiq.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
:timeout: 4
:max_retries: 1
:queues:
# See here to understand priorities: https://github.com/mperham/sidekiq/wiki/Advanced-Options#queues
# Use powers of 2: higher priority groups are checked twice as often.
- [queue_ons, 8]
- [update_postcode, 8]
- [queue_postcode, 1]
:scheduler:
Expand Down
9 changes: 9 additions & 0 deletions db/migrate/20230605101124_update_postcodes_add_fields.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
class UpdatePostcodesAddFields < ActiveRecord::Migration[7.0]
def change
add_column :postcodes, :source, :string, default: "os_places", null: false
add_column :postcodes, :retired, :boolean, default: false, null: false

add_index :postcodes, :source
add_index :postcodes, :retired
end
end
6 changes: 5 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading