Skip to content

Commit

Permalink
Merge pull request #171 from alphagov/ons-import
Browse files Browse the repository at this point in the history
Add importer and support for ONS data
  • Loading branch information
KludgeKML authored Sep 27, 2023
2 parents 98610fc + d6e848e commit 57fb5e4
Show file tree
Hide file tree
Showing 40 changed files with 1,199 additions and 543 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ruby "~> 3.2.0"

gem "rails", "7.0.8"

gem "aws-sdk-s3"
gem "bootsnap", require: false
gem "gds-api-adapters"
gem "govuk_app_config"
Expand All @@ -12,6 +13,7 @@ gem "httparty"
gem "pact", require: false
gem "pact_broker-client"
gem "pg"
gem "rubyzip"
gem "sentry-sidekiq"
gem "sidekiq-scheduler"
gem "sidekiq-unique-jobs"
Expand Down
19 changes: 19 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ GEM
public_suffix (>= 2.0.2, < 6.0)
ast (2.4.2)
awesome_print (1.9.2)
aws-eventstream (1.2.0)
aws-partitions (1.781.0)
aws-sdk-core (3.175.0)
aws-eventstream (~> 1, >= 1.0.2)
aws-partitions (~> 1, >= 1.651.0)
aws-sigv4 (~> 1.5)
jmespath (~> 1, >= 1.6.1)
aws-sdk-kms (1.67.0)
aws-sdk-core (~> 3, >= 3.174.0)
aws-sigv4 (~> 1.1)
aws-sdk-s3 (1.126.0)
aws-sdk-core (~> 3, >= 3.174.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.4)
aws-sigv4 (1.5.2)
aws-eventstream (~> 1, >= 1.0.2)
bootsnap (1.16.0)
msgpack (~> 1.2)
brakeman (5.2.3)
Expand Down Expand Up @@ -161,6 +177,7 @@ GEM
multi_xml (>= 0.5.2)
i18n (1.14.1)
concurrent-ruby (~> 1.0)
jmespath (1.6.2)
json (2.6.3)
language_server-protocol (3.17.0.3)
link_header (0.0.8)
Expand Down Expand Up @@ -624,6 +641,7 @@ PLATFORMS
x86_64-linux

DEPENDENCIES
aws-sdk-s3
bootsnap
byebug
climate_control
Expand All @@ -640,6 +658,7 @@ DEPENDENCIES
rails (= 7.0.8)
rspec-rails
rubocop-govuk
rubyzip
sentry-sidekiq
sidekiq-scheduler
sidekiq-unique-jobs
Expand Down
4 changes: 1 addition & 3 deletions app/controllers/v1/locations_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ class LocationsController < ApplicationController
def index
Sentry.set_tags postcode: PostcodeHelper.normalise(params[:postcode])

token_manager = OsPlacesApi::AccessTokenManager.new
begin
locations = OsPlacesApi::Client.new(token_manager).locations_for_postcode(params[:postcode])
render json: locations
render json: PostcodeManager.new.locations_for_postcode(params[:postcode])
rescue OsPlacesApi::InvalidPostcodeProvided
render json: { errors: { "postcode": ["Invalid postcode provided"] } }, status: 400
rescue OsPlacesApi::NoResultsForPostcode
Expand Down
14 changes: 8 additions & 6 deletions app/models/location.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
class Location
include ActiveModel::Model

attr_accessor :address, :latitude, :longitude, :local_custodian_code
attr_accessor :address, :longitude, :latitude, :local_custodian_code

def ==(other)
address == other.address &&
latitude == other.latitude &&
longitude == other.longitude &&
local_custodian_code == other.local_custodian_code
def to_hash
{
"address" => address,
"longitude" => longitude,
"latitude" => latitude,
"local_custodian_code" => local_custodian_code,
}
end
end
4 changes: 4 additions & 0 deletions app/models/postcode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ class Postcode < ApplicationRecord
validates :postcode, uniqueness: true
before_validation :normalize_postcode

enum source: %i[os_places onspd].index_with(&:to_s)
scope :active, -> { where(retired: false) }
scope :retired, -> { where(retired: true) }

private

def normalize_postcode
Expand Down
40 changes: 40 additions & 0 deletions app/models/postcode_manager.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
class PostcodeManager
def locations_for_postcode(postcode)
normalised_postcode = PostcodeHelper.normalise(postcode)
unless (record = Postcode.find_by(postcode: normalised_postcode))
record = create_record_from_os_places_api(normalised_postcode)
end

LocationsPresenter.instance_for(record).to_hash
end

def update_postcode(postcode)
normalised_postcode = PostcodeHelper.normalise(postcode)
record = Postcode.os_places.find_by(postcode: normalised_postcode)
location_results = location_results_from_os_places_api(normalised_postcode)

if location_results.empty?
record.destroy if record
elsif record
record.update(results: location_results.results) && record.touch
else
Postcode.create!(postcode: normalised_postcode, source: "os_places", results: location_results.results)
end
end

private

def create_record_from_os_places_api(normalised_postcode)
location_results = location_results_from_os_places_api(normalised_postcode)
raise OsPlacesApi::NoResultsForPostcode unless location_results.any_locations?

Postcode.create_or_find_by!(postcode: normalised_postcode, source: "os_places", results: location_results.results)
end

def location_results_from_os_places_api(normalised_postcode)
token_manager = OsPlacesApi::AccessTokenManager.new
client = OsPlacesApi::Client.new(token_manager)
# Can raise various errors, which we let flow to the controller
client.retrieve_locations_for_postcode(normalised_postcode)
end
end
19 changes: 19 additions & 0 deletions app/presenters/locations_presenter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
class LocationsPresenter
class UnknownSource < StandardError; end

def self.instance_for(postcode)
case postcode.source
when "os_places"
OsPlacesLocationsPresenter.new(postcode)
when "onspd"
OnspdLocationsPresenter.new(postcode)
else
# Should be unreachable, but maybe if data is corrupted?
raise(LocationsPresenter::UnknownSource, "Unknown source #{postcode.source}")
end
end

def initialize(postcode)
@postcode = postcode
end
end
25 changes: 25 additions & 0 deletions app/presenters/onspd_locations_presenter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
class OnspdLocationsPresenter < LocationsPresenter
def to_hash
{
"source" => "Office of National Statistics",
"average_longitude" => @postcode.results.first["ONS"]["AVG_LNG"].to_f,
"average_latitude" => @postcode.results.first["ONS"]["AVG_LAT"].to_f,
"results" => [],
"extra_information" => extra_information,
}
end

def extra_information
extra_information = { source: "ONS source updated infrequently" }

if @postcode.retired?
extra_information.merge!(retired: "Postcode was retired in #{@postcode.results.first['ONS']['DOTERM']}")
end

if @postcode.results.first["ONS"]["TYPE"] == "L"
extra_information.merge!(large: "Postcode is a large user postcode")
end

extra_information
end
end
13 changes: 13 additions & 0 deletions app/presenters/os_places_locations_presenter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class OsPlacesLocationsPresenter < LocationsPresenter
def to_hash
location_results = OsPlacesApi::LocationResults.new(@postcode.results)
locations = location_results.unfiltered_locations

{
"source" => "Ordnance Survey",
"average_longitude" => locations.sum(0.0, &:longitude) / locations.size.to_f,
"average_latitude" => locations.sum(0.0, &:latitude) / locations.size.to_f,
"results" => location_results.filtered_locations.map(&:to_hash),
}
end
end
12 changes: 12 additions & 0 deletions app/workers/ons_base_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
require "aws-sdk-s3"

class OnsBaseWorker
include Sidekiq::Worker
sidekiq_options queue: :queue_ons, lock: :until_executed, lock_timeout: nil

BUCKET_NAME = "govuk-#{ENV['GOVUK_ENVIRONMENT_NAME']}-locations-api-import-csvs".freeze

def s3_client
@s3_client ||= Aws::S3::Client.new(region: "eu-west-1")
end
end
31 changes: 31 additions & 0 deletions app/workers/ons_download_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
require "open-uri"
require "zip"

class OnsDownloadWorker < OnsBaseWorker
DATAFILE_REGEX = /\AData\/multi_csv\/.*.csv\z/

def perform(url)
# 1. Download File
temp_zip_file = Tempfile.new("ONSPD.zip")
IO.copy_stream(URI.parse(url).open, temp_zip_file.path)

# 2. Unzip File/Data/multi_csv, and post to S3 bucket
Zip::File.open(temp_zip_file.path) do |zip_file|
zip_file.each do |entry|
file_details = entry.name.match(DATAFILE_REGEX)
next unless file_details

s3_client.put_object(
bucket: BUCKET_NAME,
key: entry.name,
body: entry.get_input_stream.read,
)

OnsImportWorker.perform_async(entry.name)
Rails.logger.info("ONS Download Worker: Added #{entry.name} to S3 bucket")
end
end
rescue StandardError => e
GovukError.notify("Problem downloading ONS file from #{url}: #{e.message}")
end
end
46 changes: 46 additions & 0 deletions app/workers/ons_import_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
class OnsImportWorker < OnsBaseWorker
def perform(s3_key_name)
temp_csv_file = Tempfile.new("ONSPD.csv")

s3_client.get_object(
response_target: temp_csv_file.path,
bucket: BUCKET_NAME,
key: s3_key_name,
)

CSV.foreach(temp_csv_file.path, headers: true) do |row|
postcode = PostcodeHelper.normalise(row["pcds"])
next if Postcode.os_places.where(postcode:).any?

termination_date = parse_termination_date(row["doterm"])
results = [
{
"ONS" => {
"AVG_LNG" => row["long"],
"AVG_LAT" => row["lat"],
"TYPE" => row["usertype"] == "0" ? "S" : "L",
"DOTERM" => termination_date,
},
},
]

retired = termination_date.blank? ? false : true

existing_record = Postcode.onspd.find_by(postcode:)
if existing_record
existing_record.update(retired:, results:)
else
Postcode.create(postcode:, source: "onspd", retired:, results:)
end
end
rescue StandardError => e
GovukError.notify("ONS Import Worker import problem: #{e.message}")
end

def parse_termination_date(doterm_string)
doterm_string.blank? ? "" : Time.strptime(doterm_string, "%Y%m").strftime("%B %Y")
rescue ArgumentError
Rails.logger.warn("ONS Import Worker found unparseable doterm: #{doterm_string}")
"Unknown"
end
end
2 changes: 1 addition & 1 deletion app/workers/postcodes_collection_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class PostcodesCollectionWorker

def perform
postcodes = Postcode.uncached do
Postcode.order("updated_at ASC").first(POSTCODES_PER_SECOND).pluck(:postcode)
Postcode.os_places.order("updated_at ASC").first(POSTCODES_PER_SECOND).pluck(:postcode)
end
postcodes.each { |postcode| ProcessPostcodeWorker.perform_async(postcode) }
end
Expand Down
3 changes: 1 addition & 2 deletions app/workers/process_postcode_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ class ProcessPostcodeWorker
sidekiq_options queue: :update_postcode, lock: :until_and_while_executing, lock_timeout: nil

def perform(postcode)
token_manager = OsPlacesApi::AccessTokenManager.new
OsPlacesApi::Client.new(token_manager).update_postcode(postcode)
PostcodeManager.new.update_postcode(postcode)
rescue OsPlacesApi::ClientError => e
GovukError.notify(e)
end
Expand Down
3 changes: 1 addition & 2 deletions config/sidekiq.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
:timeout: 4
:max_retries: 1
:queues:
# See here to understand priorities: https://github.com/mperham/sidekiq/wiki/Advanced-Options#queues
# Use powers of 2: higher priority groups are checked twice as often.
- [queue_ons, 8]
- [update_postcode, 8]
- [queue_postcode, 1]
:scheduler:
Expand Down
9 changes: 9 additions & 0 deletions db/migrate/20230605101124_update_postcodes_add_fields.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
class UpdatePostcodesAddFields < ActiveRecord::Migration[7.0]
def change
add_column :postcodes, :source, :string, default: "os_places", null: false
add_column :postcodes, :retired, :boolean, default: false, null: false

add_index :postcodes, :source
add_index :postcodes, :retired
end
end
6 changes: 5 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 57fb5e4

Please sign in to comment.