diff --git a/Gemfile b/Gemfile index ca429de..41f2c4f 100644 --- a/Gemfile +++ b/Gemfile @@ -4,6 +4,7 @@ ruby "~> 3.2.0" gem "rails", "7.0.8" +gem "aws-sdk-s3" gem "bootsnap", require: false gem "gds-api-adapters" gem "govuk_app_config" @@ -12,6 +13,7 @@ gem "httparty" gem "pact", require: false gem "pact_broker-client" gem "pg" +gem "rubyzip" gem "sentry-sidekiq" gem "sidekiq-scheduler" gem "sidekiq-unique-jobs" diff --git a/Gemfile.lock b/Gemfile.lock index 3ecb34b..0606732 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -70,6 +70,22 @@ GEM public_suffix (>= 2.0.2, < 6.0) ast (2.4.2) awesome_print (1.9.2) + aws-eventstream (1.2.0) + aws-partitions (1.781.0) + aws-sdk-core (3.175.0) + aws-eventstream (~> 1, >= 1.0.2) + aws-partitions (~> 1, >= 1.651.0) + aws-sigv4 (~> 1.5) + jmespath (~> 1, >= 1.6.1) + aws-sdk-kms (1.67.0) + aws-sdk-core (~> 3, >= 3.174.0) + aws-sigv4 (~> 1.1) + aws-sdk-s3 (1.126.0) + aws-sdk-core (~> 3, >= 3.174.0) + aws-sdk-kms (~> 1) + aws-sigv4 (~> 1.4) + aws-sigv4 (1.5.2) + aws-eventstream (~> 1, >= 1.0.2) bootsnap (1.16.0) msgpack (~> 1.2) brakeman (5.2.3) @@ -161,6 +177,7 @@ GEM multi_xml (>= 0.5.2) i18n (1.14.1) concurrent-ruby (~> 1.0) + jmespath (1.6.2) json (2.6.3) language_server-protocol (3.17.0.3) link_header (0.0.8) @@ -624,6 +641,7 @@ PLATFORMS x86_64-linux DEPENDENCIES + aws-sdk-s3 bootsnap byebug climate_control @@ -640,6 +658,7 @@ DEPENDENCIES rails (= 7.0.8) rspec-rails rubocop-govuk + rubyzip sentry-sidekiq sidekiq-scheduler sidekiq-unique-jobs diff --git a/app/controllers/v1/locations_controller.rb b/app/controllers/v1/locations_controller.rb index 71b79bc..49184bc 100644 --- a/app/controllers/v1/locations_controller.rb +++ b/app/controllers/v1/locations_controller.rb @@ -5,10 +5,8 @@ class LocationsController < ApplicationController def index Sentry.set_tags postcode: PostcodeHelper.normalise(params[:postcode]) - token_manager = OsPlacesApi::AccessTokenManager.new begin - locations = OsPlacesApi::Client.new(token_manager).locations_for_postcode(params[:postcode]) - render json: locations + render json: PostcodeManager.new.locations_for_postcode(params[:postcode]) rescue OsPlacesApi::InvalidPostcodeProvided render json: { errors: { "postcode": ["Invalid postcode provided"] } }, status: 400 rescue OsPlacesApi::NoResultsForPostcode diff --git a/app/models/location.rb b/app/models/location.rb index b133046..c7574e7 100644 --- a/app/models/location.rb +++ b/app/models/location.rb @@ -1,12 +1,14 @@ class Location include ActiveModel::Model - attr_accessor :address, :latitude, :longitude, :local_custodian_code + attr_accessor :address, :longitude, :latitude, :local_custodian_code - def ==(other) - address == other.address && - latitude == other.latitude && - longitude == other.longitude && - local_custodian_code == other.local_custodian_code + def to_hash + { + "address" => address, + "longitude" => longitude, + "latitude" => latitude, + "local_custodian_code" => local_custodian_code, + } end end diff --git a/app/models/postcode.rb b/app/models/postcode.rb index cadd323..0046c84 100644 --- a/app/models/postcode.rb +++ b/app/models/postcode.rb @@ -3,6 +3,10 @@ class Postcode < ApplicationRecord validates :postcode, uniqueness: true before_validation :normalize_postcode + enum source: %i[os_places onspd].index_with(&:to_s) + scope :active, -> { where(retired: false) } + scope :retired, -> { where(retired: true) } + private def normalize_postcode diff --git a/app/models/postcode_manager.rb b/app/models/postcode_manager.rb new file mode 100644 index 0000000..c005914 --- /dev/null +++ b/app/models/postcode_manager.rb @@ -0,0 +1,40 @@ +class PostcodeManager + def locations_for_postcode(postcode) + normalised_postcode = PostcodeHelper.normalise(postcode) + unless (record = Postcode.find_by(postcode: normalised_postcode)) + record = create_record_from_os_places_api(normalised_postcode) + end + + LocationsPresenter.instance_for(record).to_hash + end + + def update_postcode(postcode) + normalised_postcode = PostcodeHelper.normalise(postcode) + record = Postcode.os_places.find_by(postcode: normalised_postcode) + location_results = location_results_from_os_places_api(normalised_postcode) + + if location_results.empty? + record.destroy if record + elsif record + record.update(results: location_results.results) && record.touch + else + Postcode.create!(postcode: normalised_postcode, source: "os_places", results: location_results.results) + end + end + +private + + def create_record_from_os_places_api(normalised_postcode) + location_results = location_results_from_os_places_api(normalised_postcode) + raise OsPlacesApi::NoResultsForPostcode unless location_results.any_locations? + + Postcode.create_or_find_by!(postcode: normalised_postcode, source: "os_places", results: location_results.results) + end + + def location_results_from_os_places_api(normalised_postcode) + token_manager = OsPlacesApi::AccessTokenManager.new + client = OsPlacesApi::Client.new(token_manager) + # Can raise various errors, which we let flow to the controller + client.retrieve_locations_for_postcode(normalised_postcode) + end +end diff --git a/app/presenters/locations_presenter.rb b/app/presenters/locations_presenter.rb new file mode 100644 index 0000000..a849b9b --- /dev/null +++ b/app/presenters/locations_presenter.rb @@ -0,0 +1,19 @@ +class LocationsPresenter + class UnknownSource < StandardError; end + + def self.instance_for(postcode) + case postcode.source + when "os_places" + OsPlacesLocationsPresenter.new(postcode) + when "onspd" + OnspdLocationsPresenter.new(postcode) + else + # Should be unreachable, but maybe if data is corrupted? + raise(LocationsPresenter::UnknownSource, "Unknown source #{postcode.source}") + end + end + + def initialize(postcode) + @postcode = postcode + end +end diff --git a/app/presenters/onspd_locations_presenter.rb b/app/presenters/onspd_locations_presenter.rb new file mode 100644 index 0000000..bb9db37 --- /dev/null +++ b/app/presenters/onspd_locations_presenter.rb @@ -0,0 +1,25 @@ +class OnspdLocationsPresenter < LocationsPresenter + def to_hash + { + "source" => "Office of National Statistics", + "average_longitude" => @postcode.results.first["ONS"]["AVG_LNG"].to_f, + "average_latitude" => @postcode.results.first["ONS"]["AVG_LAT"].to_f, + "results" => [], + "extra_information" => extra_information, + } + end + + def extra_information + extra_information = { source: "ONS source updated infrequently" } + + if @postcode.retired? + extra_information.merge!(retired: "Postcode was retired in #{@postcode.results.first['ONS']['DOTERM']}") + end + + if @postcode.results.first["ONS"]["TYPE"] == "L" + extra_information.merge!(large: "Postcode is a large user postcode") + end + + extra_information + end +end diff --git a/app/presenters/os_places_locations_presenter.rb b/app/presenters/os_places_locations_presenter.rb new file mode 100644 index 0000000..14d7f0a --- /dev/null +++ b/app/presenters/os_places_locations_presenter.rb @@ -0,0 +1,13 @@ +class OsPlacesLocationsPresenter < LocationsPresenter + def to_hash + location_results = OsPlacesApi::LocationResults.new(@postcode.results) + locations = location_results.unfiltered_locations + + { + "source" => "Ordnance Survey", + "average_longitude" => locations.sum(0.0, &:longitude) / locations.size.to_f, + "average_latitude" => locations.sum(0.0, &:latitude) / locations.size.to_f, + "results" => location_results.filtered_locations.map(&:to_hash), + } + end +end diff --git a/app/workers/ons_base_worker.rb b/app/workers/ons_base_worker.rb new file mode 100644 index 0000000..06f69f9 --- /dev/null +++ b/app/workers/ons_base_worker.rb @@ -0,0 +1,12 @@ +require "aws-sdk-s3" + +class OnsBaseWorker + include Sidekiq::Worker + sidekiq_options queue: :queue_ons, lock: :until_executed, lock_timeout: nil + + BUCKET_NAME = "govuk-#{ENV['GOVUK_ENVIRONMENT_NAME']}-locations-api-import-csvs".freeze + + def s3_client + @s3_client ||= Aws::S3::Client.new(region: "eu-west-1") + end +end diff --git a/app/workers/ons_download_worker.rb b/app/workers/ons_download_worker.rb new file mode 100644 index 0000000..442b985 --- /dev/null +++ b/app/workers/ons_download_worker.rb @@ -0,0 +1,31 @@ +require "open-uri" +require "zip" + +class OnsDownloadWorker < OnsBaseWorker + DATAFILE_REGEX = /\AData\/multi_csv\/.*.csv\z/ + + def perform(url) + # 1. Download File + temp_zip_file = Tempfile.new("ONSPD.zip") + IO.copy_stream(URI.parse(url).open, temp_zip_file.path) + + # 2. Unzip File/Data/multi_csv, and post to S3 bucket + Zip::File.open(temp_zip_file.path) do |zip_file| + zip_file.each do |entry| + file_details = entry.name.match(DATAFILE_REGEX) + next unless file_details + + s3_client.put_object( + bucket: BUCKET_NAME, + key: entry.name, + body: entry.get_input_stream.read, + ) + + OnsImportWorker.perform_async(entry.name) + Rails.logger.info("ONS Download Worker: Added #{entry.name} to S3 bucket") + end + end + rescue StandardError => e + GovukError.notify("Problem downloading ONS file from #{url}: #{e.message}") + end +end diff --git a/app/workers/ons_import_worker.rb b/app/workers/ons_import_worker.rb new file mode 100644 index 0000000..c75d4e2 --- /dev/null +++ b/app/workers/ons_import_worker.rb @@ -0,0 +1,46 @@ +class OnsImportWorker < OnsBaseWorker + def perform(s3_key_name) + temp_csv_file = Tempfile.new("ONSPD.csv") + + s3_client.get_object( + response_target: temp_csv_file.path, + bucket: BUCKET_NAME, + key: s3_key_name, + ) + + CSV.foreach(temp_csv_file.path, headers: true) do |row| + postcode = PostcodeHelper.normalise(row["pcds"]) + next if Postcode.os_places.where(postcode:).any? + + termination_date = parse_termination_date(row["doterm"]) + results = [ + { + "ONS" => { + "AVG_LNG" => row["long"], + "AVG_LAT" => row["lat"], + "TYPE" => row["usertype"] == "0" ? "S" : "L", + "DOTERM" => termination_date, + }, + }, + ] + + retired = termination_date.blank? ? false : true + + existing_record = Postcode.onspd.find_by(postcode:) + if existing_record + existing_record.update(retired:, results:) + else + Postcode.create(postcode:, source: "onspd", retired:, results:) + end + end + rescue StandardError => e + GovukError.notify("ONS Import Worker import problem: #{e.message}") + end + + def parse_termination_date(doterm_string) + doterm_string.blank? ? "" : Time.strptime(doterm_string, "%Y%m").strftime("%B %Y") + rescue ArgumentError + Rails.logger.warn("ONS Import Worker found unparseable doterm: #{doterm_string}") + "Unknown" + end +end diff --git a/app/workers/postcodes_collection_worker.rb b/app/workers/postcodes_collection_worker.rb index 56c711a..5c405ee 100644 --- a/app/workers/postcodes_collection_worker.rb +++ b/app/workers/postcodes_collection_worker.rb @@ -6,7 +6,7 @@ class PostcodesCollectionWorker def perform postcodes = Postcode.uncached do - Postcode.order("updated_at ASC").first(POSTCODES_PER_SECOND).pluck(:postcode) + Postcode.os_places.order("updated_at ASC").first(POSTCODES_PER_SECOND).pluck(:postcode) end postcodes.each { |postcode| ProcessPostcodeWorker.perform_async(postcode) } end diff --git a/app/workers/process_postcode_worker.rb b/app/workers/process_postcode_worker.rb index e751cf6..4cef899 100644 --- a/app/workers/process_postcode_worker.rb +++ b/app/workers/process_postcode_worker.rb @@ -3,8 +3,7 @@ class ProcessPostcodeWorker sidekiq_options queue: :update_postcode, lock: :until_and_while_executing, lock_timeout: nil def perform(postcode) - token_manager = OsPlacesApi::AccessTokenManager.new - OsPlacesApi::Client.new(token_manager).update_postcode(postcode) + PostcodeManager.new.update_postcode(postcode) rescue OsPlacesApi::ClientError => e GovukError.notify(e) end diff --git a/config/sidekiq.yml b/config/sidekiq.yml index a2bce2d..168a0a4 100644 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -3,8 +3,7 @@ :timeout: 4 :max_retries: 1 :queues: - # See here to understand priorities: https://github.com/mperham/sidekiq/wiki/Advanced-Options#queues - # Use powers of 2: higher priority groups are checked twice as often. + - [queue_ons, 8] - [update_postcode, 8] - [queue_postcode, 1] :scheduler: diff --git a/db/migrate/20230605101124_update_postcodes_add_fields.rb b/db/migrate/20230605101124_update_postcodes_add_fields.rb new file mode 100644 index 0000000..c238630 --- /dev/null +++ b/db/migrate/20230605101124_update_postcodes_add_fields.rb @@ -0,0 +1,9 @@ +class UpdatePostcodesAddFields < ActiveRecord::Migration[7.0] + def change + add_column :postcodes, :source, :string, default: "os_places", null: false + add_column :postcodes, :retired, :boolean, default: false, null: false + + add_index :postcodes, :source + add_index :postcodes, :retired + end +end diff --git a/db/schema.rb b/db/schema.rb index bba7f9e..e15b4f9 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2022_01_13_154245) do +ActiveRecord::Schema[7.0].define(version: 2023_06_05_101124) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -19,7 +19,11 @@ t.json "results" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.string "source", default: "os_places", null: false + t.boolean "retired", default: false, null: false t.index ["postcode"], name: "index_postcodes_on_postcode", unique: true + t.index ["retired"], name: "index_postcodes_on_retired" + t.index ["source"], name: "index_postcodes_on_source" end end diff --git a/docs/adr/0001-add-ons-import.md b/docs/adr/0001-add-ons-import.md new file mode 100644 index 0000000..74c0221 --- /dev/null +++ b/docs/adr/0001-add-ons-import.md @@ -0,0 +1,56 @@ +# 1. Add ONS Postcode Data Import + +Date: 2023-05-09 + +## Status + +Open + +## Context + +Locations API is supposed to be an exhaustive list of postcodes, but because the underlying API (OS Places API) is a list of addresses, postcodes which aren't the primary postcode for an address in the OS Places API database won't ever appear. So Locations API can't support Large User Postcodes (LUPs), or Retired Postcodes. + +The nature of data in Imminence means that a lot of LUPs are provided as the contact information for places in Imminence, and without LUPs in the Locations API we can neither geocode nor search by these postcodes. The current workaround is to manually look up the Longitude and Latitude of the postcode in the Office of National Statistics Postcode Data (ONSPD) file, which contains information as a CSV list of postcodes, and critically includes both LUPs and retired postcodes. This is not possible for content second liners, and not easy for technical second liners, so second line tickets generated by these problems are hard to resolve without toil work on the part of a handful of specialists. + +It also means we cannot support end users who only have retired postcodes for their addresses (more common than expected, because it seems the communications around postcode retirement are often missed). + +## Decision + +The ONSPD file is updated on average around two to three times a year (exact update intervals may vary), and has a well understood and open structure (a CSV file also available as multiple smaller CSV files) which has not changed since 2017. We do not expect it to change in the near future. Given this apparent stability and the fact that the data after import will be stable even if the file format changes, we will import it into the postcode table in Locations API. If the file format does change, we will be able to use the last import until we can update our import methods. + +To differentiate these ONSPD imported records from OS Places API records, two fields will be added to the database +- a source field, an enum which will record where the data comes from, and +- a retired field, a boolean field which will note whether the postcode is active (false) or retired (true). + +The rest of the ONSPD data for a postcode will be stored in the results field in a JSON structure, the same way we currently store for OS Places API-derived data, but with a simpler structure: + +``` +[ + { + "ONS" => { + "AVG_LNG" => . + "AVG_LAT" => . + "TYPE" => <"L" (Large User Postcode) or "S" (a small, "normal" postcode)>. + "DOTERM" => , + } + } +] +``` + +(Here naming the array element by its source - ONS - is a little redundant, but matches the way OS Places array elements are named with their ultimate source - DPA or LPI) + +We will create import workers to handle the import. A rake task will be created which, given the URL of an ONSPD zip file, will schedule a download worker job. The download worker will retrieve the zip file and extract the smaller CSV files, putting them in an S3 bucket. It will then schedule import worker jobs for each CSV file. These jobs will read the CSV, comparing it with the existing database. Where a postcode apppears in the CSV but is not in the current database, it will create a simplified record for it, with the source marked as ONSPD and the retired flag set appropriately. These workers will not override any record that has been retrieved from the OS Places API. + +The OS Places API crawler will be updated so that it will not attempt to retrieve information for any postcode whose record source is ONSPD unless it is marked as both not retired (from the active field) and not an LUP (from the simplified record). This will prevent us from bombarding OS Places API with requests for invalid postcodes, while at the same time attempting to get improved information for any postcode that should be in OS Places API. + +The GDS Api Adapter methods which call Locations API will be updated to understand how to use the new structures. + +We will run a data migration for existing OS Places records to populate the source/retired flag. + +## Consequences + +We can update Imminence to allow geocoding by LUPs and retired postcodes (with warnings for the imminence admin users that those postcodes might need updating), reducing maintenance for Imminence datasets which currently require longitude/latitude overrides and generating fewer 2nd line tickets. + +We can provide lookups by LUPs and retired postcodes, reducing confusion in Licence, LLM, and Imminence lookups. + +We can (if it is decided useful) return end-user notifications that a postcode is retired in the frontend searches that use Locations API. \ No newline at end of file diff --git a/docs/updating-ons-postcode-data.md b/docs/updating-ons-postcode-data.md new file mode 100644 index 0000000..e68075a --- /dev/null +++ b/docs/updating-ons-postcode-data.md @@ -0,0 +1,12 @@ +# Updating ONS Postcode Data + +High-quality postcode data using in Locations API comes from the Ordnance Survey. But that dataset does not include all postcodes active (for instance, it excludes large user postcodes), nor does it include historical postcodes that people may still be using. To support this, we can import lower-quality postcode information from the Office for National Statistics Postcode Directory. The system will always use the high quality data where possible, but can use the low-quality data for geolocating in imminence datasets and lookups. + +The Postcode Directory is updated a couple of times a year. Every few months, someone should check to see if there is a new version of the data and update it. + +- Visit https://geoportal.statistics.gov.uk/search?collection=Dataset&sort=-created&tags=all(PRD_ONSPD) to see if there is a new version. +- Visit the page for the new version. There should be a Download link on the page. Copy the URL from that link. +- On the locations-api shell, run the rake task: `rails import_ons_data[]` + +This rake task will start an OnsDownloadWorker, which downloads the file, splits out the multi-csv directory into an S3 bucket, and starts a single OnsImportWorker for each of the files. + diff --git a/lib/os_places_api/client.rb b/lib/os_places_api/client.rb index b613766..ccf5df0 100644 --- a/lib/os_places_api/client.rb +++ b/lib/os_places_api/client.rb @@ -6,39 +6,15 @@ def initialize(token_manager) @token_manager = token_manager end - def locations_for_postcode(postcode) - postcode = PostcodeHelper.normalise(postcode) - if (record = Postcode.find_by(postcode:)) - return build_locations(record["results"]) - end - - response = get_api_response(postcode) + def retrieve_locations_for_postcode(normalised_postcode) + response = get_api_response(normalised_postcode) raise NoResultsForPostcode if response["results"].nil? - if any_locations?(response["results"]) && !Postcode.find_by(postcode:) - Postcode.create!(postcode:, results: response["results"]) - end - build_locations(response["results"]) - end - - def update_postcode(postcode) - postcode = PostcodeHelper.normalise(postcode) - response = get_api_response(postcode) - record = Postcode.find_by(postcode:) - - if response["results"].nil? || !any_locations?(response["results"]) - record.destroy unless record.nil? - elsif record.nil? - Postcode.create!(postcode:, results: response["results"]) - else - record.update(results: response["results"]) && record.touch - end + OsPlacesApi::LocationResults.new(response["results"]) end private - NATIONAL_AUTHORITIES = ["ORDNANCE SURVEY", "HIGHWAYS ENGLAND"].freeze - def validate_response_code(response) raise InvalidPostcodeProvided if response.code == 400 raise ExpiredAccessToken if response.code == 401 @@ -50,43 +26,6 @@ def validate_response_code(response) raise ServiceUnavailable if response.code == 503 end - def results_hash_with_uniq_uprns(results) - results.map { |result| result[result.keys.first] } # first key is either "LPI" or "DPA" - .uniq { |result_hash| result_hash["UPRN"] } - end - - def hash_to_locations(results_hash) - results_hash.map { |result_hash| LocationBuilder.new(result_hash).build_location } - end - - def filtered_locations(results) - filtered_hash = results_hash_with_uniq_uprns(results).reject do |r| - (NATIONAL_AUTHORITIES.include? r["LOCAL_CUSTODIAN_CODE_DESCRIPTION"]) || - (r["POSTAL_ADDRESS_CODE"] == "N") - end - hash_to_locations(filtered_hash) - end - - def unfiltered_locations(results) - hash_to_locations(results_hash_with_uniq_uprns(results)) - end - - def any_locations?(results) - !filtered_locations(results).empty? - end - - def build_locations(results) - locations = unfiltered_locations(results) - - raise NoResultsForPostcode if locations.empty? - - { - "average_latitude" => locations.sum(&:latitude) / locations.size.to_f, - "average_longitude" => locations.sum(&:longitude) / locations.size.to_f, - "results" => filtered_locations(results), - } - end - def get_api_response(postcode) response = HTTParty.get( "https://api.os.uk/search/places/v1/postcode", diff --git a/lib/os_places_api/location_builder.rb b/lib/os_places_api/location_builder.rb deleted file mode 100644 index dccab3f..0000000 --- a/lib/os_places_api/location_builder.rb +++ /dev/null @@ -1,14 +0,0 @@ -module OsPlacesApi - class LocationBuilder - def initialize(result) - @result = result - end - - def build_location - Location.new(address: @result["ADDRESS"], - latitude: @result["LAT"], - longitude: @result["LNG"], - local_custodian_code: @result["LOCAL_CUSTODIAN_CODE"]) - end - end -end diff --git a/lib/os_places_api/location_results.rb b/lib/os_places_api/location_results.rb new file mode 100644 index 0000000..ec79344 --- /dev/null +++ b/lib/os_places_api/location_results.rb @@ -0,0 +1,47 @@ +module OsPlacesApi + class LocationResults + NATIONAL_AUTHORITIES = ["ORDNANCE SURVEY", "HIGHWAYS ENGLAND"].freeze + + attr_reader :results + + def initialize(results) + @results = results + end + + def any_locations? + !filtered_locations.empty? + end + + def empty? + results.nil? || filtered_locations.empty? + end + + def filtered_locations + filtered_hash = results_hash_with_uniq_uprns.reject do |r| + (NATIONAL_AUTHORITIES.include? r["LOCAL_CUSTODIAN_CODE_DESCRIPTION"]) || + (r["POSTAL_ADDRESS_CODE"] == "N") + end + hash_to_locations(filtered_hash) + end + + def unfiltered_locations + hash_to_locations(results_hash_with_uniq_uprns) + end + + def hash_to_locations(results_hash) + results_hash.map { |result_hash| build_location(result_hash) } + end + + def results_hash_with_uniq_uprns + results.map { |result| result[result.keys.first] } # first key is either "LPI" or "DPA" + .uniq { |result_hash| result_hash["UPRN"] } + end + + def build_location(result_hash) + Location.new(address: result_hash["ADDRESS"], + latitude: result_hash["LAT"], + longitude: result_hash["LNG"], + local_custodian_code: result_hash["LOCAL_CUSTODIAN_CODE"]) + end + end +end diff --git a/lib/tasks/import_ons_data.rake b/lib/tasks/import_ons_data.rake new file mode 100644 index 0000000..997ae0d --- /dev/null +++ b/lib/tasks/import_ons_data.rake @@ -0,0 +1,5 @@ +desc "Kick off import of ONS data, adding basic details for postcodes that aren't already in the system. Existing postcodes will not be touched." +task :import_ons_data, [:onspd_file_url] => :environment do |_, args| + # TODO: Test if the URL is valid before kicking off? + OnsDownloadWorker.perform_async(args[:onspd_file_url]) +end diff --git a/spec/fixtures/ONSPD_JUN_2023_UK.zip b/spec/fixtures/ONSPD_JUN_2023_UK.zip new file mode 100644 index 0000000..38825e5 Binary files /dev/null and b/spec/fixtures/ONSPD_JUN_2023_UK.zip differ diff --git a/spec/fixtures/ONSPD_JUN_2023_UK_AB.csv b/spec/fixtures/ONSPD_JUN_2023_UK_AB.csv new file mode 100644 index 0000000..eb3e98d --- /dev/null +++ b/spec/fixtures/ONSPD_JUN_2023_UK_AB.csv @@ -0,0 +1,6 @@ +pcd,pcd2,pcds,dointr,doterm,oscty,ced,oslaua,osward,parish,usertype,oseast1m,osnrth1m,osgrdind,oshlthau,nhser,ctry,rgn,streg,pcon,eer,teclec,ttwa,pct,itl,statsward,oa01,casward,park,lsoa01,msoa01,ur01ind,oac01,oa11,lsoa11,msoa11,wz11,sicbl,bua11,buasd11,ru11ind,oac11,lat,long,lep1,lep2,pfa,imd,calncv,icb,oa21,lsoa21,msoa21 +"AB1 0AA","AB1 0AA","AB1 0AA","198001","199606","S99999999","S99999999","S12000033","S13002843","S99999999","0","385386","0801193","1","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001364","01C30","S99999999","S01000011","S02000007","6","3C2","S00090303","S01006514","S02001237","S34002990","S03000012","S99999999","S99999999","3","1C3",57.101474,-2.242851,"S99999999","S99999999","S23000009",6715,"S99999999","S99999999","","","" +"AB1 0AB","AB1 0AB","AB1 0AB","198001","199606","S99999999","S99999999","S12000033","S13002843","S99999999","0","385177","0801314","1","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001270","01C31","S99999999","S01000011","S02000007","6","4B3","S00090303","S01006514","S02001237","S34002990","S03000012","S99999999","S99999999","3","1C3",57.102554,-2.246308,"S99999999","S99999999","S23000009",6715,"S99999999","S99999999","","","" +"AB1 0AD","AB1 0AD","AB1 0AD","198001","199606","S99999999","S99999999","S12000033","S13002843","S99999999","0","385053","0801092","1","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001364","01C30","S99999999","S01000011","S02000007","6","3C2","S00090399","S01006514","S02001237","S34003015","S03000012","S99999999","S99999999","3","6A1",57.100556,-2.248342,"S99999999","S99999999","S23000009",6715,"S99999999","S99999999","","","" +"AB1 0AE","AB1 0AE","AB1 0AE","199402","","S99999999","S99999999","S12000034","S13002864","S99999999","0","384600","0799300","8","S08000020","S99999999","S92000003","S99999999","0","S14000058","S15000001","S09000001","S22000047","S03000013","S30000027","99ZZ00","S00002142","02C58","S99999999","S01000333","S02000061","6","3B1","S00091322","S01006853","S02001296","S34003292","S03000013","S99999999","S99999999","6","1A2",57.084444,-2.255708,"S99999999","S99999999","S23000009",5069,"S99999999","S99999999","","","" +"AB1 0AF","AB1 0AF","AB1 0AF","199012","199207","S99999999","S99999999","S12000033","S13002843","S99999999","1","384460","0800660","8","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001266","01C30","S99999999","S01000007","S02000003","3","4D2","S00090299","S01006511","S02001236","S34003015","S03000012","S99999999","S99999999","3","6A4",57.096656,-2.258102,"S99999999","S99999999","S23000009",6253,"S99999999","S99999999","","","" diff --git a/spec/fixtures/ONSPD_JUN_2023_UK_AB_broken.csv b/spec/fixtures/ONSPD_JUN_2023_UK_AB_broken.csv new file mode 100644 index 0000000..df5368f --- /dev/null +++ b/spec/fixtures/ONSPD_JUN_2023_UK_AB_broken.csv @@ -0,0 +1,6 @@ +pcd,pcd2,pcds,dointr,doterm,oscty,ced,oslaua,osward,parish,usertype,oseast1m,osnrth1m,osgrdind,oshlthau,nhser,ctry,rgn,streg,pcon,eer,teclec,ttwa,pct,itl,statsward,oa01,casward,park,lsoa01,msoa01,ur01ind,oac01,oa11,lsoa11,msoa11,wz11,sicbl,bua11,buasd11,ru11ind,oac11,lat,long,lep1,lep2,pfa,imd,calncv,icb,oa21,lsoa21,msoa21 +"AB1 0AA","AB1 0AA","AB1 0AA","199606","AAAAAA","S99999999","S99999999","S12000033","S13002843","S99999999","0","385386","0801193","1","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001364","01C30","S99999999","S01000011","S02000007","6","3C2","S00090303","S01006514","S02001237","S34002990","S03000012","S99999999","S99999999","3","1C3",57.101474,-2.242851,"S99999999","S99999999","S23000009",6715,"S99999999","S99999999","","","" +"AB1 0AB","AB1 0AB","AB1 0AB","198001","199606","S99999999","S99999999","S12000033","S13002843","S99999999","0","385177","0801314","1","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001270","01C31","S99999999","S01000011","S02000007","6","4B3","S00090303","S01006514","S02001237","S34002990","S03000012","S99999999","S99999999","3","1C3",57.102554,-2.246308,"S99999999","S99999999","S23000009",6715,"S99999999","S99999999","","","" +"AB1 0AD","AB1 0AD","AB1 0AD","198001","199606","S99999999","S99999999","S12000033","S13002843","S99999999","0","385053","0801092","1","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001364","01C30","S99999999","S01000011","S02000007","6","3C2","S00090399","S01006514","S02001237","S34003015","S03000012","S99999999","S99999999","3","6A1",57.100556,-2.248342,"S99999999","S99999999","S23000009",6715,"S99999999","S99999999","","","" +"AB1 0AE","AB1 0AE","AB1 0AE","199402","199606","S99999999","S99999999","S12000034","S13002864","S99999999","0","384600","0799300","8","S08000020","S99999999","S92000003","S99999999","0","S14000058","S15000001","S09000001","S22000047","S03000013","S30000027","99ZZ00","S00002142","02C58","S99999999","S01000333","S02000061","6","3B1","S00091322","S01006853","S02001296","S34003292","S03000013","S99999999","S99999999","6","1A2",57.084444,-2.255708,"S99999999","S99999999","S23000009",5069,"S99999999","S99999999","","","" +"AB1 0AF","AB1 0AF","AB1 0AF","199012","199207","S99999999","S99999999","S12000033","S13002843","S99999999","1","384460","0800660","8","S08000020","S99999999","S92000003","S99999999","0","S14000002","S15000001","S09000001","S22000047","S03000012","S30000026","99ZZ00","S00001266","01C30","S99999999","S01000007","S02000003","3","4D2","S00090299","S01006511","S02001236","S34003015","S03000012","S99999999","S99999999","3","6A4",57.096656,-2.258102,"S99999999","S99999999","S23000009",6253,"S99999999","S99999999","","","" diff --git a/spec/lib/os_places_api/client_spec.rb b/spec/lib/os_places_api/client_spec.rb index fb639ce..07a66a3 100644 --- a/spec/lib/os_places_api/client_spec.rb +++ b/spec/lib/os_places_api/client_spec.rb @@ -5,400 +5,79 @@ described_class.new(instance_double("AccessTokenManager", access_token: "some token")) end let(:postcode) { "E18QS" } - let(:api_endpoint) { "https://api.os.uk/search/places/v1/postcode?output_srs=WGS84&postcode=#{postcode}&dataset=DPA,LPI" } - let(:successful_response) do - { - "header": { - "uri": api_endpoint, - "query": "postcode=#{postcode}", - "offset": 0, - "totalresults": 1, # really 12, but we've omitted the other 11 in `results` above - "format": "JSON", - "dataset": "DPA,LPI", - "lr": "EN,CY", - "maxresults": 100, - "epoch": "87", - "output_srs": "WGS84", - }, - "results": os_places_api_results, - } - end - let(:os_places_api_results) do - [ - { - "DPA" => { - "UPRN" => "6714278", - "UDPRN" => "54673874", - "ADDRESS" => "1, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - "BUILDING_NUMBER" => "1", - "THOROUGHFARE_NAME" => "WHITECHAPEL HIGH STREET", - "POST_TOWN" => "LONDON", - "POSTCODE" => "E1 8QS", - "RPC" => "2", - "X_COORDINATE" => 533_813.0, - "Y_COORDINATE" => 181_262.0, - "LNG" => -0.0729933, - "LAT" => 51.5144547, - "STATUS" => "APPROVED", - "LOGICAL_STATUS_CODE" => "1", - "CLASSIFICATION_CODE" => "CO01", - "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", - "LOCAL_CUSTODIAN_CODE" => 5900, - "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "TOWER HAMLETS", - "POSTAL_ADDRESS_CODE" => "D", - "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", - "BLPU_STATE_CODE" => "1", - "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", - "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", - "LAST_UPDATE_DATE" => "17/06/2017", - "ENTRY_DATE" => "17/02/2017", - "BLPU_STATE_DATE" => "17/02/2017", - "LANGUAGE" => "EN", - "MATCH" => 1.0, - "MATCH_DESCRIPTION" => "EXACT", - }, - }, - { - "LPI" => { - "UPRN" => "6714279", - "ADDRESS" => "2, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - "USRN" => "22701338", - "LPI_KEY" => "5900L000449318", - "PAO_START_NUMBER" => "2", - "STREET_DESCRIPTION" => "WHITECHAPEL HIGH STREET", - "TOWN_NAME" => "LONDON", - "ADMINISTRATIVE_AREA" => "TOWER HAMLETS", - "POSTCODE_LOCATOR" => "E1 8QS", - "RPC" => "2", - "X_COORDINATE" => 533_813.0, - "Y_COORDINATE" => 181_262.0, - "LNG" => -0.0729935, - "LAT" => 51.5144545, - "STATUS" => "APPROVED", - "LOGICAL_STATUS_CODE" => "1", - "CLASSIFICATION_CODE" => "CO01", - "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", - "LOCAL_CUSTODIAN_CODE" => 5900, - "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "TOWER HAMLETS", - "COUNTRY_CODE" => "E", - "COUNTRY_CODE_DESCRIPTION" => "This record is within England", - "POSTAL_ADDRESS_CODE" => "D", - "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", - "BLPU_STATE_CODE" => "1", - "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", - "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", - "LAST_UPDATE_DATE" => "17/06/2017", - "ENTRY_DATE" => "17/02/2017", - "BLPU_STATE_DATE" => "17/02/2017", - "STREET_STATE_CODE" => "2", - "STREET_STATE_CODE_DESCRIPTION" => "Open", - "STREET_CLASSIFICATION_CODE" => "8", - "STREET_CLASSIFICATION_CODE_DESCRIPTION" => "All vehicles", - "LPI_LOGICAL_STATUS_CODE" => "1", - "LPI_LOGICAL_STATUS_CODE_DESCRIPTION" => "APPROVED", - "LANGUAGE" => "EN", - "MATCH" => 1.0, - "MATCH_DESCRIPTION" => "EXACT", - }, - }, - { - "DPA" => { - "UPRN" => "10091989923", - "UDPRN" => "54673879", - "ADDRESS" => "10, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - "BUILDING_NUMBER" => "10", - "THOROUGHFARE_NAME" => "WHITECHAPEL HIGH STREET", - "POST_TOWN" => "LONDON", - "POSTCODE" => "E1 8QS", - "RPC" => "2", - "X_COORDINATE" => 533_813.0, - "Y_COORDINATE" => 181_262.0, - "LNG" => -0.0729692, - "LAT" => 51.5144785, - "STATUS" => "APPROVED", - "LOGICAL_STATUS_CODE" => "1", - "CLASSIFICATION_CODE" => "CO01", - "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", - "LOCAL_CUSTODIAN_CODE" => 7655, - "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "ORDNANCE SURVEY", - "POSTAL_ADDRESS_CODE" => "E", - "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", - "BLPU_STATE_CODE" => "1", - "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", - "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", - "LAST_UPDATE_DATE" => "17/06/2017", - "ENTRY_DATE" => "17/02/2017", - "BLPU_STATE_DATE" => "17/02/2017", - "LANGUAGE" => "EN", - "MATCH" => 1.0, - "MATCH_DESCRIPTION" => "EXACT", - }, - }, - { - "DPA" => { - "UPRN" => "10091989923", - "UDPRN" => "54673879", - "ADDRESS" => "10, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - "BUILDING_NUMBER" => "10", - "THOROUGHFARE_NAME" => "WHITECHAPEL HIGH STREET", - "POST_TOWN" => "LONDON", - "POSTCODE" => "E1 8QS", - "RPC" => "2", - "X_COORDINATE" => 533_813.0, - "Y_COORDINATE" => 181_262.0, - "LNG" => -0.0729692, - "LAT" => 51.5144785, - "STATUS" => "APPROVED", - "LOGICAL_STATUS_CODE" => "1", - "CLASSIFICATION_CODE" => "CO01", - "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", - "LOCAL_CUSTODIAN_CODE" => 11, - "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "HIGHWAYS ENGLAND", - "POSTAL_ADDRESS_CODE" => "E", - "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", - "BLPU_STATE_CODE" => "1", - "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", - "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", - "LAST_UPDATE_DATE" => "17/06/2017", - "ENTRY_DATE" => "17/02/2017", - "BLPU_STATE_DATE" => "17/02/2017", - "LANGUAGE" => "EN", - "MATCH" => 1.0, - "MATCH_DESCRIPTION" => "EXACT", - }, - }, - # subsequent results omitted for brevity - ] - end - let(:filterable_response) do - # Reponse with valid locations, but where all locations have a - # LOCAL_CUSTODIAN_CODE_DESCRIPTION which causes them to be filtered - # out when processed. - { - "header": { - "uri": api_endpoint, - "query": "postcode=#{postcode}", - "offset": 0, - "totalresults": 1, # really 12, but we've omitted the other 11 in `results` above - "format": "JSON", - "dataset": "DPA,LPI", - "lr": "EN,CY", - "maxresults": 100, - "epoch": "87", - "output_srs": "WGS84", - }, - "results": os_places_api_results_with_filterable_locations, - } - end + describe "#retrieve_locations_for_postcode" do + it "queries OS Places API and return results" do + stub_os_places_api_request_good(postcode) - let(:os_places_api_results_with_filterable_locations) do - os_places_api_results.deep_dup.each do |location| - location.values.first["LOCAL_CUSTODIAN_CODE_DESCRIPTION"] = "ORDNANCE SURVEY" + expect(client.retrieve_locations_for_postcode(postcode).results).to eq(os_places_api_results) end - end - describe "#locations_for_postcode" do - let(:locations) do - [ - Location.new(address: "1, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - latitude: 51.5144547, - local_custodian_code: 5900, - longitude: -0.0729933), - Location.new(address: "2, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - latitude: 51.5144545, - local_custodian_code: 5900, - longitude: -0.0729935), - ] - end - let(:average_latitude) { 51.51446256666667 } - let(:average_longitude) { -0.07298533333333333 } - - context "the postcode doesn't exist in the database" do - before :each do - Postcode.where(postcode:).map(&:destroy) - end + it "raises an exception if the results are nil" do + stub_os_places_api_request_nil_results(postcode) - it "should query OS Places API and return results" do - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - expect(client.locations_for_postcode(postcode).as_json).to eq( - { - "average_latitude" => average_latitude, - "average_longitude" => average_longitude, - "results" => locations.as_json, - }, - ) - end - - it "should filter out duplicate `UPRN` results from the OS Places API response" do - os_places_api_results[1]["LPI"]["UPRN"] = os_places_api_results[0]["DPA"]["UPRN"] - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - expect(client.locations_for_postcode(postcode).as_json).to eq( - { - "average_latitude" => 51.514466600000006, - "average_longitude" => -0.07298125, - "results" => [locations[0]].as_json, - }, - ) - end - - it "should include records with POSTAL_ADDRESS_CODE='N' in averages, but filter them from results" do - os_places_api_results[1]["LPI"]["POSTAL_ADDRESS_CODE"] = "N" - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - expect(client.locations_for_postcode(postcode).as_json).to eq( - { - "average_latitude" => average_latitude, - "average_longitude" => average_longitude, - "results" => [locations[0]].as_json, - }, - ) - end - - it "should cache the response from a successful request" do - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - expect(Postcode.where(postcode:).count).to eq(0) - client.locations_for_postcode(postcode) - expect(Postcode.where(postcode:).count).to eq(1) - end + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::NoResultsForPostcode) + end - it "raises an exception if the access token has expired" do - api_response = { - "fault": { - "faultstring": "Access Token expired", - "detail": { - "errorcode": "keymanagement.service.access_token_expired", - }, + it "raises an exception if the access token has expired" do + api_response = { + "fault": { + "faultstring": "Access Token expired", + "detail": { + "errorcode": "keymanagement.service.access_token_expired", }, - } - stub_request(:get, api_endpoint).to_return(status: 401, body: api_response.to_json) - - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::ExpiredAccessToken) - end - - it "raises an exception if the request is forbidden" do - stub_request(:get, api_endpoint).to_return(status: 403) - - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::RequestForbidden) - end - - it "raises an exception if the request cannot resolve" do - stub_request(:get, api_endpoint).to_return(status: 404) - - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::RequestNotFound) - end - - it "raises an exception if the request method is not allowed" do - stub_request(:get, api_endpoint).to_return(status: 405) - - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::MethodNotAllowed) - end - - it "raises an exception if rate limit exceeded" do - stub_request(:get, api_endpoint).to_return(status: 429) + }, + } + stub_os_places_api_request(postcode, api_response, status: 401) - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::RateLimitExceeded) - end + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::ExpiredAccessToken) + end - it "raises an exception if OS Places API has an internal server error" do - api_response = { - "error": { - "statuscode": 500, - "message": "The provided request resulted in an internal server error.", - }, - } - stub_request(:get, api_endpoint).to_return(status: 500, body: api_response.to_json) + it "raises an exception if the request is forbidden" do + stub_os_places_api_request(postcode, {}, status: 403) - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::InternalServerError) - end + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::RequestForbidden) + end - it "raises an exception if the OS Places API service is unavailable" do - stub_request(:get, api_endpoint).to_return(status: 503) + it "raises an exception if the request cannot resolve" do + stub_os_places_api_request(postcode, {}, status: 404) - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::ServiceUnavailable) - end + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::RequestNotFound) + end - it "raises an exception if the postcode is not in OS Places API datasets" do - os_places_api_result_without_location = { - "header": { - "uri": api_endpoint, - "query": "postcode=#{postcode}", - "offset": 0, - "totalresults": 0, - "format": "JSON", - "dataset": "DPA,LPI", - "lr": "EN,CY", - "maxresults": 100, - "epoch": "87", - "output_srs": "WGS84", - }, - } - stub_request(:get, api_endpoint).to_return(status: 200, body: os_places_api_result_without_location.to_json) - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::NoResultsForPostcode) - end + it "raises an exception if the request method is not allowed" do + stub_os_places_api_request(postcode, {}, status: 405) - it "raises an exception if the response isn't in the structure we expect" do - stub_request(:get, api_endpoint).to_return(status: 200, body: "foo") - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::UnexpectedResponse) - end + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::MethodNotAllowed) end - context "the postcode exists in the database" do - it "should return the cached data" do - Postcode.create(postcode:, results: os_places_api_results) + it "raises an exception if rate limit exceeded" do + stub_os_places_api_request(postcode, {}, status: 429) - expect(a_request(:get, api_endpoint)).not_to have_been_made + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::RateLimitExceeded) + end - expect(client.locations_for_postcode(postcode)).to eq( - { - "average_latitude" => average_latitude, - "average_longitude" => average_longitude, - "results" => locations, - }, - ) - end + it "raises an exception if OS Places API has an internal server error" do + api_response = { + "error": { + "statuscode": 500, + "message": "The provided request resulted in an internal server error.", + }, + } + stub_os_places_api_request(postcode, api_response, status: 500) - it "should return the cached data even if the postcode is structured differently in the database" do - normalised_postcode = "E18QS" - user_inputted_postcode = "E1 8QS" - Postcode.create(postcode: normalised_postcode, results: os_places_api_results) + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::InternalServerError) + end - expect(a_request(:get, api_endpoint)).not_to have_been_made + it "raises an exception if the OS Places API service is unavailable" do + stub_os_places_api_request(postcode, {}, status: 503) - expect(client.locations_for_postcode(user_inputted_postcode)).to eq( - { - "average_latitude" => average_latitude, - "average_longitude" => average_longitude, - "results" => locations, - }, - ) - end + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::ServiceUnavailable) end - context "there are two simultaneous requests for the same (new) postcode" do - it "should not attempt to create the postcode twice" do - existing_record = Postcode.create(postcode:, results: os_places_api_results) - n = 0 # make the first call to `find_by` return `nil`; subsequent calls should work correctly - allow(Postcode).to receive(:find_by) { (n += 1) == 1 ? nil : existing_record } - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - expect(Postcode).not_to receive(:create!) - expect(client.locations_for_postcode(postcode)).to eq( - { - "average_latitude" => average_latitude, - "average_longitude" => average_longitude, - "results" => locations, - }, - ) - end + it "raises an exception if the response isn't in the structure we expect" do + stub_os_places_api_request_invalid_response(postcode) + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::UnexpectedResponse) end context "the postcode is invalid" do @@ -412,61 +91,10 @@ }, } - stub_request(:get, api_endpoint).to_return(status: 400, body: api_response.to_json) + stub_os_places_api_request(postcode, api_response, status: 400) - expect { client.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::InvalidPostcodeProvided) + expect { client.retrieve_locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::InvalidPostcodeProvided) end end end - - describe "#update_postcode" do - it "should query OS Places API and add a new row if postcode doesn't exist" do - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - client.update_postcode(postcode) - expect(Postcode.where(postcode:).pluck(:results)).to eq([os_places_api_results]) - end - - it "should query OS Places API and update cached data if postcode exists" do - old_results = os_places_api_results.first["DPA"].dup - old_results["LNG"] = -1 - old_results["LAT"] = -1 - Postcode.create(postcode:, results: [{ "DPA" => old_results }]) - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - client.update_postcode(postcode) - expect(Postcode.where(postcode:).pluck(:results)).to eq([os_places_api_results]) - end - - it "should update the `updated_at` property even if no changes were made" do - Postcode.create(postcode:, results: successful_response[:results], updated_at: 1.day.ago) - stub_request(:get, api_endpoint) - .to_return(status: 200, body: successful_response.to_json) - - client.update_postcode(postcode) - expect(Postcode.find_by(postcode:).updated_at.strftime("%A")).to eq(Time.now.strftime("%A")) - end - - it "should query OS Places API and delete the postcode if it was terminated" do - Postcode.create(postcode:, results: [{}]) - stub_request(:get, api_endpoint) - .to_return(status: 200, body: {}.to_json) - - expect(Postcode.find_by(postcode:)).not_to eq(nil) - client.update_postcode(postcode) - expect(Postcode.find_by(postcode:)).to eq(nil) - end - - it "should query OS Places API and delete the postcode if all locations are filtered out" do - Postcode.create(postcode:, results: successful_response[:results], updated_at: 1.day.ago) - stub_request(:get, api_endpoint) - .to_return(status: 200, body: filterable_response.to_json) - - expect(Postcode.find_by(postcode:)).not_to eq(nil) - client.update_postcode(postcode) - expect(Postcode.find_by(postcode:)).to eq(nil) - end - end end diff --git a/spec/lib/os_places_api/location_results_spec.rb b/spec/lib/os_places_api/location_results_spec.rb new file mode 100644 index 0000000..0a64b7a --- /dev/null +++ b/spec/lib/os_places_api/location_results_spec.rb @@ -0,0 +1,47 @@ +require "spec_helper" + +RSpec.describe OsPlacesApi::LocationResults do + let(:results) { OsPlacesApi::LocationResults.new(os_places_api_results) } + let(:filterable_results) { OsPlacesApi::LocationResults.new(os_places_api_results_with_filterable_locations) } + let(:postal_address_code_n_results) { OsPlacesApi::LocationResults.new(os_places_api_results_with_postal_address_code_n) } + + describe "#any_locations?" do + it "should be true for normal results" do + expect(results.any_locations?).to be(true) + end + + it "should be false with filtered results" do + expect(filterable_results.any_locations?).to be(false) + end + end + + describe "#empty?" do + it "should be false for normal results" do + expect(results.empty?).to be(false) + end + + it "should be true with filtered results" do + expect(filterable_results.empty?).to be(true) + end + end + + describe "#filtered_locations" do + it "should return results filtering out duplicate UPRNs and HIGHWAY ENGLAND/ORDNANCE SURVEY records" do + expect(results.filtered_locations.count).to eq(2) + end + + it "should return results but filter out record with POSTAL_ADDRESS_CODE='N'" do + expect(postal_address_code_n_results.filtered_locations.count).to eq(1) + end + end + + describe "#unfiltered_locations" do + it "should return results filtering out duplicate UPRNs" do + expect(results.unfiltered_locations.count).to eq(3) + end + + it "should return results without filtering POSTAL_ADDRESS_CODE='N'" do + expect(postal_address_code_n_results.unfiltered_locations.count).to eq(3) + end + end +end diff --git a/spec/lib/tasks/import_ons_data_spec.rb b/spec/lib/tasks/import_ons_data_spec.rb new file mode 100644 index 0000000..f50cea2 --- /dev/null +++ b/spec/lib/tasks/import_ons_data_spec.rb @@ -0,0 +1,15 @@ +require "spec_helper" +require "rake" + +RSpec.describe "import_ons_data task" do + let(:task) { Rake::Task["import_ons_data"] } + + context "when the import_ons_data task is invoked" do + it "kicks off an OnsDownloadWorker job" do + expect(OnsDownloadWorker).to receive(:perform_async).with("https://test.com/ONSPD_JUN_2023_UK.csv") + + task.reenable + task.invoke("https://test.com/ONSPD_JUN_2023_UK.csv") + end + end +end diff --git a/spec/lib/tasks/populate_postcodes_table_spec.rb b/spec/lib/tasks/populate_postcodes_table_spec.rb index f291464..72e81e3 100644 --- a/spec/lib/tasks/populate_postcodes_table_spec.rb +++ b/spec/lib/tasks/populate_postcodes_table_spec.rb @@ -1,9 +1,7 @@ require "rake" require "spec_helper" -Rails.application.load_tasks - -RSpec.describe "Tasks" do +RSpec.describe "populate_postcodes_table task" do describe "when running the task" do before do Postcode.destroy_all @@ -15,7 +13,7 @@ expect(Postcode.count).to eq(0) task.reenable - task.invoke("spec/lib/tasks/postcodes.csv") + expect { task.invoke("spec/lib/tasks/postcodes.csv") }.to output.to_stdout expect(Postcode.count).to eq(3) end @@ -25,7 +23,7 @@ expect(Postcode.count).to eq(1) task.reenable - task.invoke("spec/lib/tasks/postcodes.csv") + expect { task.invoke("spec/lib/tasks/postcodes.csv") }.to output.to_stdout expect(Postcode.count).to eq(3) end @@ -36,7 +34,7 @@ expect(Postcode.count).to eq(1) task.reenable - task.invoke("spec/lib/tasks/postcodes.csv") + expect { task.invoke("spec/lib/tasks/postcodes.csv") }.to output.to_stdout expect(Postcode.find_by(postcode: "SE12TL").results).to eq(existing_result) end diff --git a/spec/models/postcode_manager_spec.rb b/spec/models/postcode_manager_spec.rb new file mode 100644 index 0000000..8c7b5b5 --- /dev/null +++ b/spec/models/postcode_manager_spec.rb @@ -0,0 +1,161 @@ +require "spec_helper" + +RSpec.describe PostcodeManager, type: :model do + let(:postcode) { "E1 8QS" } + let(:normalised_postcode) { "E18QS" } + let(:postcode_manager) { PostcodeManager.new } + + before do + mock_access_token_manager + end + + describe "#locations_for_postcode" do + let(:address1) do + { + "ADDRESS" => "1, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + "LAT" => 51.5144547, + "LNG" => -0.0729933, + "LOCAL_CUSTODIAN_CODE" => 5900, + "UPRN" => "1", + } + end + let(:address2) do + { + "ADDRESS" => "2, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + "LAT" => 51.5144545, + "LNG" => -0.0729935, + "LOCAL_CUSTODIAN_CODE" => 5900, + "UPRN" => "2", + } + end + let(:api_locations) do + [ + { "DPA" => address1 }, + { "DPA" => address2 }, + ] + end + let(:locations) do + [address1, address2].map { |a| OsPlacesApi::LocationResults.new([]).build_location(a) } + end + let(:average_latitude) { 51.51446256666667 } + let(:average_longitude) { -0.07298533333333333 } + + context "the postcode doesn't exist in the database" do + before do + Postcode.where(postcode: normalised_postcode).destroy_all + end + + it "passes the query through to the api client" do + mock_os_client = mock_os_client_good_results + expect(mock_os_client).to receive(:retrieve_locations_for_postcode).with(normalised_postcode) + + postcode_manager.locations_for_postcode(postcode) + end + + it "caches the response from a successful request" do + expect(Postcode.where(postcode: normalised_postcode).count).to eq(0) + mock_os_client_good_results + postcode_manager.locations_for_postcode(postcode) + expect(Postcode.where(postcode: normalised_postcode).count).to eq(1) + end + + context "it doesn't have any locations in OS Places API" do + it "raises an error" do + mock_os_client_empty_results + expect { postcode_manager.locations_for_postcode(postcode) }.to raise_error(OsPlacesApi::NoResultsForPostcode) + end + end + end + + context "the postcode exists in the database" do + it "returns the cached data" do + Postcode.create(postcode:, source: "os_places", results: os_places_api_results) + mock_os_client = mock_os_client_good_results + expect(mock_os_client).not_to receive(:retrieve_locations_for_postcode) + + expect(postcode_manager.locations_for_postcode(postcode)).to eq( + { + "average_latitude" => average_latitude, + "average_longitude" => average_longitude, + "results" => locations.map(&:to_hash), + "source" => "Ordnance Survey", + }, + ) + end + + it "returns the cached data even if the postcode is structured differently in the database" do + Postcode.create(postcode: normalised_postcode, results: os_places_api_results) + mock_os_client = mock_os_client_good_results + expect(mock_os_client).not_to receive(:retrieve_locations_for_postcode) + + expect(postcode_manager.locations_for_postcode(postcode)).to eq( + { + "average_latitude" => average_latitude, + "average_longitude" => average_longitude, + "results" => locations.map(&:to_hash), + "source" => "Ordnance Survey", + }, + ) + end + end + end + + describe "#update_postcode" do + context "the postcode exists in the database" do + before do + Postcode.create!(postcode: normalised_postcode) + end + + context "the api returns no locations" do + before do + mock_os_client_empty_results + end + + it "deletes the postcode record" do + expect(Postcode.where(postcode: normalised_postcode).count).to eq(1) + postcode_manager.update_postcode(postcode) + expect(Postcode.where(postcode: normalised_postcode).count).to eq(0) + end + end + + context "the api returns locations" do + before do + mock_os_client_good_results + end + + it "updates the postcode record" do + postcode_manager.update_postcode(postcode) + expect(Postcode.where(postcode: normalised_postcode).first.results).to eq(os_places_api_results) + end + end + end + + context "the postcode doesn't exist in the database" do + before do + Postcode.where(postcode: normalised_postcode).destroy_all + end + + context "the api returns no locations" do + before do + mock_os_client_empty_results + end + + it "does not create a postcode record" do + postcode_manager.update_postcode(postcode) + expect(Postcode.where(postcode: normalised_postcode).count).to eq(0) + end + end + + context "the api returns locations" do + before do + mock_os_client_good_results + end + + it "creates a postcode record" do + postcode_manager.update_postcode(postcode) + expect(Postcode.where(postcode: normalised_postcode).count).to eq(1) + end + end + end + end +end diff --git a/spec/models/postcode_spec.rb b/spec/models/postcode_spec.rb index d890222..52e95fc 100644 --- a/spec/models/postcode_spec.rb +++ b/spec/models/postcode_spec.rb @@ -53,4 +53,16 @@ expect { Postcode.create!(postcode: "1AA 1BB") }.to raise_error(ActiveRecord::RecordInvalid) end end + + describe "Defaults" do + let(:postcode) { Postcode.create!(postcode: "E1 8QS") } + + it "has a default source of os_places" do + expect(postcode.source).to eq("os_places") + end + + it "is active by default" do + expect(postcode.retired?).to be false + end + end end diff --git a/spec/requests/v1/locations_spec.rb b/spec/requests/v1/locations_spec.rb index 8533c25..9d4d5f8 100644 --- a/spec/requests/v1/locations_spec.rb +++ b/spec/requests/v1/locations_spec.rb @@ -6,29 +6,44 @@ ENV["OS_PLACES_API_SECRET"] = "some_secret" end - context "Successful call" do + context "Successful call for an OS Places API postcode" do let(:postcode) { "E1 8QS" } + let(:normalised_postcode) { "E18QS" } + let(:address1) do + { + address: "1, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + latitude: 51.5144547, + longitude: -0.0729933, + local_custodian_code: 5900, + } + end + let(:address2) do + { + address: "2, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + latitude: 51.5144545, + longitude: -0.0729935, + local_custodian_code: 5900, + } + end + let(:api_locations) do + [ + { "DPA" => address1 }, + { "DPA" => address2 }, + ] + end let(:locations) do { - "average_latitude" => 51.51445475, - "average_longitude" => -0.07299335, - "results" => [ - Location.new(address: "1, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - latitude: 51.5144547, - longitude: -0.0729933, - local_custodian_code: 5900), - Location.new(address: "2, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", - latitude: 51.5144548, - longitude: -0.0729934, - local_custodian_code: 5900), - ], + "source" => "Ordnance Survey", + "average_longitude" => -0.07298533333333333, + "average_latitude" => 51.51446256666667, + "results" => [Location.new(address1), Location.new(address2)], } end before do client = double("client") allow(OsPlacesApi::Client).to receive(:new).and_return(client) - expect(client).to receive(:locations_for_postcode).with(postcode).and_return(locations) + expect(client).to(receive(:retrieve_locations_for_postcode).with(normalised_postcode)).and_return(OsPlacesApi::LocationResults.new(os_places_api_results)) end it "Should return proper body" do @@ -38,6 +53,85 @@ end end + context "Successful call for an ONSPD postcode" do + let(:postcode) { "AB1 0AA" } + let(:normalised_postcode) { "AB10AA" } + + context "which is small and non-retired" do + let(:locations) do + { + "source" => "Office of National Statistics", + "average_longitude" => -2.242851, + "average_latitude" => 57.101474, + "results" => [], + "extra_information" => { + "source" => "ONS source updated infrequently", + }, + } + end + let(:results) do + [ + { + "ONS" => { + "AVG_LNG" => -2.242851, + "AVG_LAT" => 57.101474, + "TYPE" => "S", + "DOTERM" => "", + }, + }, + ] + end + + before do + Postcode.create!(postcode: normalised_postcode, source: :onspd, retired: false, results:) + end + + it "Should return proper body with source warning" do + get "/v1/locations?postcode=#{postcode}" + + expect(response.body).to eq locations.to_json + end + end + + context "which is large and retired" do + let(:locations) do + { + "source" => "Office of National Statistics", + "average_longitude" => -2.242851, + "average_latitude" => 57.101474, + "results" => [], + "extra_information" => { + "source" => "ONS source updated infrequently", + "retired" => "Postcode was retired in April 2004", + "large" => "Postcode is a large user postcode", + }, + } + end + let(:results) do + [ + { + "ONS" => { + "AVG_LNG" => -2.242851, + "AVG_LAT" => 57.101474, + "TYPE" => "L", + "DOTERM" => "April 2004", + }, + }, + ] + end + + before do + Postcode.create!(postcode: normalised_postcode, source: :onspd, retired: true, results:) + end + + it "Should return proper body with source, size, and retirement warning" do + get "/v1/locations?postcode=#{postcode}" + + expect(response.body).to eq locations.to_json + end + end + end + context "Client request validation" do let(:expected_validation_response) do { errors: { postcode: ["This postcode is invalid"] } }.to_json @@ -71,7 +165,7 @@ before do client = double("client") allow(OsPlacesApi::Client).to receive(:new).and_return(client) - expect(client).to receive(:locations_for_postcode).with(postcode).and_raise(OsPlacesApi::NoResultsForPostcode) + expect(client).to receive(:retrieve_locations_for_postcode).with(normalised_postcode).and_raise(OsPlacesApi::NoResultsForPostcode) end it "Should return proper body with error message" do @@ -97,7 +191,7 @@ before do client = double("client") allow(OsPlacesApi::Client).to receive(:new).and_return(client) - expect(client).to receive(:locations_for_postcode).with(postcode).and_raise(OsPlacesApi::InvalidPostcodeProvided) + expect(client).to receive(:retrieve_locations_for_postcode).with(normalised_postcode).and_raise(OsPlacesApi::InvalidPostcodeProvided) end it "Should return proper body with error message" do @@ -120,7 +214,7 @@ before do client = double("client") allow(OsPlacesApi::Client).to receive(:new).and_return(client) - expect(client).to receive(:locations_for_postcode).with(postcode).and_raise(OsPlacesApi::UnexpectedResponse) + expect(client).to receive(:retrieve_locations_for_postcode).with(normalised_postcode).and_raise(OsPlacesApi::UnexpectedResponse) end it "Should report the error to Sentry, tagged with the postcode" do @@ -132,4 +226,18 @@ expect { get "/v1/locations?postcode=#{postcode}" }.to raise_error(OsPlacesApi::UnexpectedResponse) end end + + context "If the database is corrupted with invalid source information" do + let(:postcode) { "AB1 0AA" } + let(:normalised_postcode) { "AB10AA" } + + before do + record = Postcode.create!(postcode: normalised_postcode, source: :onspd, retired: false, results: []) + ActiveRecord::Base.connection.execute("UPDATE postcodes SET source='unknown' WHERE id=#{record.id}") + end + + it "returns a 500 to the upstream app" do + expect { get "/v1/locations?postcode=#{postcode}" }.to raise_error(LocationsPresenter::UnknownSource) + end + end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index e2c9db8..10e821a 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,12 +1,16 @@ ENV["RAILS_ENV"] ||= "test" require "simplecov" -SimpleCov.start "rails" +SimpleCov.start "rails" do + enable_coverage :branch +end require File.expand_path("../config/environment", __dir__) require "rspec/rails" require "webmock/rspec" +Rails.application.load_tasks + Dir[Rails.root.join("spec/support/**/*.rb")].sort.each { |f| require f } GovukTest.configure WebMock.disable_net_connect!(allow_localhost: true) diff --git a/spec/support/os_client_helper.rb b/spec/support/os_client_helper.rb new file mode 100644 index 0000000..6391f7d --- /dev/null +++ b/spec/support/os_client_helper.rb @@ -0,0 +1,13 @@ +def mock_os_client_empty_results + os_client = double("os_client") + allow(OsPlacesApi::Client).to receive(:new).and_return(os_client) + allow(os_client).to receive(:retrieve_locations_for_postcode).and_return(OsPlacesApi::LocationResults.new([])) + os_client +end + +def mock_os_client_good_results + os_client = double("os_client") + allow(OsPlacesApi::Client).to receive(:new).and_return(os_client) + allow(os_client).to receive(:retrieve_locations_for_postcode).and_return(OsPlacesApi::LocationResults.new(os_places_api_results)) + os_client +end diff --git a/spec/support/os_places_api_helper.rb b/spec/support/os_places_api_helper.rb new file mode 100644 index 0000000..e966b52 --- /dev/null +++ b/spec/support/os_places_api_helper.rb @@ -0,0 +1,233 @@ +def os_places_api_endpoint(postcode) + "https://api.os.uk/search/places/v1/postcode?output_srs=WGS84&postcode=#{postcode}&dataset=DPA,LPI" +end + +def mock_access_token_manager + token_manager = double("token_manager") + allow(OsPlacesApi::AccessTokenManager).to receive(:new).and_return(token_manager) + allow(token_manager).to receive(:access_token).and_return("some token") +end + +def stub_os_places_api_request(postcode, response, status: 200) + stub_request(:get, os_places_api_endpoint(postcode)) + .to_return(status:, body: response.to_json) +end + +def stub_os_places_api_request_invalid_response(postcode) + stub_request(:get, os_places_api_endpoint(postcode)) + .to_return(status: 200, body: "foo") +end + +def stub_os_places_api_request_good(postcode) + stub_os_places_api_request(postcode, successful_response(postcode), status: 200) +end + +def stub_os_places_api_request_empty_results(postcode) + stub_os_places_api_request(postcode, empty_response(postcode), status: 200) +end + +def stub_os_places_api_request_nil_results(postcode) + stub_os_places_api_request(postcode, nil_response(postcode), status: 200) +end + +def stub_os_places_api_request_filterable_results(postcode) + stub_os_places_api_request(postcode, filterable_response(postcode), status: 200) +end + +def successful_response(postcode) + { + "header": { + "uri": os_places_api_endpoint(postcode), + "query": "postcode=#{postcode}", + "offset": 0, + "totalresults": 1, # really 12, but we've omitted the other 11 in `results` above + "format": "JSON", + "dataset": "DPA,LPI", + "lr": "EN,CY", + "maxresults": 100, + "epoch": "87", + "output_srs": "WGS84", + }, + "results": os_places_api_results, + } +end + +def os_places_api_results + [ + { + "DPA" => { + "UPRN" => "6714278", + "UDPRN" => "54673874", + "ADDRESS" => "1, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + "BUILDING_NUMBER" => "1", + "THOROUGHFARE_NAME" => "WHITECHAPEL HIGH STREET", + "POST_TOWN" => "LONDON", + "POSTCODE" => "E1 8QS", + "RPC" => "2", + "X_COORDINATE" => 533_813.0, + "Y_COORDINATE" => 181_262.0, + "LNG" => -0.0729933, + "LAT" => 51.5144547, + "STATUS" => "APPROVED", + "LOGICAL_STATUS_CODE" => "1", + "CLASSIFICATION_CODE" => "CO01", + "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", + "LOCAL_CUSTODIAN_CODE" => 5900, + "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "TOWER HAMLETS", + "POSTAL_ADDRESS_CODE" => "D", + "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", + "BLPU_STATE_CODE" => "1", + "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", + "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", + "LAST_UPDATE_DATE" => "17/06/2017", + "ENTRY_DATE" => "17/02/2017", + "BLPU_STATE_DATE" => "17/02/2017", + "LANGUAGE" => "EN", + "MATCH" => 1.0, + "MATCH_DESCRIPTION" => "EXACT", + }, + }, + { + "LPI" => { + "UPRN" => "6714279", + "ADDRESS" => "2, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + "USRN" => "22701338", + "LPI_KEY" => "5900L000449318", + "PAO_START_NUMBER" => "2", + "STREET_DESCRIPTION" => "WHITECHAPEL HIGH STREET", + "TOWN_NAME" => "LONDON", + "ADMINISTRATIVE_AREA" => "TOWER HAMLETS", + "POSTCODE_LOCATOR" => "E1 8QS", + "RPC" => "2", + "X_COORDINATE" => 533_813.0, + "Y_COORDINATE" => 181_262.0, + "LNG" => -0.0729935, + "LAT" => 51.5144545, + "STATUS" => "APPROVED", + "LOGICAL_STATUS_CODE" => "1", + "CLASSIFICATION_CODE" => "CO01", + "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", + "LOCAL_CUSTODIAN_CODE" => 5900, + "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "TOWER HAMLETS", + "COUNTRY_CODE" => "E", + "COUNTRY_CODE_DESCRIPTION" => "This record is within England", + "POSTAL_ADDRESS_CODE" => "D", + "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", + "BLPU_STATE_CODE" => "1", + "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", + "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", + "LAST_UPDATE_DATE" => "17/06/2017", + "ENTRY_DATE" => "17/02/2017", + "BLPU_STATE_DATE" => "17/02/2017", + "STREET_STATE_CODE" => "2", + "STREET_STATE_CODE_DESCRIPTION" => "Open", + "STREET_CLASSIFICATION_CODE" => "8", + "STREET_CLASSIFICATION_CODE_DESCRIPTION" => "All vehicles", + "LPI_LOGICAL_STATUS_CODE" => "1", + "LPI_LOGICAL_STATUS_CODE_DESCRIPTION" => "APPROVED", + "LANGUAGE" => "EN", + "MATCH" => 1.0, + "MATCH_DESCRIPTION" => "EXACT", + }, + }, + { + "DPA" => { + "UPRN" => "10091989923", + "UDPRN" => "54673879", + "ADDRESS" => "10, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + "BUILDING_NUMBER" => "10", + "THOROUGHFARE_NAME" => "WHITECHAPEL HIGH STREET", + "POST_TOWN" => "LONDON", + "POSTCODE" => "E1 8QS", + "RPC" => "2", + "X_COORDINATE" => 533_813.0, + "Y_COORDINATE" => 181_262.0, + "LNG" => -0.0729692, + "LAT" => 51.5144785, + "STATUS" => "APPROVED", + "LOGICAL_STATUS_CODE" => "1", + "CLASSIFICATION_CODE" => "CO01", + "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", + "LOCAL_CUSTODIAN_CODE" => 7655, + "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "ORDNANCE SURVEY", + "POSTAL_ADDRESS_CODE" => "E", + "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", + "BLPU_STATE_CODE" => "1", + "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", + "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", + "LAST_UPDATE_DATE" => "17/06/2017", + "ENTRY_DATE" => "17/02/2017", + "BLPU_STATE_DATE" => "17/02/2017", + "LANGUAGE" => "EN", + "MATCH" => 1.0, + "MATCH_DESCRIPTION" => "EXACT", + }, + }, + { + "DPA" => { + "UPRN" => "10091989923", + "UDPRN" => "54673879", + "ADDRESS" => "10, WHITECHAPEL HIGH STREET, LONDON, E1 8QS", + "BUILDING_NUMBER" => "10", + "THOROUGHFARE_NAME" => "WHITECHAPEL HIGH STREET", + "POST_TOWN" => "LONDON", + "POSTCODE" => "E1 8QS", + "RPC" => "2", + "X_COORDINATE" => 533_813.0, + "Y_COORDINATE" => 181_262.0, + "LNG" => -0.0729692, + "LAT" => 51.5144785, + "STATUS" => "APPROVED", + "LOGICAL_STATUS_CODE" => "1", + "CLASSIFICATION_CODE" => "CO01", + "CLASSIFICATION_CODE_DESCRIPTION" => "Office / Work Studio", + "LOCAL_CUSTODIAN_CODE" => 11, + "LOCAL_CUSTODIAN_CODE_DESCRIPTION" => "HIGHWAYS ENGLAND", + "POSTAL_ADDRESS_CODE" => "E", + "POSTAL_ADDRESS_CODE_DESCRIPTION" => "A record which is linked to PAF", + "BLPU_STATE_CODE" => "1", + "BLPU_STATE_CODE_DESCRIPTION" => "Under construction", + "TOPOGRAPHY_LAYER_TOID" => "osgb1000006035651", + "LAST_UPDATE_DATE" => "17/06/2017", + "ENTRY_DATE" => "17/02/2017", + "BLPU_STATE_DATE" => "17/02/2017", + "LANGUAGE" => "EN", + "MATCH" => 1.0, + "MATCH_DESCRIPTION" => "EXACT", + }, + }, + # subsequent results omitted for brevity + ] +end + +def nil_response(postcode) + base = successful_response(postcode) + base[:header][:totalresults] = 0 + base[:results] = nil + base +end + +def empty_response(postcode) + base = successful_response(postcode) + base[:header][:totalresults] = 0 + base[:results] = [] + base +end + +def filterable_response + base = successful_response(postcode) + base[:results] = os_places_api_results_with_filterable_locations, + base +end + +def os_places_api_results_with_filterable_locations + os_places_api_results.deep_dup.each do |location| + location.values.first["LOCAL_CUSTODIAN_CODE_DESCRIPTION"] = "ORDNANCE SURVEY" + end +end + +def os_places_api_results_with_postal_address_code_n + results = os_places_api_results + results.first["DPA"]["POSTAL_ADDRESS_CODE"] = "N" + results +end diff --git a/spec/workers/ons_download_worker_spec.rb b/spec/workers/ons_download_worker_spec.rb new file mode 100644 index 0000000..6e3b237 --- /dev/null +++ b/spec/workers/ons_download_worker_spec.rb @@ -0,0 +1,43 @@ +require "spec_helper" + +RSpec.describe OnsDownloadWorker do + let(:s3_client) { double("s3_client") } + + before do + allow(Aws::S3::Client).to receive(:new).and_return(s3_client) + end + + describe "#perform" do + context "with a valid URL" do + before do + stub_request(:get, "https://test.com/ONSPD_JUN_2023_UK.zip").to_return( + status: 200, + body: ->(_) { File.new("#{Rails.root}/spec/fixtures/ONSPD_JUN_2023_UK.zip") }, + ) + end + + it "grabs a file from the download URL, extracts the zip, creates S3 objects, and creates OnsImportWorkers" do + expect(s3_client).to receive(:put_object).twice + + expect(OnsImportWorker).to receive(:perform_async).with("Data/multi_csv/ONSPD_JUN_2023_UK_AB.csv") + expect(OnsImportWorker).to receive(:perform_async).with("Data/multi_csv/ONSPD_JUN_2023_UK_AL.csv") + + OnsDownloadWorker.new.perform("https://test.com/ONSPD_JUN_2023_UK.zip") + end + end + + context "with an invalid URL" do + before do + stub_request(:get, "https://test.com/ONSPD_JUN_2023_UK.zip").to_return( + status: 200, + body: "hello", + ) + end + + it "grabs a file from the download URL, extracts the zip, creates S3 objects, and creates OnsImportWorkers" do + expect(GovukError).to receive(:notify) + OnsDownloadWorker.new.perform("https://test.com/ONSPD_JUN_2023_UK.zip") + end + end + end +end diff --git a/spec/workers/ons_import_worker_spec.rb b/spec/workers/ons_import_worker_spec.rb new file mode 100644 index 0000000..ffac106 --- /dev/null +++ b/spec/workers/ons_import_worker_spec.rb @@ -0,0 +1,111 @@ +require "spec_helper" + +RSpec.describe OnsImportWorker do + let(:s3_client) { double("s3_client") } + + before do + allow(Aws::S3::Client).to receive(:new).and_return(s3_client) + allow(s3_client).to receive(:get_object) + + Postcode.delete_all + end + + describe "#perform" do + context "with a valid key and file" do + before do + tempfile_from_fixture("#{Rails.root}/spec/fixtures/ONSPD_JUN_2023_UK_AB.csv") + end + + it "imports records not present in the database" do + expect(s3_client).to receive(:get_object).once + + OnsImportWorker.new.perform("ons/JUN_2023/AB.csv") + expect(Postcode.count).to eq(5) + end + + it "sets record retired if a valid doterm exists" do + expect(s3_client).to receive(:get_object).once + + OnsImportWorker.new.perform("ons/JUN_2023/AB.csv") + record = Postcode.where(postcode: "AB10AA").first + expect(record.retired).to be true + end + + it "sets record not retired if doterm is blank" do + expect(s3_client).to receive(:get_object).once + + OnsImportWorker.new.perform("ons/JUN_2023/AB.csv") + record = Postcode.where(postcode: "AB10AE").first + expect(record.retired).to be false + end + end + + context "with an existing record" do + before do + tempfile_from_fixture("#{Rails.root}/spec/fixtures/ONSPD_JUN_2023_UK_AB.csv") + Postcode.create(postcode: "AB10AA", source: "onspd", results: [], retired: false) + end + + it "updates the existing record" do + expect(s3_client).to receive(:get_object).once + + OnsImportWorker.new.perform("ons/JUN_2023/AB.csv") + expect(Postcode.count).to eq(5) + record = Postcode.where(postcode: "AB10AA").first + expect(record.retired).to be true + expect(record.results).not_to eq([]) + end + end + + context "with an existing os_places record" do + before do + tempfile_from_fixture("#{Rails.root}/spec/fixtures/ONSPD_JUN_2023_UK_AB.csv") + Postcode.create(postcode: "AB10AA", source: "os_places", results: ["Original Data"], retired: false) + end + + it "does not touch the existing record" do + OnsImportWorker.new.perform("ons/JUN_2023/AB.csv") + expect(Postcode.count).to eq(5) + record = Postcode.where(postcode: "AB10AA").first + expect(record.retired).to be false + expect(record.results).to eq(["Original Data"]) + end + end + + context "with a file with an invalid doterm in it" do + before do + tempfile_from_fixture("#{Rails.root}/spec/fixtures/ONSPD_JUN_2023_UK_AB_broken.csv") + end + + it "Imports records, marks doterm as Unknown if invalid, logs warning" do + expect(s3_client).to receive(:get_object).once + expect(Rails.logger).to receive(:warn) + + OnsImportWorker.new.perform("ons/JUN_2023/AB.csv") + expect(Postcode.count).to eq(5) + expect(Postcode.first.results.first["ONS"]["DOTERM"]).to eq("Unknown") + end + end + + context "with a key that points to a missing file" do + before do + allow(s3_client).to receive(:get_object).and_raise(Aws::Errors::ServiceError) + end + + it "logs an error" do + expect(s3_client).to receive(:get_object).once + expect(GovukError).to receive(:notify) + + OnsImportWorker.new.perform("ons/JUN_2023/ABC.csv") + end + end + end +end + +def tempfile_from_fixture(file_path) + tempfile = Tempfile.new("tmp/ONSPD.csv") + content = File.open(file_path).read + tempfile.write(content) + tempfile.rewind + allow(Tempfile).to receive(:new).and_return(tempfile) +end diff --git a/spec/workers/postcodes_collection_worker_spec.rb b/spec/workers/postcodes_collection_worker_spec.rb index 6919898..d3758ae 100644 --- a/spec/workers/postcodes_collection_worker_spec.rb +++ b/spec/workers/postcodes_collection_worker_spec.rb @@ -1,3 +1,5 @@ +require "spec_helper" + RSpec.describe PostcodesCollectionWorker do describe "#perform" do before do diff --git a/spec/workers/process_postcode_worker_spec.rb b/spec/workers/process_postcode_worker_spec.rb index ac2bbcd..fb56bad 100644 --- a/spec/workers/process_postcode_worker_spec.rb +++ b/spec/workers/process_postcode_worker_spec.rb @@ -1,11 +1,13 @@ +require "spec_helper" + RSpec.describe ProcessPostcodeWorker do describe "#perform" do let(:postcode) { "E18QS" } it "updates the given postcode" do - stubbed_client = double("OsPlacesApi::Client") + stubbed_client = double("PostcodeManager") - expect(OsPlacesApi::Client).to receive(:new) { stubbed_client } + expect(PostcodeManager).to receive(:new) { stubbed_client } expect(stubbed_client).to receive(:update_postcode).with(postcode) ProcessPostcodeWorker.new.perform(postcode)