Skip to content

Commit

Permalink
Fetch new UPVS dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
luciajanikova committed Nov 6, 2023
1 parent 477c0a7 commit 7d7670b
Show file tree
Hide file tree
Showing 14 changed files with 1,527 additions and 1 deletion.
69 changes: 69 additions & 0 deletions app/jobs/upvs/fetch_all_public_authority_edesks_list_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Downloads the CSV list of all public authority eDesks, validates every row
# and loads the rows through TemporaryAllPublicAuthorityEdesk.
#
# NOTE(review): create_table!, truncate_source_table! and insert_to_source_table!
# come from TemporaryRecord, which is not visible here; presumably they stage the
# rows in a temporary table and then swap them into the table of
# Upvs::AllPublicAuthorityEdesk — confirm against TemporaryRecord.
class Upvs::FetchAllPublicAuthorityEdesksListJob < ApplicationJob
  queue_as :upvs

  # @param url [String] location of the CSV file to download
  # @param downloader [#download_file] object used to fetch the file; the value it
  #   returns is handed to File.open and CSV.foreach
  # @raise [RuntimeError] when a row fails the checks in #check_row_attributes
  # @raise [KeyError] when a row lacks the ICO, URI or Nazov column
  # @raise [ActiveRecord::RecordNotFound] when one of the well-known eDesks is missing
  def perform(url, downloader: HarvesterUtils::Downloader)
    csv_file = downloader.download_file(url)
    csv_options = { col_sep: detect_col_sep(csv_file), headers: true }

    TemporaryAllPublicAuthorityEdesk.transaction do
      TemporaryAllPublicAuthorityEdesk.create_table!

      each_row_as_attributes(csv_file, csv_options) do |attributes|
        check_row_attributes(attributes)
        TemporaryAllPublicAuthorityEdesk.find_or_initialize_by(uri: attributes[:uri]).update!(attributes)
      end

      # Sanity check: abort (and roll back) when well-known eDesks are absent.
      assert_known_edesks_existence!

      TemporaryAllPublicAuthorityEdesk.truncate_source_table!
      TemporaryAllPublicAuthorityEdesk.insert_to_source_table!
    end

    # Only reached when the whole import succeeded.
    BetterUptimeApi.ping_heartbeat('UPVS_FETCH_ALL_EDESKS')
  end

  # Temporary record whose source model is Upvs::AllPublicAuthorityEdesk.
  class TemporaryAllPublicAuthorityEdesk < TemporaryRecord
    def self.source
      Upvs::AllPublicAuthorityEdesk
    end
  end

  private

  # Picks the column separator: ';' when the first line of the file contains a
  # semicolon, ',' otherwise.
  def detect_col_sep(csv_file)
    File.open(csv_file, &:readline).include?(';') ? ';' : ','
  end

  # Yields every CSV row as a hash with :cin, :uri and :name keys.
  #
  # Header names are stripped of Unicode format characters (e.g. a BOM) and double
  # quotes; a stray leading quote on the first field and trailing quote on the last
  # field are removed; the literal string 'NULL' is converted to nil.
  def each_row_as_attributes(csv_file, csv_options)
    # Options must be splatted: on Ruby 3 a positional hash would be taken as the
    # `mode` argument of CSV.foreach instead of parser options.
    CSV.foreach(csv_file, **csv_options) do |csv_row|
      row = csv_row.to_h.transform_keys { |key| key.to_s.gsub(/\p{Cf}|"/, '') }

      # Fields may be nil (empty CSV cell), hence the safe navigation.
      row[row.keys.first]&.sub!(/\A"/, '')
      row[row.keys.last]&.sub!(/"\z/, '')

      row = row.transform_values { |value| value == 'NULL' ? nil : value }

      yield(
        cin: row.fetch('ICO'),
        uri: row.fetch('URI'),
        name: row.fetch('Nazov')
      )
    end
  end

  # Validates a single row.
  #
  # @raise [RuntimeError] when the CIN is missing, when the URI does not contain the
  #   CIN (rows whose name contains "TEST" are exempt from this check), or when the
  #   name contains a literal "\u" escape sequence
  def check_row_attributes(attributes)
    cin, uri, name = attributes.values_at(:cin, :uri, :name)

    raise "Missing ICO for #{uri}" if cin.nil?

    # Escape the CIN so that unexpected characters in the data cannot alter the pattern.
    cin_without_leading_zeros = Regexp.escape(cin.sub(/\A0+/, ''))

    # NOTE(review): the pattern is not anchored at the end, so extra trailing
    # characters after the CIN are accepted — confirm whether that is intended.
    unless /TEST/i.match?(name)
      raise "#{uri} does not match #{cin}" unless %r{ico://sk/(0*)#{cin_without_leading_zeros}(_\d+)?}.match?(uri)
    end

    raise "Incorrect encoding" if /\\u/.match?(name)
  end

  # Raises ActiveRecord::RecordNotFound unless a few well-known eDesks were loaded.
  def assert_known_edesks_existence!
    repository = TemporaryAllPublicAuthorityEdesk
    repository.find_by!(uri: 'ico://sk/00151513', cin: '151513', name: 'Úrad vlády Slovenskej republiky')
    repository.find_by!(uri: 'ico://sk/00151513_10003', cin: '151513', name: 'Úrad vlády Slovenskej republiky - Petície')
    repository.find_by!(uri: 'ico://sk/00164381', cin: '164381', name: 'Ministerstvo školstva, vedy, výskumu a športu Slovenskej republiky')
  end
end
27 changes: 27 additions & 0 deletions app/jobs/upvs/find_all_public_authority_edesk_list_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
module Upvs
  # Raised when the dataset page contains no link to a CSV resource.
  class ResourceNotFoundError < RuntimeError
  end

  # Looks up the CSV resource link on the dataset page and enqueues the job that
  # fetches and imports it.
  class FindAllPublicAuthorityEdeskListJob < ApplicationJob
    queue_as :upvs

    DATASET_URL = 'https://data.gov.sk/dataset/zoznam-vsetkych-elektronickych-schranok-ovm'

    # @param downloader [#download] object used to fetch the dataset page HTML
    # @param fetch_job [#perform_later] job enqueued with the href of the CSV link
    # @raise [Upvs::ResourceNotFoundError] when no link with '.csv' in its href is found
    def perform(downloader: HarvesterUtils::Downloader, fetch_job: Upvs::FetchAllPublicAuthorityEdesksListJob)
      html = downloader.download(DATASET_URL)
      doc = Nokogiri::HTML.parse(html)

      # Anchors without an href attribute return nil, hence the safe navigation.
      resource_link = doc.search('.resource-item .dropdown-menu a').detect do |a|
        a['href']&.include?('.csv')
      end

      raise ResourceNotFoundError, "No CSV resource link found at #{DATASET_URL}" unless resource_link

      fetch_job.perform_later(resource_link['href'])

      # Only reached when the fetch job was enqueued.
      BetterUptimeApi.ping_heartbeat('UPVS_FIND_ALL_EDESKS')
    end
  end
end
2 changes: 2 additions & 0 deletions app/models/upvs/all_public_authority_edesk.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# ActiveRecord model for eDesk records of public authorities.
# Upvs::FetchAllPublicAuthorityEdesksListJob uses it as the source of its temporary record.
# NOTE(review): presumably backed by the upvs.all_public_authority_edesks table —
# confirm how the table name is resolved for the Upvs namespace.
class Upvs::AllPublicAuthorityEdesk < ApplicationRecord
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Creates the upvs.all_public_authority_edesks table with a unique index on uri
# and a plain index on cin.
class CreateUpvsAllPublicAuthorityEdesk < ActiveRecord::Migration[6.0]
  def change
    table = 'upvs.all_public_authority_edesks'

    create_table(table) do |t|
      # limit: 8 makes this an 8-byte integer column.
      t.integer :cin, limit: 8, null: false
      t.string :uri, null: false
      t.string :name, null: false

      t.timestamps
    end

    add_index(table, :uri, unique: true)
    add_index(table, :cin)
  end
end
65 changes: 64 additions & 1 deletion db/structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4612,6 +4612,39 @@ CREATE TABLE public.schema_migrations (
);


--
-- Name: all_public_authority_edesks; Type: TABLE; Schema: upvs; Owner: -
--

CREATE TABLE upvs.all_public_authority_edesks (
id bigint NOT NULL,
cin bigint NOT NULL,
uri character varying NOT NULL,
name character varying NOT NULL,
created_at timestamp(6) without time zone NOT NULL,
updated_at timestamp(6) without time zone NOT NULL
);


--
-- Name: all_public_authority_edesks_id_seq; Type: SEQUENCE; Schema: upvs; Owner: -
--

CREATE SEQUENCE upvs.all_public_authority_edesks_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;


--
-- Name: all_public_authority_edesks_id_seq; Type: SEQUENCE OWNED BY; Schema: upvs; Owner: -
--

ALTER SEQUENCE upvs.all_public_authority_edesks_id_seq OWNED BY upvs.all_public_authority_edesks.id;


--
-- Name: public_authority_edesks; Type: TABLE; Schema: upvs; Owner: -
--
Expand Down Expand Up @@ -5551,6 +5584,13 @@ ALTER TABLE ONLY metais.project_versions ALTER COLUMN id SET DEFAULT nextval('me
ALTER TABLE ONLY metais.projects ALTER COLUMN id SET DEFAULT nextval('metais.projects_id_seq'::regclass);


--
-- Name: all_public_authority_edesks id; Type: DEFAULT; Schema: upvs; Owner: -
--

ALTER TABLE ONLY upvs.all_public_authority_edesks ALTER COLUMN id SET DEFAULT nextval('upvs.all_public_authority_edesks_id_seq'::regclass);


--
-- Name: public_authority_edesks id; Type: DEFAULT; Schema: upvs; Owner: -
--
Expand Down Expand Up @@ -6581,6 +6621,14 @@ ALTER TABLE ONLY public.schema_migrations
ADD CONSTRAINT schema_migrations_pkey PRIMARY KEY (version);


--
-- Name: all_public_authority_edesks all_public_authority_edesks_pkey; Type: CONSTRAINT; Schema: upvs; Owner: -
--

ALTER TABLE ONLY upvs.all_public_authority_edesks
ADD CONSTRAINT all_public_authority_edesks_pkey PRIMARY KEY (id);


--
-- Name: public_authority_edesks public_authority_edesks_pkey; Type: CONSTRAINT; Schema: upvs; Owner: -
--
Expand Down Expand Up @@ -8942,6 +8990,20 @@ CREATE INDEX "index_metais.projects_on_latest_version_id" ON metais.projects USI
CREATE INDEX "index_metais.projects_on_uuid" ON metais.projects USING btree (uuid);


--
-- Name: index_upvs.all_public_authority_edesks_on_cin; Type: INDEX; Schema: upvs; Owner: -
--

CREATE INDEX "index_upvs.all_public_authority_edesks_on_cin" ON upvs.all_public_authority_edesks USING btree (cin);


--
-- Name: index_upvs.all_public_authority_edesks_on_uri; Type: INDEX; Schema: upvs; Owner: -
--

CREATE UNIQUE INDEX "index_upvs.all_public_authority_edesks_on_uri" ON upvs.all_public_authority_edesks USING btree (uri);


--
-- Name: index_upvs.public_authority_edesks_on_cin; Type: INDEX; Schema: upvs; Owner: -
--
Expand Down Expand Up @@ -11001,6 +11063,7 @@ INSERT INTO "schema_migrations" (version) VALUES
('20220902164827'),
('20220919080112'),
('20220919084308'),
('20221219105855');
('20221219105855'),
('20231106173059');


5 changes: 5 additions & 0 deletions lib/tasks/upvs.rake
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
namespace :upvs do
# Enqueues Upvs::FindAllPublicAuthorityEdeskListJob once the Rails environment is loaded.
desc 'Sync all public authority eDesks'
task('all_public_authority_edesks:sync' => :environment) { Upvs::FindAllPublicAuthorityEdeskListJob.perform_later }

desc 'Sync public authority eDesks'
task 'public_authority_edesks:sync' => :environment do
Upvs::FindPublicAuthorityEdeskListJob.perform_later
Expand Down
6 changes: 6 additions & 0 deletions spec/factories/upvs_factories.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
FactoryBot.define do
# Factory for Upvs::AllPublicAuthorityEdesk; each attribute is generated from its own sequence.
factory :upvs_all_public_authority_edesk, class: Upvs::AllPublicAuthorityEdesk do
  sequence(:cin) do |index|
    10_000_000 + index
  end

  sequence(:uri) do |index|
    "ico://sk/#{10_000_000 + index}_90000"
  end

  sequence(:name) do |index|
    "Súkromná materská škola #{index}, Starozagorská 8, Košice"
  end
end

factory :upvs_public_authority_edesk, class: Upvs::PublicAuthorityEdesk do
sequence(:cin) { |n| 10_000_000 + n }
sequence(:uri) { |n| "ico://sk/#{10_000_000 + n}_90000" }
Expand Down
8 changes: 8 additions & 0 deletions spec/fixtures/upvs/all-edesks-incorrect-encoding.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"ICO","URI","Nazov"
"00332674","ico://sk/00332674","Obec Petrovce, okres Vranov nad Topľou"
"30232295","ico://sk/30232295_71406","Fakulta politických vied a medzinárodných vzťahov Univerzity Mateja Bela v Banskej Bystrici"
"00399418","ico://sk/00399418_10001","Materská škola"
"00308161","ico://sk/00308161","Obec Lovce"
"00327727","ico://sk/00327727_10002","Materská \u0161kola Ruská Nová Ves 24"
"00151513","ico://sk/00151513","Úrad vlády Slovenskej republiky"
"00151513","ico://sk/00151513_10003","Úrad vlády Slovenskej republiky - Petície"
9 changes: 9 additions & 0 deletions spec/fixtures/upvs/all-edesks-missing-leading-zeros.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"ICO","URI","Nazov"
"00332674","ico://sk/00332674","Obec Petrovce, okres Vranov nad Topľou"
"30232295","ico://sk/30232295_71406","Fakulta politických vied a medzinárodných vzťahov Univerzity Mateja Bela v Banskej Bystrici"
"00399418","ico://sk/00399418_10001","Materská škola"
"00308161","ico://sk/00308161","Obec Lovce"
"00164381","ico://sk/00164381","Ministerstvo školstva, vedy, výskumu a športu Slovenskej republiky"
"00151513","ico://sk/00151513","Úrad vlády Slovenskej republiky"
"00151513","ico://sk/00151513_10003","Úrad vlády Slovenskej republiky - Petície"
"214973","ico://sk/214973_10001","CRH (Slovensko) a.s."
8 changes: 8 additions & 0 deletions spec/fixtures/upvs/all-edesks-not-matching.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"ICO","URI","Nazov"
"00332674","ico://sk/00332674","Obec Petrovce, okres Vranov nad Topľou"
"30232295","ico://sk/30232295_71406","Fakulta politických vied a medzinárodných vzťahov Univerzity Mateja Bela v Banskej Bystrici"
"00399418","ico://sk/00399418_10001","Materská škola"
"00308161","ico://sk/00308161","Obec Lovce"
"166260","ico://sk/99166260","Úrad geodézie, kartografie a katastra Slovenskej republiky"
"00151513","ico://sk/00151513","Úrad vlády Slovenskej republiky"
"00151513","ico://sk/00151513_10003","Úrad vlády Slovenskej republiky - Petície"
8 changes: 8 additions & 0 deletions spec/fixtures/upvs/all-edesks.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"ICO","URI","Nazov"
"00332674","ico://sk/00332674","Obec Petrovce, okres Vranov nad Topľou"
"30232295","ico://sk/30232295_71406","Fakulta politických vied a medzinárodných vzťahov Univerzity Mateja Bela v Banskej Bystrici"
"00399418","ico://sk/00399418_10001","Materská škola"
"00308161","ico://sk/00308161","Obec Lovce"
"00164381","ico://sk/00164381","Ministerstvo školstva, vedy, výskumu a športu Slovenskej republiky"
"00151513","ico://sk/00151513","Úrad vlády Slovenskej republiky"
"00151513","ico://sk/00151513_10003","Úrad vlády Slovenskej republiky - Petície"
Loading

0 comments on commit 7d7670b

Please sign in to comment.