Skip to content

Commit

Permalink
Merge pull request #1 from flaxandteal/feature/add-tree-traversal
Browse files Browse the repository at this point in the history
Add tree traversal
  • Loading branch information
philtweir authored Oct 23, 2023
2 parents 4827914 + 5061fdd commit 6e46e04
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 62 deletions.
57 changes: 0 additions & 57 deletions .github/workflows/release.yml

This file was deleted.

7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "berlin-core"
version = "0.2.2"
version = "0.2.3"
edition = "2021"
license = "MIT"
description = "Identify locations and tag them with UN-LOCODEs and ISO-3166-2 subdivisions."
Expand Down Expand Up @@ -36,6 +36,7 @@ strsim = "0.10.0"
petgraph = "0.6.0"

fst = { version = "0.4.7", features = ["levenshtein"] }
indextree = "4.6.0"

[profile.dev]
split-debuginfo = "unpacked"
Expand Down
1 change: 1 addition & 0 deletions src/location.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::cmp::max;
use std::collections::HashMap;

use indextree::NodeId;
use serde::de::Error;
use serde::{Deserialize, Serialize};
use smallvec::{smallvec, SmallVec};
Expand Down
23 changes: 19 additions & 4 deletions src/locations_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::time::Instant;

use csv::ReaderBuilder;
use fst::{Automaton, Streamer};
use indextree::{Arena, NodeId};
use rayon::iter::{
IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelBridge,
ParallelIterator,
Expand All @@ -24,13 +25,15 @@ use crate::SEARCH_INCLUSION_THRESHOLD;
/// Collection of all known locations together with the lookup structures
/// (code maps, word index, FST and parent/child arena) built over them.
#[derive(Default)]
pub struct LocationsDb {
// every location, stored under its `key` (see `insert`)
pub all: UstrMap<Location>,
// arena node id of each location, looked up by the location's `key`
pub indices: UstrMap<NodeId>,
// states by alpha2 code
// NOTE(review): `insert` appears to store the state's location key here
// (`l.key`) rather than its name — confirm against the current file
pub state_by_code: UstrMap<Ustr>,
// key is in format "gb:lon" (the location `id`)
// NOTE(review): `insert` appears to store the subdivision's location key
// (`l.key`) as the value rather than its name — confirm
pub subdiv_by_code: UstrMap<Ustr>,
// word index produced by `mk_fst`; presumably maps each word/code/name to
// the set of location keys it occurs in — TODO confirm
pub by_word_map: UstrMap<UstrSet>,
// presumably the same word index in vector form — TODO confirm how it is ordered/used
pub by_word_vec: Vec<(Ustr, UstrSet)>,
// finite-state-transducer map built in `mk_fst`
pub fst: fst::Map<Vec<u8>>,
// tree arena with one node per location (node payload is the location key);
// `mk_fst` appends each location under its subdivision or state node
pub arena: Arena<Ustr>,
}

impl LocationsDb {
Expand All @@ -46,19 +49,29 @@ impl LocationsDb {
/// Registers a location in the database.
///
/// States and subdivisions additionally get an entry in `state_by_code`
/// (keyed by alpha2 code) or `subdiv_by_code` (keyed by the location id).
/// Every location gets a fresh arena node holding its key, the node id is
/// recorded in `indices`, and the location itself is stored in `all`.
// NOTE(review): this listing is a rendered diff, so each changed statement
// appears twice below (the `s.name` / `sd.name` form and the `l.key` form).
// Presumably the `l.key` lines are the post-change version — confirm against
// the actual file.
pub fn insert(&mut self, l: Location) {
match &l.data {
LocData::St(s) => {
self.state_by_code.insert(s.alpha2, s.name);
self.state_by_code.insert(s.alpha2, l.key);
}
LocData::Subdv(sd) => {
self.subdiv_by_code.insert(l.id, sd.name);
LocData::Subdv(_sd) => {
self.subdiv_by_code.insert(l.id, l.key);
}
// UN-LOCODE locations and airports have no by-code lookup entry
LocData::Locd(_) => {}
LocData::Airp(_) => {}
}
// one arena node per location; parent links are attached later in `mk_fst`
let node_id = self.arena.new_node(l.key);
self.indices.insert(l.key, node_id);
self.all.insert(l.key, l);
}
pub fn mk_fst(self) -> Self {
pub fn mk_fst(mut self) -> Self {
let mut words_map: UstrMap<UstrSet> = UstrMap::default();
let arena = &mut self.arena;
self.all.iter().for_each(|(key, loc)| {
let node_id: &NodeId = self.indices.get(key).unwrap();
match loc.get_parents() {
(_, Some(subdiv)) => self.indices.get(&subdiv).unwrap().append(*node_id, arena),
(Some(st), None) => self.indices.get(&st).unwrap().append(*node_id, arena),
(None, None) => (),
};

let codes = loc.get_codes();
let names = loc.get_names();
let words_iter = loc.words.iter().chain(codes.iter()).chain(names.iter());
Expand Down Expand Up @@ -88,6 +101,8 @@ impl LocationsDb {
.expect("Build FST");
LocationsDb {
all: self.all,
arena: self.arena,
indices: self.indices,
state_by_code: self.state_by_code,
subdiv_by_code: self.subdiv_by_code,
by_word_map: words_map,
Expand Down

0 comments on commit 6e46e04

Please sign in to comment.