Skip to content

Commit

Permalink
Merge pull request #373 from nanglo123/add_geoname_cities
Browse files (browse the repository at this point in the history)
Only filter cities by minimum population
  • Loading branch information
bgyori committed Sep 24, 2024
2 parents 23da16b + d33df73 commit 51d0c98
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions mira/dkg/resources/geonames.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -109,16 +109,14 @@ def get_cities(code_to_country, code_to_admin1, code_to_admin2, *, minimum_popul
),
)

cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
cities_df.synonyms = cities_df.synonyms.str.split(",")

terms = {}
for term in code_to_country.values():
terms[term.identifier] = term

cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2"]
for identifier, name, synonyms, country, admin1, admin2 in cities_df[cols].values:
terms[identifier] = term = Term.from_triple("geonames", identifier, name)
cols = ["geonames_id", "name", "synonyms", "country_code", "admin1",
"admin2", "population"]
for identifier, name, synonyms, country, admin1, admin2, population in (cities_df[cols].values):
if synonyms and not isinstance(synonyms, float):
for synoynm in synonyms:
term.append_synonym(synoynm)
Expand All @@ -135,6 +133,11 @@ def get_cities(code_to_country, code_to_admin1, code_to_admin2, *, minimum_popul

terms[admin1_term.identifier] = admin1_term

# We skip cities that don't meet the minimum population requirement
if int(population) < minimum_population:
continue
terms[identifier] = term = Term.from_triple("geonames", identifier,
name)
if pd.notna(admin2):
admin2_full = f"{country}.{admin1}.{admin2}"
admin2_term = code_to_admin2.get(admin2_full)
Expand Down

0 comments on commit 51d0c98

Please sign in to comment.