Skip to content

Commit

Permalink
Merge pull request #22 from Seshat-Global-History-Databank/cliopatria…
Browse files Browse the repository at this point in the history
…-end-years

Refactor Cliopatria loading and visualiser notebook
  • Loading branch information
edwardchalstrey1 authored Jul 12, 2024
2 parents 13ad837 + 6ffacae commit fb4573c
Show file tree
Hide file tree
Showing 13 changed files with 571 additions and 389 deletions.
71 changes: 71 additions & 0 deletions cliopatria/convert_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import geopandas as gpd
from distinctipy import get_colors, get_hex
import sys

def cliopatria_gdf(gdf):
    """
    Prepare the Cliopatria polity borders dataset for Seshat.

    The GeoDataFrame is modified in place (and also returned): a display
    name, a colour and the overall start/end year of the polity are added
    to every shape.

    Args:
        gdf (GeoDataFrame): The Cliopatria polity borders dataset, as read by
            GeoPandas from the GeoJSON file. The columns 'Name', 'Type',
            'FromYear' and 'ToYear' are read.

    Returns:
        GeoDataFrame: The same GeoDataFrame with the extra columns
            'DisplayName', 'Color', 'PolityStartYear' and 'PolityEndYear'.
    """
    # DisplayName is the shape's 'Name' with any parentheses removed
    names_without_brackets = gdf['Name'].str.replace('[()]', '', regex=True)
    gdf['DisplayName'] = names_without_brackets

    # Shapes whose type is not 'POLITY' get the capitalised type as a prefix.
    # The prefixed series is built for every row; .loc only writes the rows
    # selected by the mask.
    is_not_polity = gdf['Type'] != 'POLITY'
    prefixed_names = gdf['Type'].str.capitalize() + ': ' + gdf['DisplayName']
    gdf.loc[is_not_polity, 'DisplayName'] = prefixed_names

    print(f"Generated shape names for {len(gdf)} shapes.")
    print("Assigning colours to shapes...")

    # One DistinctiPy colour per distinct DisplayName, stored as a hex string
    distinct_names = gdf['DisplayName'].unique()
    palette = get_colors(len(distinct_names))
    colour_by_name = {
        name: get_hex(colour) for name, colour in zip(distinct_names, palette)
    }
    gdf['Color'] = gdf['DisplayName'].map(colour_by_name)

    print(f"Assigned colours to {len(gdf)} shapes.")
    print("Determining polity start and end years...")

    # Every shape carries the earliest 'FromYear' and the latest 'ToYear'
    # found among all shapes that share its 'Name'
    year_columns = (
        ('PolityStartYear', 'FromYear', 'min'),
        ('PolityEndYear', 'ToYear', 'max'),
    )
    for target_column, source_column, reducer in year_columns:
        gdf[target_column] = gdf.groupby('Name')[source_column].transform(reducer)

    print(f"Determined polity start and end years for {len(gdf)} shapes.")

    return gdf


def processed_output_path(geojson_path):
    """
    Build the path of the processed GeoJSON file for a given input path.

    Only a trailing '.geojson' extension (matched case-insensitively) is
    replaced; any other occurrence of '.geojson' in the path, e.g. in a
    directory name, is left untouched. When the input has a different
    extension the suffix is appended instead, so the result never equals the
    input path and the source file cannot be overwritten.

    Args:
        geojson_path (str): Path of the input GeoJSON file.

    Returns:
        str: Path ending in '_seshat_processed.geojson'.
    """
    extension = '.geojson'
    if geojson_path.lower().endswith(extension):
        stem = geojson_path[:-len(extension)]
    else:
        stem = geojson_path
    return stem + '_seshat_processed.geojson'


def main(argv=None):
    """
    Command line entry point: read a GeoJSON file, process it, save the result.

    Args:
        argv (list[str] | None): Argument vector whose second element is the
            path to the GeoJSON file. Defaults to ``sys.argv``.

    Raises:
        SystemExit: With exit code 1 when no path is given, or when the file
            cannot be loaded or the processed file cannot be saved.
    """
    if argv is None:
        argv = sys.argv

    # Check if a GeoJSON file path was provided as a command line argument
    if len(argv) < 2:
        print("Please provide the path to the GeoJSON file as a command line argument.")
        sys.exit(1)

    geojson_path = argv[1]

    try:
        gdf = gpd.read_file(geojson_path)
    except Exception as e:
        print(f"Error loading GeoJSON file: {str(e)}")
        sys.exit(1)

    # Call the cliopatria_gdf function to process the GeoDataFrame
    processed_gdf = cliopatria_gdf(gdf)

    # Save the processed GeoDataFrame as a new GeoJSON file
    output_path = processed_output_path(geojson_path)
    try:
        processed_gdf.to_file(output_path, driver='GeoJSON')
        print(f"Processed GeoDataFrame saved to: {output_path}")
    except Exception as e:
        print(f"Error saving processed GeoDataFrame: {str(e)}")
        sys.exit(1)


# Only run the conversion when executed as a script, so that importing this
# module (e.g. to reuse cliopatria_gdf) has no side effects.
if __name__ == "__main__":
    main()
18 changes: 12 additions & 6 deletions docs/source/getting-started/setup/spatialdb.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,29 @@ Cliopatria shape dataset
-------------------------

..
TODO: Add a link here to the published Clipatria dataset
TODO: Add a link here to the published Cliopatria dataset
1. Download and unzip the Cliopatria dataset.
2. Populate ``core_videoshapefile`` table using the following command:
2. Update the Cliopatria GeoJSON file with colours and other properties required by Seshat:

.. code-block:: bash
$ python cliopatria/convert_data.py /path/to/cliopatria.geojson
Note: this will create a new file next to the original, named after it with the ``.geojson`` extension replaced by ``_seshat_processed.geojson`` (e.g. ``cliopatria_seshat_processed.geojson``).
3. Populate ``core_videoshapefile`` table using the following command:

.. code-block:: bash
$ python manage.py populate_videodata /path/to/data
$ python manage.py populate_videodata /path/to/cliopatria_seshat_processed.geojson
Note: if you wish to further simplify the Cliopatria shape resolution used by the world map after loading it into the database, open ``seshat/apps/core/management/commands/populate_videodata.py`` and modify the SQL query under the comment: "Adjust the tolerance param of ST_Simplify as needed"
GADM
----

1. `Download <https://geodata.ucdavis.edu/gadm/gadm4.1/gadm_410-gpkg.zip>`_ the whole world GeoPackage file from the `GADM website <https://gadm.org/download_world.html>`_.
2. Populate the ``core_gadmshapefile``, ``core_gadmcountries`` and ``core_gadmprovinces`` tables using the following command:

.. code-block:: bash
.. code-block:: bash
$ python manage.py populate_gadm /path/to/gpkg_file
$ python manage.py populate_gadm /path/to/gpkg_file
Loading

0 comments on commit fb4573c

Please sign in to comment.