-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #140 from developmentseed/feat/add-cmip6-demo
Feat/add cmip6 demo
- Loading branch information
Showing
4 changed files
with
364 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,3 +106,6 @@ cdk.out/ | |
node_modules/ | ||
|
||
.pgdata | ||
|
||
# Demo files to ignore | ||
demo/cmip6/CMIP6_daily_*stac_items.ndjson |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
{ | ||
"id": "CMIP6_daily_GISS-E2-1-G_tas", | ||
"stac_version": "1.0.0", | ||
"license": "not-provided", | ||
"title": "CMIP6 Daily Near-Surface Air Temperature from GISS-E2-1-G Model", | ||
"type": "Collection", | ||
"description": "CMIP6 Daily Near-Surface Air Temperature from GISS-E2-1-G Model", | ||
"links":[], | ||
"extent": { | ||
"spatial": { | ||
"bbox": [ | ||
[ | ||
-180, | ||
-90, | ||
180, | ||
90 | ||
] | ||
] | ||
}, | ||
"temporal": { | ||
"interval": [ | ||
[ | ||
"1950-01-01T00:00:00.000Z", | ||
"1951-12-31T00:00:00.000Z" | ||
] | ||
] | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,311 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "9de99346-8c2a-48bb-b79b-e28a4db099d8", | ||
"metadata": {}, | ||
"source": [ | ||
"# Generate CMIP STAC Items and Load them into a pgSTAC database\n", | ||
"\n", | ||
"This notebook walks through generating STAC items from [NEX GDDP CMIP6 COGs on AWS](https://aws.amazon.com/marketplace/pp/prodview-k6adk576fiwmm#resources).\n", | ||
"\n", | ||
"As-is it uses daily data from the `GISS-E2-1-G` model, the `tas` variable and loads data from 1950 and 1951. The bucket has other data available. It includes monthly aggregates, other models, other variables and more years. The scripts below can easily be modified to STAC-ify other data in the nex-gddp-cmip6-cog bucket." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "6f788363", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import boto3\n", | ||
"import fsspec\n", | ||
"import json\n", | ||
"from pystac import Catalog, Collection, Item, Asset, MediaType\n", | ||
"from datetime import datetime\n", | ||
"import rio_stac\n", | ||
"from pprint import pprint\n", | ||
"import concurrent.futures\n", | ||
"import threading" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "8e4cfbb8", | ||
"metadata": { | ||
"tags": [ | ||
"parameters" | ||
] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Specify the CMIP model and variable to use\n", | ||
"model = \"GISS-E2-1-G\"\n", | ||
"variable = \"tas\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "762e1e50-46e6-4dab-8462-38d31060e202", | ||
"metadata": {}, | ||
"source": [ | ||
"## Discover the COG files on S3 using fsspec and `.glob`" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "cdaccd78", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"anon = True\n", | ||
"s3_path = f\"s3://nex-gddp-cmip6-cog/daily/{model}/historical/r1i1p1f2/{variable}/\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "a7caab29", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"fs_read = fsspec.filesystem(\"s3\", anon=anon)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "4936f757", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"23725 discovered from s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"file_paths = fs_read.glob(f\"{s3_path}*\")\n", | ||
"print(f\"{len(file_paths)} discovered from {s3_path}\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "78f991f3-8cde-44bd-a955-b3e5b9694320", | ||
"metadata": {}, | ||
"source": [ | ||
"## Subset the data so we don't process all historical data\n", | ||
"\n", | ||
"But you can if you want!" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "999b0670", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Here we prepend the prefix 's3://', which points to AWS.\n", | ||
"subset_files = sorted([\"s3://\" + f for f in file_paths if \"_1950_\" in f or \"_1951_\" in f])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "3bae56f1-1ea6-4755-84b3-149666e84d3d", | ||
"metadata": {}, | ||
"source": [ | ||
"## Double check we discovered some files" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "6af269dc", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Subseted data to files for 1950 and 1951. 730 files to process.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"if len(subset_files) == 0:\n", | ||
" raise Exception(f\"No files to process. Do COGs for the {model} model exist?\")\n", | ||
"else:\n", | ||
" print(f\"Subseted data to files for 1950 and 1951. {len(subset_files)} files to process.\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "ea59aceb-b80a-4166-a684-74de4230ac4a", | ||
"metadata": {}, | ||
"source": [ | ||
"## Setup the collection and items\n", | ||
"\n", | ||
"The collection is statically defined in a json file, but can be modified as desired. Then, iterate throug all the files in S3 and create STAC Item JSON using `rio_stac`. Write all the JSON to an `ndjson` file for inserting." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"id": "57dc1b5f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"file_prefix = f\"CMIP6_daily_{model}_{variable}\"\n", | ||
"stac_items_file = f\"{file_prefix}_stac_items.ndjson\"\n", | ||
"collection_json = json.loads(open(f'{file_prefix}_collection.json').read())\n", | ||
"collection = Collection.from_dict(collection_json)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"id": "45771e88", | ||
"metadata": { | ||
"lines_to_next_cell": 1 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# clear the ndjson items file\n", | ||
"with open(stac_items_file, 'w') as file:\n", | ||
" pass" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"id": "ececf9d5", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def process_item(s3_file, file, lock):\n", | ||
" print(f\"Processing {s3_file}\")\n", | ||
" filename = s3_file.split('/')[-1]\n", | ||
" year, month, day = filename.split('_')[-3:]\n", | ||
" day = day.replace('.tif', '')\n", | ||
" datetime_ = datetime.strptime(f'{year}{month}{day}', '%Y%m%d') \n", | ||
" # Create a new Item\n", | ||
" item = rio_stac.create_stac_item(\n", | ||
" id=filename,\n", | ||
" source=s3_file,\n", | ||
" collection=collection.id,\n", | ||
" input_datetime=datetime_,\n", | ||
" with_proj=True,\n", | ||
" with_raster=True,\n", | ||
" asset_name=\"data\",\n", | ||
" asset_roles=[\"data\"],\n", | ||
" asset_media_type=\"image/tiff; application=geotiff; profile=cloud-optimized\"\n", | ||
" )\n", | ||
" tiling_asset = Asset(\n", | ||
" href=s3_file,\n", | ||
" roles=['virtual', 'tiling'],\n", | ||
" title='tiling',\n", | ||
" description='Virtual asset for tiling',\n", | ||
" extra_fields={\n", | ||
" 'compose:rescale': [210, 330],\n", | ||
" 'compose:colormap_name': 'hot'\n", | ||
" }\n", | ||
" )\n", | ||
" item.assets['tiling'] = tiling_asset\n", | ||
" with lock:\n", | ||
" file.write(json.dumps(item.to_dict()) + '\\n')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "f38a5953-a195-4106-b172-26ba2bce9533", | ||
"metadata": {}, | ||
"source": [ | ||
"NOTE: This can take awhile if processing all 730 file which is why it is subset to only 2 files below, for demonstration purposes." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"id": "645d3ccb", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_01.tif\n", | ||
"Processing s3://nex-gddp-cmip6-cog/daily/GISS-E2-1-G/historical/r1i1p1f2/tas/tas_day_GISS-E2-1-G_historical_r1i1p1f2_gn_1950_01_02.tif\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"lock = threading.Lock()\n", | ||
"file = open(stac_items_file, 'a')\n", | ||
"with concurrent.futures.ThreadPoolExecutor() as executor:\n", | ||
" futures = [executor.submit(process_item, obj, file, lock) for obj in subset_files[0:2]]\n", | ||
" [future.result() for future in concurrent.futures.as_completed(futures)]\n", | ||
"file.close()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "8e062949-16c9-4a79-b2ee-1579f244d74f", | ||
"metadata": {}, | ||
"source": [ | ||
"# Final step - seed the database" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 16, | ||
"id": "6965e650-f89a-4c7d-9f41-11774a905b81", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"postgresql://postgres:password@localhost:5432/postgres\n", | ||
"Inserting collection from CMIP6_daily_GISS-E2-1-G_tas_collection.json\n", | ||
"Inserting items from CMIP6_daily_GISS-E2-1-G_tas_stac_items.ndjson\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"!./seed-db.sh {model} {variable}" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"jupytext": { | ||
"cell_metadata_filter": "-all", | ||
"formats": "ipynb,py", | ||
"main_language": "python" | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/bash | ||
model="$1" | ||
variable="$2" | ||
|
||
collection_json_file="CMIP6_daily_${model}_${variable}_collection.json" | ||
items_json_file="CMIP6_daily_${model}_${variable}_stac_items.ndjson" | ||
|
||
if [ -z "$DATABASE_URL" ]; then | ||
username=postgres | ||
password=password | ||
host=localhost | ||
dbname=postgres | ||
port=5432 | ||
DATABASE_URL="postgresql://$username:$password@$host:$port/$dbname" | ||
fi | ||
|
||
echo $DATABASE_URL | ||
echo "Inserting collection from $collection_json_file" | ||
pypgstac load collections $collection_json_file --dsn $DATABASE_URL --method insert_ignore | ||
echo "Inserting items from $items_json_file" | ||
pypgstac load items $items_json_file --dsn $DATABASE_URL --method insert_ignore |