diff --git a/notebooks/rooki.ipynb b/notebooks/rooki.ipynb index fbc3097..0977036 100644 --- a/notebooks/rooki.ipynb +++ b/notebooks/rooki.ipynb @@ -2,12 +2,22 @@ "cells": [ { "cell_type": "markdown", - "id": "31690744", + "id": "931a4b84-bb67-44e4-aa91-30f3d8bcc529", "metadata": { "tags": [] }, "source": [ - "# Compute Demo: Use Rooki to access CMIP6 data\n", + "# Compute Demo: Use Rooki to access CMIP6 data" + ] + }, + { + "cell_type": "markdown", + "id": "81f6c01b-1e08-463d-90d5-b9e7be5a61ac", + "metadata": { + "tags": [] + }, + "source": [ + "## Overview\n", "\n", "[Rooki](https://github.com/roocs/rooki) is a Python client to interact with [Rook](https://github.com/roocs/rook) data subsetting service for climate model data. This service is used in the backend by the [European Copernicus Climate Data Store](https://cds.climate.copernicus.eu) to access the CMIP6 data pool. The Rook service is deployed for load-balancing at IPSL (Paris) and DKRZ (Hamburg). The CMIP6 data pool is shared with ESGF. The provided CMIP6 subset for Copernicus is synchronized at both sites. 
\n", "\n", @@ -15,22 +25,33 @@ "\n", "The operators can be called remotely using the [OGC Web Processing Service](https://ogcapi.ogc.org/processes/) (WPS) standard.\n", "\n", + "![rook 4 cds](https://github.com/atmodatcode/tgif_copernicus/raw/main/media/rook.png)\n", + "\n", "**ROOK**: **R**emote **O**perations **O**n **K**limadaten\n", "\n", "* Rook: https://github.com/roocs/rook\n", "* Rooki: https://github.com/roocs/rooki\n", "* Clisops: https://github.com/roocs/clisops\n", - "* Rook Presentation: https://github.com/cehbrecht/talk-rook-status-kickoff-meeting-2022/blob/main/Rook_C3S2_380_2022-02-11.pdf\n" + "* Rook Presentation: https://github.com/cehbrecht/talk-rook-status-kickoff-meeting-2022/blob/main/Rook_C3S2_380_2022-02-11.pdf" ] }, { "cell_type": "markdown", - "id": "dd77c5b2", - "metadata": {}, + "id": "31d3693d-4e01-4982-b1d0-dffcd2a13157", + "metadata": { + "tags": [] + }, "source": [ - "## Overview\n", + "## Prerequisites\n", + "\n", + "| Concepts | Importance | Notes |\n", + "| --- | --- | --- |\n", + "| [Intro to Xarray](https://foundations.projectpythia.org/core/xarray/xarray-intro.html) | Necessary | |\n", + "| [Understanding of NetCDF](https://foundations.projectpythia.org/core/data-formats/netcdf-cf.html) | Helpful | Familiarity with metadata structure |\n", + "| [Knowing OGC services](https://ogcapi.ogc.org/processes/) | Helpful | Understanding of the service interfaces |\n", "\n", - "![rook 4 cds](https://github.com/atmodatcode/tgif_copernicus/raw/main/media/rook.png)" + "\n", + "- **Time to learn**: 15 minutes" ] }, { @@ -59,7 +80,11 @@ "id": "d6ed87c2", "metadata": {}, "source": [ - "## Retrieve subset of CMIP6 data" + "## Retrieve subset of CMIP6 data\n", + "\n", + "The CMIP6 dataset is identified by a dataset-id. 
An intake catalog is available to look up the available datasets:\n", + "\n", + "https://nbviewer.org/github/roocs/rooki/blob/master/notebooks/demo/demo-intake-catalog.ipynb" ] }, { @@ -93,7 +118,7 @@ "id": "f822b3c8", "metadata": {}, "source": [ - "## Open Dataset with xarray" + "### Open Dataset with xarray" ] }, { @@ -106,7 +131,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Downloading to /var/folders/5f/t661zdnd181ck1dv429s4p8r0000gn/T/metalink_bs7xsieh/tas_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_20000116-20000116.nc.\n" + "Downloading to /var/folders/5f/t661zdnd181ck1dv429s4p8r0000gn/T/metalink_c868rf7f/tas_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_20000116-20000116.nc.\n" ] }, { @@ -501,7 +526,7 @@ " variant_label: r1i1p1f1\n", " license: CMIP6 model data produced by MPI-M is licensed un...\n", " cmor_version: 3.5.0\n", - " tracking_id: hdl:21.14100/af75dd9f-d9c2-4e0e-a294-2bb0d5b740cf
array(['2000-01-16T12:00:00.000000000'], dtype='datetime64[ns]')
array([-39.740099, -38.805039, -37.86998 , -36.93492 , -35.99986 , -35.064799,\n", + " tracking_id: hdl:21.14100/af75dd9f-d9c2-4e0e-a294-2bb0d5b740cf
array(['2000-01-16T12:00:00.000000000'], dtype='datetime64[ns]')
array([-39.740099, -38.805039, -37.86998 , -36.93492 , -35.99986 , -35.064799,\n", " -34.129739, -33.194679, -32.259618, -31.324558, -30.389497, -29.454436,\n", " -28.519375, -27.584315, -26.649254, -25.714193, -24.779132, -23.84407 ,\n", " -22.909009, -21.973948, -21.038887, -20.103825, -19.168764, -18.233703,\n", @@ -522,7 +547,7 @@ " 61.246401, 62.181452, 63.116502, 64.051551, 64.986599, 65.921645,\n", " 66.856691, 67.791734, 68.726776, 69.661816, 70.596854, 71.531889,\n", " 72.466921, 73.401949, 74.336973, 75.271992, 76.207005, 77.142011,\n", - " 78.077007, 79.011992, 79.946962])
array([-30. , -29.0625, -28.125 , -27.1875, -26.25 , -25.3125, -24.375 ,\n", + " 78.077007, 79.011992, 79.946962])
array([-30. , -29.0625, -28.125 , -27.1875, -26.25 , -25.3125, -24.375 ,\n", " -23.4375, -22.5 , -21.5625, -20.625 , -19.6875, -18.75 , -17.8125,\n", " -16.875 , -15.9375, -15. , -14.0625, -13.125 , -12.1875, -11.25 ,\n", " -10.3125, -9.375 , -8.4375, -7.5 , -6.5625, -5.625 , -4.6875,\n", @@ -537,7 +562,7 @@ " 48.75 , 49.6875, 50.625 , 51.5625, 52.5 , 53.4375, 54.375 ,\n", " 55.3125, 56.25 , 57.1875, 58.125 , 59.0625, 60. , 60.9375,\n", " 61.875 , 62.8125, 63.75 , 64.6875, 65.625 , 66.5625, 67.5 ,\n", - " 68.4375, 69.375 ])
[1 values with dtype=float64]
[2 values with dtype=datetime64[ns]]
[258 values with dtype=float64]
[214 values with dtype=float64]
[13803 values with dtype=float32]
PandasIndex(DatetimeIndex(['2000-01-16 12:00:00'], dtype='datetime64[ns]', name='time', freq=None))
PandasIndex(Index([ -39.74009905729821, -38.805039385498155, -37.869979577336935,\n", + " 68.4375, 69.375 ])
[1 values with dtype=float64]
[2 values with dtype=datetime64[ns]]
[258 values with dtype=float64]
[214 values with dtype=float64]
[13803 values with dtype=float32]
PandasIndex(DatetimeIndex(['2000-01-16 12:00:00'], dtype='datetime64[ns]', name='time', freq=None))
PandasIndex(Index([ -39.74009905729821, -38.805039385498155, -37.869979577336935,\n", " -36.93491964067397, -35.99985958284597, -35.06479941071204,\n", " -34.12973913069453, -33.19467874881559, -32.259618270730414,\n", " -31.324557701757275,\n", @@ -546,12 +571,12 @@ " 74.336973173452, 75.2719921848602, 76.20700512086066,\n", " 77.14201065705359, 78.07700705430369, 79.0119919826722,\n", " 79.94696224738567],\n", - " dtype='float64', name='lat', length=129))
PandasIndex(Index([ -30.0, -29.0625, -28.125, -27.1875, -26.25, -25.3125, -24.375,\n", + " dtype='float64', name='lat', length=129))
PandasIndex(Index([ -30.0, -29.0625, -28.125, -27.1875, -26.25, -25.3125, -24.375,\n", " -23.4375, -22.5, -21.5625,\n", " ...\n", " 60.9375, 61.875, 62.8125, 63.75, 64.6875, 65.625, 66.5625,\n", " 67.5, 68.4375, 69.375],\n", - " dtype='float64', name='lon', length=107))
[1 values with dtype=float64]
array(['2000-01-16T12:00:00.000000000', '2000-02-15T12:00:00.000000000',\n", - " '2000-03-16T12:00:00.000000000'], dtype='datetime64[ns]')
[2 values with dtype=float64]
[2 values with dtype=float64]
[3 values with dtype=float64]
PandasIndex(DatetimeIndex(['2000-01-16 12:00:00', '2000-02-15 12:00:00',\n", + " tracking_id: hdl:21.14100/af75dd9f-d9c2-4e0e-a294-2bb0d5b740cf
[1 values with dtype=float64]
array(['2000-01-16T12:00:00.000000000', '2000-02-15T12:00:00.000000000',\n", + " '2000-03-16T12:00:00.000000000'], dtype='datetime64[ns]')
[2 values with dtype=float64]
[2 values with dtype=float64]
[3 values with dtype=float64]
PandasIndex(DatetimeIndex(['2000-01-16 12:00:00', '2000-02-15 12:00:00',\n", " '2000-03-16 12:00:00'],\n", - " dtype='datetime64[ns]', name='time', freq=None))