From 688d8b1f829670d3292a844a4c8310c38f3f0306 Mon Sep 17 00:00:00 2001 From: Raymond Wiker Date: Fri, 18 Oct 2024 14:10:46 +0200 Subject: [PATCH] Feature/documentation update (#349) * Fix for async iterators. * Added documentation. Changed return value for some of the aggregation so that the value is availble directly, instead of through the 'value'/'values' key of a dict. * Updated documentation (explorer.rst). --------- Co-authored-by: Raymond Wiker --- docs/explorer.rst | 389 +++++++++++------- src/fmu/sumo/explorer/objects/_metrics.py | 112 ++++- .../sumo/explorer/objects/_search_context.py | 16 +- 3 files changed, 347 insertions(+), 170 deletions(-) diff --git a/docs/explorer.rst b/docs/explorer.rst index b4c542d..da53c0e 100644 --- a/docs/explorer.rst +++ b/docs/explorer.rst @@ -28,13 +28,13 @@ Run tests pytest tests/ -Api Reference +Api Reference ------------- - `API reference `_ .. warning:: - OpenVDS does not publish builds for MacOS nor for Python version 3.12. You can still use the + OpenVDS does not publish builds for MacOS nor for Python version 3.12. You can still use the Explorer without OpenVDS, but some Cube methods will not work. Usage and examples @@ -45,9 +45,9 @@ Initializing an Explorer object We establish a connection to Sumo by initializing an Explorer object. This object will handle authentication and can be used to retrieve cases and case data. -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer sumo = Explorer() @@ -57,48 +57,99 @@ Authentication If you have not used the `Explorer` before and no access token is found in your system, a login form will open in your web browser. It is also possible to provide the `Explorer` with an existing token to use for authentication, in this case you will not be prompted to login. -.. code-block:: +.. 
code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer USER_TOKEN="123456789" sumo = Explorer(token=USER_TOKEN) This assumes the `Explorer` is being used within a system which handles authentication and queries Sumo on a users behalf. +The SearchContext class +^^^^^^^^^^^^^^^^^^^^^^^ +This is a class that encapsulates a set of search criteria, in the +form of elements that either must match, or must not match. It is used +as a base class for certain other classes in `fmu.sumo.explorer`: + +* `Explorer` objects are essentially empty search contexts; the only + filters are related to who the user is, and what documents he should + be allowed to see. + +* `Case` objects are search contexts that match objects in a specific + case. + +* `Iteration` objects are search contexts that match objects in a + specific iteration. + +* `Realization` objects are search contexts that match objects in a + specific realization. + +The `.filter()` method on instances of `SearchContext` yields new +instances of `SearchContext`, with additional restrictions. For a +full list of filter parameters, try `help(sumo.filter)`: + +.. code-block:: python + + from fmu.sumo.explorer import Explorer + sumo = Explorer() + help(sumo.filter) + +Note that this full set of filters may not make sense for all objects; +for instance, `content` will not be useful for `Case` objects. + +There are shortcut methods for narrowing to specific object classes: +`cases`, `surfaces`, `tables`, `cubes`, `polygons` and +`dictionaries`. These correspond to `.filter(cls="surface")` and so +on. + +For a `SearchContext` it is also possible to extract all possible +values for specific properties. 
These properties include + +* `names` +* `tagnames` +* `dataformats` +* `aggregations` +* `stages` +* `vertical_domains` +* `contents` +* `columns` +* `statuses` +* `users` + Finding a case ^^^^^^^^^^^^^^ The `Explorer` has a property called `cases` which represents all cases you have access to in Sumo: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() - cases = sumo.cases + cases = sumo.cases -The `cases` property is a `CaseCollection` object and acts as a list of cases. -We can use the `filter` method to apply filters to the case collection which will return a new filtered `CaseCollection` instance: +The `cases` property is a `SearchContext` that matches FMU cases. We +can use the `.filter()` method to narrow down the set of cases matched: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() cases = sumo.cases cases = cases.filter(user="peesv") In this example we're getting all the cases belonging to user `peesv`. -The resulting `CaseCollection` is iterable: +The resulting `SearchContext` is iterable: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() cases = sumo.cases cases = cases.filter(user="peesv") @@ -108,22 +159,23 @@ The resulting `CaseCollection` is iterable: print(case.name) print(case.status) -We can use the filter method to filter on the following properties: +We can use the `.filter()` method to filter on the following properties for +cases: -* uuid -* name -* status -* user -* asset -* field +* `uuid` +* `name` +* `status` +* `user` +* `asset` +* `field` -Example: finding all official cases uploaded by `peesv` in Drogon: +Example: finding all official cases uploaded by `peesv` in Drogon: -.. code-block:: +.. 
code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() cases = sumo.cases cases = cases.filter( @@ -133,15 +185,16 @@ Example: finding all official cases uploaded by `peesv` in Drogon: ) -The `CaseCollection` has properties which lets us find available filter values. +Since `cases` is a `SearchContext`, we can also determine the +full set of values present for specific properties. -Example: finding assets +Example: finding assets -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() cases = sumo.cases cases = cases.filter( @@ -151,14 +204,15 @@ Example: finding assets assets = cases.assets -The `CaseCollection.assets` property gives us a list of unique values for the asset property in our list of cases. -We can now use this information to apply an asset filter: +The `.assets` property gives us a list of unique values for the asset +property in our list of cases. We can now use this information to +apply an asset filter: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() cases = sumo.cases cases = cases.filter( @@ -174,19 +228,19 @@ We can now use this information to apply an asset filter: We can retrieve list of unique values for the following properties: -* names -* statuses -* users -* assets -* fields +* `names` +* `statuses` +* `users` +* `assets` +* `fields` You can also use a case `uuid` to get a `Case` object: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() my_case = sumo.get_case_by_uuid("1234567") @@ -199,13 +253,13 @@ that match specific criteria. 
For example, if we define ``data.time.t0.label=base`` and ``data.time.t1.label=monitor``, we can use the ``has`` filter to find cases that have ``4d-seismic`` data: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer, Filters + from fmu.sumo.explorer import Explorer, Filters - exp = Explorer(env="prod") + exp = Explorer(env="prod") - cases = exp.cases.filter(asset="Heidrun", has=Filters.seismic4d) + cases = exp.cases.filter(asset="Heidrun", has=Filters.seismic4d) In this case, we have a predefined filter for ``4d-seismic``, exposed thorugh ``fmu.sumo.explorer.Filters``. There is no magic involved; any @@ -215,58 +269,61 @@ for them to be added to ``fmu.sumo.explorer.Filters``. It is also possible to chain filters. The previous example could also be handled by -.. code-block:: - cases = exp.cases.filter(asset="Heidrun", - has={"term":{"data.content.keyword": "seismic"}})\ - .filter(has={"term":{"data.time.t0.label.keyword":"base"}})\ - .filter(has={"term":{"data.time.t1.label.keyword":"monitor"}}) +.. code-block:: python + + cases = exp.cases.filter(asset="Heidrun", + has={"term":{"data.content.keyword": "seismic"}})\ + .filter(has={"term":{"data.time.t0.label.keyword":"base"}})\ + .filter(has={"term":{"data.time.t1.label.keyword":"monitor"}}) Browsing data in a case ^^^^^^^^^^^^^^^^^^^^^^^ The `Case` object has properties for accessing different data types: -* surfaces -* polygons -* tables +* `surfaces` +* `polygons` +* `tables` +* `cubes` -Example: get case surfaces +Example: get case surfaces -.. code-block:: +.. 
code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") surfaces = case.surfaces -The `SurfaceCollection` object has a filter method and properties for getting filter values, similar to `CaseCollection`: +The value of `surfaces` is another `SearchContext`, so the `.filter()` +method can be used to further refine the set of matching objects: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") surfaces = case.surfaces.filter(iteration="iter-0") contents = surfaces.contents - + surfaces = surfaces.filter( content=contents[0] ) - names = surfaces.names + names = surfaces.names surfaces = surfaces.filter( name=names[0] ) - tagnames = surfaces.tagnames + tagnames = surfaces.tagnames surfaces = surfaces.filter( tagname=tagnames[0] @@ -276,68 +333,69 @@ The `SurfaceCollection` object has a filter method and properties for getting fi vertical_domain = surfaces.filter(vertical_domain = "depth") -The `SurfaceCollection.filter` method takes the following parameters: +For a `SearchContext` that matches `surface` objects, the following +are useful parameters to `.filter()`: -* uuid -* name -* tagname -* content -* dataformat -* iteration -* realization -* aggregation -* stage -* time -* stratigraphic -* vertical_domain +* `uuid` +* `name` +* `tagname` +* `content` +* `dataformat` +* `iteration` +* `realization` +* `aggregation` +* `stage` +* `time` +* `stratigraphic` +* `vertical_domain` All parameters support a single value, a list of values or a `boolean` value. -Example: get aggregated surfaces +Example: get aggregated surfaces -.. code-block:: +.. 
code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") # get mean aggregated surfaces surfaces = case.surfaces.filter(aggregation="mean") - # get min, max and mean aggregated surfaces + # get min, max and mean aggregated surfaces surfaces = case.surfaces.filter(aggregation=["min", "max", "mean"]) # get all aggregated surfaces surfaces = case.surfaces.filter(aggregation=True) - # get names of aggregated surfaces + # get names of aggregated surfaces names = surfaces.names We can get list of filter values for the following properties: -* names -* contents -* tagnames -* dataformats -* iterations -* realizations -* aggregations -* stages -* timestamps -* intervals -* stratigraphic -* vertical_domain +* `names` +* `contents` +* `tagnames` +* `dataformats` +* `iterations` +* `realizations` +* `aggregations` +* `stages` +* `timestamps` +* `intervals` +* `stratigraphic` +* `vertical_domain` Once we have a `Surface` object we can get surface metadata using properties: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") @@ -351,35 +409,35 @@ Once we have a `Surface` object we can get surface metadata using properties: print(surface.stratigraphic) print(surface.vertical_domain) -We can get the surface binary data as a `BytesIO` object using the `blob` property. +We can get the surface binary data as a `BytesIO` object using the `blob` property. The `to_regular_surface` method returns the surface as a `xtgeo.RegularSurface` object. -.. code-block:: +.. 
code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") surface = case.surfaces[0] # get blob - blob = surface.blob + blob = surface.blob # get xtgeo.RegularSurface - reg_surf = surface.to_regular_surface() + reg_surf = surface.to_regular_surface() reg_surf.quickplot() -If we know the `uuid` of the surface we want to work with we can get it directly from the `Explorer` object: +If we know the `uuid` of the surface we want to work with we can get it directly from the `Explorer` object: -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() surface = sumo.get_surface_by_uuid("1234567") @@ -389,30 +447,16 @@ If we know the `uuid` of the surface we want to work with we can get it directly Pagination: Iterating over large resultsets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you want to iterate/paginate over large number of results you _must_ use the -`keep_alive` parameter to avoid errors and get an exact and complete list of -objects. The `keep_alive` parameter creates a 'snapshot' in the backend, -which ensures consistent results for you, but at the same time using some -resources on the server-side. To avoid server-side problems, the `keep_alive` -parameter should be as short as possible, but still large enough for you -(or your users) to iterate over the data-set. If you are not sure what to -use, start with 15m, i.e. 15 minutes. This means that you expect that there -will be a maximum of 15 minutes between each time fmu-sumo calls the back-end, -so not the complete time period of a user session. +Previously, it was necessary to use a `Point-In-Time` mechanism when +iterating over large result sets; this was enabled by specifying a +`keep_alive` parameter in the `Explorer` constructor call. 
This is no +longer necessary, as it is handled internally and transparently in +`SearchContext`. -The 'snapshot' will of course not reflect any updates to data performed -simultaneously by you or anyone else. +The following was necessary to iterate over a large collection of +surfaces: -For how large result-sets should you use the `keep_alive` parameter? As of -early 2024, the `Explorer` uses 500 objects pagination, so you should use -the `keep_alive` parameter for all result-sets larger than 500 objects. - -The 'snapshot' works in exactly the same way for async and sync methods. - -Here is example code iterating over a large result-set using the `keep_alive` -parameter: - -.. code-block:: python +.. code-block:: python import asyncio @@ -436,17 +480,39 @@ parameter: asyncio.run(main()) +This can now be reduced to: + +.. code-block:: python + + from fmu.sumo.explorer import Explorer + + explorer = Explorer(env="prod") + case = explorer.get_case_by_uuid("dec73fae-bb11-41f2-be37-73ba005c4967") + + surface_collection: SurfaceCollection = case.surfaces.filter( + iteration="iter-1", + ) + + async def main(): + count = await surface_collection.length_async() + async for surf in surface_collection: + print(surf.name) + # Do something with surf + + asyncio.run(main()) + + Time filtering ^^^^^^^^^^^^^^ The `TimeFilter` class lets us construct time filters to be used in the `SurfaceCollection.filter` method: Example: get surfaces with timestamp in a specific range -.. code-block:: +.. code-block:: python from fmu.sumo.explorer import Explorer, TimeFilter, TimeType - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") @@ -459,13 +525,13 @@ Example: get surfaces with timestamp in a specific range surfaces = case.surfaces.filter(time=time) -Example: get surfaces with exact interval +Example: get surfaces with exact interval -.. code-block:: +.. 
code-block:: python from fmu.sumo.explorer import Explorer, TimeFilter, TimeType - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") @@ -481,11 +547,11 @@ Example: get surfaces with exact interval Time filters can also be used to get all surfaces that has a specific type of time data. -.. code-block:: +.. code-block:: python from fmu.sumo.explorer import Explorer, TimeFilter, TimeType - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") @@ -513,13 +579,14 @@ Time filters can also be used to get all surfaces that has a specific type of ti Performing aggregations ^^^^^^^^^^^^^^^^^^^^^^^ -The `SurfaceCollection` class can be used to do on-demand surface aggregations. +The `SearchContext` class can be used to do on-demand aggregations; +this is currently implemented for `surfaces` and `tables`. -.. code-block:: +.. code-block:: python - from fmu.sumo.explorer import Explorer + from fmu.sumo.explorer import Explorer - sumo = Explorer() + sumo = Explorer() case = sumo.get_case_by_uuid("1234567") @@ -535,10 +602,30 @@ The `SurfaceCollection` class can be used to do on-demand surface aggregations. mean = surfaces.mean() min = surfaces.min() - max = surfaces.max() + max = surfaces.max() p10 = surfaces.p10() p10.quickplot() -In this example we perform aggregations on all realized instance of the surface `Valysar Fm. (FACIES_Fraction_Channel)` in iteration 0. -The aggregation methods return `xtgeo.RegularSurface` objects. + +In this example we perform aggregations on all realized instance of +the surface `Valysar Fm. (FACIES_Fraction_Channel)` in +iteration 0. The aggregation methods return `xtgeo.RegularSurface` +objects. + +.. note:: The methods `.mean()`, `.min()`, etc are deprecated; the + preferred way is to use the method `.aggregate()` with the parameter + `operation`; e.g, `surfaces.aggregate(operation="mean")`. + +For `table` aggregation it is also necessary to specify the columns you want: + +.. 
code-block:: python + + from fmu.sumo.explorer import Explorer + + sumo = Explorer(env="dev") + case = sumo.get_case_by_uuid("5b558daf-61c5-400a-9aa2-c602bb471a16") + tables = case.tables.filter(iteration="iter-0", realization=True, + tagname="summary", column="FOPT") + agg = tables.aggregate(operation="collection", columns=["FOPT"]) + agg.to_pandas() diff --git a/src/fmu/sumo/explorer/objects/_metrics.py b/src/fmu/sumo/explorer/objects/_metrics.py index c8bda8f..8d0b943 100644 --- a/src/fmu/sumo/explorer/objects/_metrics.py +++ b/src/fmu/sumo/explorer/objects/_metrics.py @@ -10,35 +10,125 @@ def _aggregate(self, op, **kwargs): "size": 0} res = self._search_context._sumo.post("/search", json=qdoc).json() return res["aggregations"]["agg"] - + def min(self, field): - return self._aggregate("min", field=field) + """Find the minimum value for the specified property across the + current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + + Returns: + The minimum value. + + """ + return self._aggregate("min", field=field)["value"] def max(self, field): - return self._aggregate("max", field=field) + """Find the maximum value for the specified property across the + current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + + Returns: + The maximum value. + + """ + return self._aggregate("max", field=field)["value"] def avg(self, field): - return self._aggregate("avg", field=field) + """Find the average value for the specified property across the + current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + + Returns: + The average value. + + """ + return self._aggregate("avg", field=field)["value"] def sum(self, field): - return self._aggregate("sum", field=field) + """Find the sum of all values for the specified property across + the current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. 
+ + Returns: + The sum of all values. + + """ + return self._aggregate("sum", field=field)["value"] def value_count(self, field): - return self._aggregate("value_count", field=field) + """Find the count of values for the specified property across the + current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + + Returns: + The total number of values. + + """ + return self._aggregate("value_count", field=field)["value"] def cardinality(self, field): - return self._aggregate("cardinality", field=field) + """Find the count of distinct values for the specified property + across the current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + + Returns: + The number of distinct values. + + Note: The value returned is approximate. + + """ + return self._aggregate("cardinality", field=field)["value"] def stats(self, field): + """Compute a basic set of statistics of the values for the specified + property across the current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + + Returns: + A dictionary of statistical metrics. + + """ return self._aggregate("stats", field=field) def extended_stats(self, field): + """Compute an extended set of statistics of the values for the + specified property across the current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + + Returns: + A dictionary of statistical metrics. + + """ return self._aggregate("extended_stats", field=field) def percentiles(self, field, percents=None): - return self._aggregate("percentiles", field=field, - percents=percents) + """Find the values at specific percentiles for the specified + property across the current set of objects. + + Arguments: + - field (str): the name of a property in the metadata. + - percents ([number]): list of percent values. If omitted, uses + a default set of values. - + Returns: + A dictionary of percentiles. 
- + """ + return self._aggregate("percentiles", field=field, + percents=percents)["values"] diff --git a/src/fmu/sumo/explorer/objects/_search_context.py b/src/fmu/sumo/explorer/objects/_search_context.py index 4d54663..c264ccd 100644 --- a/src/fmu/sumo/explorer/objects/_search_context.py +++ b/src/fmu/sumo/explorer/objects/_search_context.py @@ -432,12 +432,12 @@ async def uuids_async(self): def __iter__(self): self._curr_index = 0 - if self._hits is None: - self._hits = self._search_all() - pass return self def __next__(self): + if self._hits is None: + self._hits = self._search_all() + pass if self._curr_index < len(self._hits): uuid = self._hits[self._curr_index] self._maybe_prefetch(self._curr_index) @@ -446,21 +446,21 @@ def __next__(self): else: raise StopIteration - async def __aiter__(self): + def __aiter__(self): self._curr_index = 0 - if self._hits is None: - self._hits = await self._search_all_async() - pass return self async def __anext__(self): + if self._hits is None: + self._hits = await self._search_all_async() + pass if self._curr_index < len(self._hits): uuid = self._hits[self._curr_index] await self._maybe_prefetch_async(self._curr_index) self._curr_index += 1 return await self.get_object_async(uuid) else: - raise StopIteration + raise StopAsyncIteration def __getitem__(self, index): if self._hits is None: