From a851ae991173463479fc5a0fa2e5b7134ad61ca2 Mon Sep 17 00:00:00 2001 From: Matthias Schaub Date: Tue, 12 Jul 2022 17:19:49 +0200 Subject: [PATCH] wip --- workers/ohsome_quality_analyst/api/api.py | 9 -- .../api/request_models.py | 47 +++++++---- .../ohsome_quality_analyst/base/indicator.py | 23 +++++- workers/ohsome_quality_analyst/cli/cli.py | 13 +-- workers/ohsome_quality_analyst/definitions.py | 82 ++++--------------- .../building_completeness/indicator.py | 5 ++ .../building_completeness/metadata.yaml | 2 + .../indicators/currentness/metadata.yaml | 12 ++- .../metadata.yaml | 4 +- .../ghs_pop_comparison_roads/metadata.yaml | 2 + .../mapping_saturation/metadata.yaml | 7 ++ .../indicators/minimal/indicator.py | 4 +- .../indicators/minimal/metadata.yaml | 8 ++ .../indicators/poi_density/metadata.yaml | 2 + .../indicators/tags_ratio/metadata.yaml | 3 + workers/ohsome_quality_analyst/oqt.py | 5 +- .../reports/minimal/metadata.yaml | 2 +- 17 files changed, 115 insertions(+), 115 deletions(-) diff --git a/workers/ohsome_quality_analyst/api/api.py b/workers/ohsome_quality_analyst/api/api.py index 1757e2fe6..7c69a4b84 100644 --- a/workers/ohsome_quality_analyst/api/api.py +++ b/workers/ohsome_quality_analyst/api/api.py @@ -32,7 +32,6 @@ from ohsome_quality_analyst.config import configure_logging from ohsome_quality_analyst.definitions import ( ATTRIBUTION_URL, - INDICATOR_LAYER, get_attribution, get_dataset_names, get_fid_fields, @@ -280,14 +279,6 @@ async def get_available_regions(asGeoJSON: bool = False): return response -@app.get("/indicator-layer-combinations") -async def get_indicator_layer_combinations(): - """Get names of available indicator-layer combinations.""" - response = empty_api_response() - response["result"] = INDICATOR_LAYER - return response - - @app.get("/indicators") async def indicator_names(): """Get names of available indicators.""" diff --git a/workers/ohsome_quality_analyst/api/request_models.py 
b/workers/ohsome_quality_analyst/api/request_models.py index 527af57d3..d3f521d77 100644 --- a/workers/ohsome_quality_analyst/api/request_models.py +++ b/workers/ohsome_quality_analyst/api/request_models.py @@ -8,7 +8,7 @@ """ from enum import Enum -from typing import Optional, Union +from typing import Optional, Tuple, Union import pydantic from geojson import Feature, FeatureCollection @@ -16,12 +16,12 @@ from ohsome_quality_analyst.base.layer import LayerData from ohsome_quality_analyst.definitions import ( - INDICATOR_LAYER, get_dataset_names, get_fid_fields, get_indicator_names, get_layer_keys, get_report_names, + get_valid_layers, ) from ohsome_quality_analyst.utils.helper import loads_geojson, snake_to_lower_camel @@ -36,6 +36,14 @@ class BaseIndicator(BaseModel): name: IndicatorEnum = pydantic.Field( ..., title="Indicator Name", example="GhsPopComparisonBuildings" ) + threshholds: Optional[ + Tuple[ + Union[float, str], + Union[str, float], + Union[str, float], + Union[str, float], + ] + ] = None include_svg: bool = False include_html: bool = False include_data: bool = False @@ -48,6 +56,17 @@ class Config: allow_mutation = False extra = "forbid" + @pydantic.root_validator + @classmethod + def validate_thresholds(cls, values): + if values["threshholds"] is not None and values["name"] != "Currentness": + raise ValueError( + "Setting custom threshholds is only supported for the Currentness " + + "Indicator.", + ) + else: + return values + class BaseReport(BaseModel): name: ReportEnum = pydantic.Field( @@ -131,13 +150,13 @@ class IndicatorBpolys(BaseIndicator, BaseLayerName, BaseBpolys): @pydantic.root_validator @classmethod def validate_indicator_layer(cls, values): - try: - indicator_layer = (values["name"].value, values["layer_key"].value) - except KeyError: - raise ValueError("An issue with the layer or indicator name occurred.") - if indicator_layer not in INDICATOR_LAYER: + indicator_key = values["name"].value + layer_key = values["layer_key"].value + if 
layer_key not in get_valid_layers(indicator_key): raise ValueError( - "Indicator layer combination is invalid: " + str(indicator_layer) + "Layer ({0}) is not available for indicator ({1})".format( + layer_key, indicator_key + ) ) else: return values @@ -147,13 +166,13 @@ class IndicatorDatabase(BaseIndicator, BaseLayerName, BaseDatabase): @pydantic.root_validator @classmethod def validate_indicator_layer(cls, values): - try: - indicator_layer = (values["name"].value, values["layer_key"].value) - except KeyError: - raise ValueError("An issue with the layer or indicator name occurred.") - if indicator_layer not in INDICATOR_LAYER: + indicator_key = values["name"].value + layer_key = values["layer_key"].value + if layer_key not in get_valid_layers(indicator_key): raise ValueError( - "Indicator layer combination is invalid: " + str(indicator_layer) + "Layer ({0}) is not available for indicator ({1})".format( + layer_key, indicator_key + ) ) else: return values diff --git a/workers/ohsome_quality_analyst/base/indicator.py b/workers/ohsome_quality_analyst/base/indicator.py index 3bfdcf12c..8ee05fb47 100644 --- a/workers/ohsome_quality_analyst/base/indicator.py +++ b/workers/ohsome_quality_analyst/base/indicator.py @@ -5,7 +5,7 @@ from dataclasses import asdict, dataclass from datetime import datetime, timezone from io import StringIO -from typing import Dict, Literal, Optional +from typing import Dict, Literal, Optional, Tuple import matplotlib.pyplot as plt from dacite import from_dict @@ -42,8 +42,8 @@ class Result: value is determined by the result classes value (float): The result value class_ (int): The result class. An integer between 1 and 5. It maps to the - result labels. This value is used by the reports to determine an overall - result. + result labels (1 -> red; 5 -> green). This value is used by the reports to + determine an overall result. description (str): The result description. 
svg (str): Figure of the result as SVG """ @@ -63,17 +63,32 @@ def label(self) -> Literal["green", "yellow", "red", "undefined"]: class BaseIndicator(metaclass=ABCMeta): - """The base class of every indicator.""" + """The base class of every indicator. + + Attributes: + thresholds (tuple): A tuple with four float values representing the thresholds + between the result classes. The first element is the threshold between the + result class 1 and 2, the second element is the threshold between the result + class 2 and 3 and so on. + """ def __init__( self, layer: Layer, feature: Feature, + thresholds: Optional[Tuple[float, float, float, float]] = None, ) -> None: self.layer: Layer = layer self.feature: Feature = feature + # setattr(object, key, value) could be used instead of relying on from_dict. metadata = get_metadata("indicators", type(self).__name__) + layer_thresholds = metadata.pop("layer-thresholds") + if thresholds is None: + # TODO: filter layer_thresholds + self.thresholds = layer_thresholds[layer.key] + else: + self.thresholds = thresholds self.metadata: Metadata = from_dict(data_class=Metadata, data=metadata) self.result: Result = Result( description=self.metadata.label_description["undefined"], diff --git a/workers/ohsome_quality_analyst/cli/cli.py b/workers/ohsome_quality_analyst/cli/cli.py index e5e05fc0f..13543a4ae 100644 --- a/workers/ohsome_quality_analyst/cli/cli.py +++ b/workers/ohsome_quality_analyst/cli/cli.py @@ -15,11 +15,7 @@ ) from ohsome_quality_analyst.cli import options from ohsome_quality_analyst.config import configure_logging, get_config_value -from ohsome_quality_analyst.definitions import ( - INDICATOR_LAYER, - load_layer_definitions, - load_metadata, -) +from ohsome_quality_analyst.definitions import load_layer_definitions, load_metadata from ohsome_quality_analyst.geodatabase import client as db_client from ohsome_quality_analyst.utils.helper import json_serialize, write_geojson @@ -93,13 +89,6 @@ def get_available_regions(): 
click.echo(format_row.format(region["ogc_fid"], region["name"])) -@cli.command("list-indicator-layer-combination") -def get_indicator_layer_combination(): - """List all possible indicator-layer-combinations.""" - for combination in INDICATOR_LAYER: - click.echo(combination) - - @cli.command("create-indicator") @cli_option(options.indicator_name) @cli_option(options.layer_key) diff --git a/workers/ohsome_quality_analyst/definitions.py b/workers/ohsome_quality_analyst/definitions.py index 49e667a48..54b2db184 100644 --- a/workers/ohsome_quality_analyst/definitions.py +++ b/workers/ohsome_quality_analyst/definitions.py @@ -1,10 +1,12 @@ """Global Variables and Functions.""" +from __future__ import annotations + import glob import logging import os from dataclasses import dataclass from types import MappingProxyType -from typing import Dict, List, Optional +from typing import Dict, List, Literal, Optional, Tuple import yaml @@ -57,63 +59,6 @@ class RasterDataset: ), ) -# Possible indicator layer combinations -INDICATOR_LAYER = ( - ("BuildingCompleteness", "building_area"), - ("GhsPopComparisonBuildings", "building_count"), - ("GhsPopComparisonRoads", "jrc_road_length"), - ("GhsPopComparisonRoads", "major_roads_length"), - ("MappingSaturation", "building_count"), - ("MappingSaturation", "major_roads_length"), - ("MappingSaturation", "amenities"), - ("MappingSaturation", "jrc_health_count"), - ("MappingSaturation", "jrc_mass_gathering_sites_count"), - ("MappingSaturation", "jrc_railway_length"), - ("MappingSaturation", "jrc_road_length"), - ("MappingSaturation", "jrc_education_count"), - ("MappingSaturation", "mapaction_settlements_count"), - ("MappingSaturation", "mapaction_major_roads_length"), - ("MappingSaturation", "mapaction_rail_length"), - ("MappingSaturation", "mapaction_lakes_area"), - ("MappingSaturation", "mapaction_rivers_length"), - ("MappingSaturation", "infrastructure_lines"), - ("MappingSaturation", "poi"), - ("MappingSaturation", "lulc"), - 
("Currentness", "major_roads_count"), - ("Currentness", "building_count"), - ("Currentness", "amenities"), - ("Currentness", "jrc_health_count"), - ("Currentness", "jrc_education_count"), - ("Currentness", "jrc_road_count"), - ("Currentness", "jrc_railway_count"), - ("Currentness", "jrc_airport_count"), - ("Currentness", "jrc_water_treatment_plant_count"), - ("Currentness", "jrc_power_generation_plant_count"), - ("Currentness", "jrc_cultural_heritage_site_count"), - ("Currentness", "jrc_bridge_count"), - ("Currentness", "jrc_mass_gathering_sites_count"), - ("Currentness", "mapaction_settlements_count"), - ("Currentness", "mapaction_major_roads_length"), - ("Currentness", "mapaction_rail_length"), - ("Currentness", "mapaction_lakes_count"), - ("Currentness", "mapaction_rivers_length"), - ("Currentness", "infrastructure_lines"), - ("Currentness", "poi"), - ("Currentness", "lulc"), - ("PoiDensity", "poi"), - ("TagsRatio", "building_count"), - ("TagsRatio", "major_roads_length"), - ("TagsRatio", "jrc_health_count"), - ("TagsRatio", "jrc_education_count"), - ("TagsRatio", "jrc_road_length"), - ("TagsRatio", "jrc_airport_count"), - ("TagsRatio", "jrc_power_generation_plant_count"), - ("TagsRatio", "jrc_cultural_heritage_site_count"), - ("TagsRatio", "jrc_bridge_count"), - ("TagsRatio", "jrc_mass_gathering_sites_count"), - ("Minimal", "minimal"), -) - ATTRIBUTION_TEXTS = MappingProxyType( { "OSM": "© OpenStreetMap contributors", @@ -128,17 +73,16 @@ class RasterDataset: ) -def load_metadata(module_name: str) -> Dict: - """Read metadata of all indicators or reports from YAML files. +def load_metadata(module_name: Literal["indicators", "reports"]) -> Dict: + """Load metadata of all indicators or reports from YAML files. - Those text files are located in the directory of each indicator/report. + The YAML files are located in the directory of each individual indicator or report. - Args: - module_name: Either indicators or reports. 
Returns: - A Dict with the class names of the indicators/reports - as keys and metadata as values. + A dictionary with the indicator or report keys as directory keys and the content + of the YAML file (metadata) as values. """ + # TODO: Is this check needed if Literal is used in func declaration? if module_name != "indicators" and module_name != "reports": raise ValueError("module name value can only be 'indicators' or 'reports'.") @@ -275,11 +219,15 @@ def get_attribution(data_keys: list) -> str: return "; ".join([str(v) for v in filtered.values()]) +# TODO def get_valid_layers(indcator_name: str) -> tuple: """Get valid Indicator/Layer combination of an Indicator.""" - return tuple([tup[1] for tup in INDICATOR_LAYER if tup[0] == indcator_name]) + return tuple( + [tup[1] for tup in INDICATOR_LAYER_THRESHOLDS if tup[0] == indcator_name] + ) +# TODO def get_valid_indicators(layer_key: str) -> tuple: """Get valid Indicator/Layer combination of a Layer.""" - return tuple([tup[0] for tup in INDICATOR_LAYER if tup[1] == layer_key]) + return tuple([tup[0] for tup in INDICATOR_LAYER_THRESHOLDS if tup[1] == layer_key]) diff --git a/workers/ohsome_quality_analyst/indicators/building_completeness/indicator.py b/workers/ohsome_quality_analyst/indicators/building_completeness/indicator.py index 9177e63e0..5adc29153 100644 --- a/workers/ohsome_quality_analyst/indicators/building_completeness/indicator.py +++ b/workers/ohsome_quality_analyst/indicators/building_completeness/indicator.py @@ -2,6 +2,7 @@ import os from io import StringIO from string import Template +from typing import Optional, Tuple import dateutil.parser import geojson @@ -58,10 +59,14 @@ def __init__( self, layer: Layer, feature: Feature, + thresholds: Optional[Tuple[float, float, float, float]], ) -> None: + if thresholds is None: + thresholds = (0.2, 0.5, 0.8, 0.9) super().__init__( layer=layer, feature=feature, + thresholds=thresholds, ) self.model_name: str = "Random Forest Regressor" # Lists of elements per 
hexagonal cell diff --git a/workers/ohsome_quality_analyst/indicators/building_completeness/metadata.yaml b/workers/ohsome_quality_analyst/indicators/building_completeness/metadata.yaml index 9710d8e1e..2856c8843 100644 --- a/workers/ohsome_quality_analyst/indicators/building_completeness/metadata.yaml +++ b/workers/ohsome_quality_analyst/indicators/building_completeness/metadata.yaml @@ -23,3 +23,5 @@ BuildingCompleteness: average of the ratios per hex-cell between the building area mapped in OSM and the predicted building area is $completeness_ratio %. The weight is the predicted building area. + layer-thresholds: + - { layer: building_area, thresholds: [0.2, 0.5, 0.8, 0.9] } diff --git a/workers/ohsome_quality_analyst/indicators/currentness/metadata.yaml b/workers/ohsome_quality_analyst/indicators/currentness/metadata.yaml index 3b827991e..e03ac7ce0 100644 --- a/workers/ohsome_quality_analyst/indicators/currentness/metadata.yaml +++ b/workers/ohsome_quality_analyst/indicators/currentness/metadata.yaml @@ -19,7 +19,11 @@ Currentness: an extrinsic comparison to identify if this means that data quality is bad or if there is just nothing to map here. result_description: | - In the last $threshold_green years $green % of the elements were edited the last time. - In the period from $threshold_yellow_start to $threshold_yellow_end years ago $yellow % of the elements were edited the last time. - The remaining $red % were last edited more than $threshold_red years ago. - The median currentness of the $elements features ($layer_name) is $median_years year(s). + Over 50% of the $elements features ($layer_name) were edited $years. 
+ layer-thresholds: + - { layer: amenities, thresholds: [ 0.2, null, 0.6, null] } + - { layer: building_count, thresholds: [ 0.2, null, 0.6, null] } + - { layer: major_roads_count, thresholds: [ 0.2, null, 0.6, null] } + - { layer: infrastructure_lines, thresholds: [ 0.2, null, 0.6, null] } + - { layer: poi, thresholds: [ 0.2, null, 0.6, null] } + - { layer: lulc, thresholds: [ 0.2, null, 0.6, null] } diff --git a/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_buildings/metadata.yaml b/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_buildings/metadata.yaml index 36d689f8c..b655ea660 100644 --- a/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_buildings/metadata.yaml +++ b/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_buildings/metadata.yaml @@ -23,4 +23,6 @@ GhsPopComparisonBuildings: $pop_count people living in an area of $area sqkm, which results in a population density $pop_count_per_sqkm of people per sqkm. - $feature_count_per_sqkm buildings per sqkm mapped. + $feature_count_per_sqkm buildings per sqkm mapped. + layer-thresholds: + - { layer: building_count, thresholds: [{ a:0.75 }, null, { a: 5.0 }, null] } diff --git a/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_roads/metadata.yaml b/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_roads/metadata.yaml index d1e7a4c5c..aec0b4f12 100644 --- a/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_roads/metadata.yaml +++ b/workers/ohsome_quality_analyst/indicators/ghs_pop_comparison_roads/metadata.yaml @@ -24,3 +24,5 @@ GhsPopComparisonRoads: $area sqkm, which results in a population density $pop_count_per_sqkm of people per sqkm. $feature_length_per_sqkm km of roads per sqkm mapped. 
+ layer-thresholds: + - { layer: major_roads_length, thresholds: [{ a: 1000 }, null, { a: 500 }, null] } diff --git a/workers/ohsome_quality_analyst/indicators/mapping_saturation/metadata.yaml b/workers/ohsome_quality_analyst/indicators/mapping_saturation/metadata.yaml index 1e1dfe09d..6182d9fa0 100644 --- a/workers/ohsome_quality_analyst/indicators/mapping_saturation/metadata.yaml +++ b/workers/ohsome_quality_analyst/indicators/mapping_saturation/metadata.yaml @@ -15,3 +15,10 @@ MappingSaturation: Saturation could not be calculated. result_description: | The saturation of the last 3 years is $saturation%. + layer-thresholds: + - { layer: amenities, thresholds: [0.3, null, 0.97, null] } + - { layer: building_count, thresholds: [0.3, null, 0.97, null] } + - { layer: infrastructure_lines, thresholds: [0.3, null, 0.97, null] } + - { layer: lulc, thresholds: [0.3, null, 0.97, null] } + - { layer: major_roads_length, thresholds: [0.3, null, 0.97, null] } + - { layer: poi, thresholds: [0.3, null, 0.97, null] } diff --git a/workers/ohsome_quality_analyst/indicators/minimal/indicator.py b/workers/ohsome_quality_analyst/indicators/minimal/indicator.py index 118e7617f..9c9835bb0 100644 --- a/workers/ohsome_quality_analyst/indicators/minimal/indicator.py +++ b/workers/ohsome_quality_analyst/indicators/minimal/indicator.py @@ -10,8 +10,8 @@ class Minimal(BaseIndicator): - def __init__(self, layer: Layer, feature: Feature) -> None: - super().__init__(layer=layer, feature=feature) + def __init__(self, layer: Layer, feature: Feature, thresholds: tuple) -> None: + super().__init__(layer=layer, feature=feature, thresholds=thresholds) self.count = 0 async def preprocess(self) -> None: diff --git a/workers/ohsome_quality_analyst/indicators/minimal/metadata.yaml b/workers/ohsome_quality_analyst/indicators/minimal/metadata.yaml index 0dfc4bd8e..3037c6014 100644 --- a/workers/ohsome_quality_analyst/indicators/minimal/metadata.yaml +++ 
b/workers/ohsome_quality_analyst/indicators/minimal/metadata.yaml @@ -14,3 +14,11 @@ Minimal: The quality level could not be calculated for this indicator. result_description: | Some description of the result. + layer-thresholds: + # List of associative lists of valid layers and thresholds. + # A threshold is a list of four float values. + # The first element is the threshold between the result classes 1 + # (maps to the label 'red') and 2. + - { layer: minimal, thresholds: [0.2, 0.4, 0.6, 0.8] } + # Thresholds can also be function parameters + # thresholds: [ { a: 0.2, b: 0.4 }, { a: 0.6, b: 0.8 }, ... ] diff --git a/workers/ohsome_quality_analyst/indicators/poi_density/metadata.yaml b/workers/ohsome_quality_analyst/indicators/poi_density/metadata.yaml index 86863da0d..c9da5bdba 100644 --- a/workers/ohsome_quality_analyst/indicators/poi_density/metadata.yaml +++ b/workers/ohsome_quality_analyst/indicators/poi_density/metadata.yaml @@ -22,3 +22,5 @@ PoiDensity: The density of landmarks (points of reference, e.g. waterbodies, supermarkets, churches, bus stops) is $result features per sqkm. + layer-thresholds: + - { layer: poi, thresholds: [10, null, null, 30] } diff --git a/workers/ohsome_quality_analyst/indicators/tags_ratio/metadata.yaml b/workers/ohsome_quality_analyst/indicators/tags_ratio/metadata.yaml index 6e7f27ac6..7a9f7f5b0 100644 --- a/workers/ohsome_quality_analyst/indicators/tags_ratio/metadata.yaml +++ b/workers/ohsome_quality_analyst/indicators/tags_ratio/metadata.yaml @@ -17,3 +17,6 @@ TagsRatio: result_description: | The ratio of the features (all: $all) compared to features with expected tags (matched: $matched) is $result. 
+ layer-thresholds: + - { layer: building_count, thresholds: [ 0.25, null, 0.75, null] } + - { layer: major_roads_length, thresholds: [ 0.25, null, 0.75, null] } diff --git a/workers/ohsome_quality_analyst/oqt.py b/workers/ohsome_quality_analyst/oqt.py index b257a4f29..f6f6a33ba 100644 --- a/workers/ohsome_quality_analyst/oqt.py +++ b/workers/ohsome_quality_analyst/oqt.py @@ -183,6 +183,7 @@ async def _( IndicatorBpolys( name=name, layerKey=parameters.layer_key.value, + threshholds=parameters.threshholds, bpolys=feature, ) ) @@ -200,6 +201,7 @@ async def _( name = parameters.name.value layer: Layer = get_layer_definition(parameters.layer_key.value) feature = parameters.bpolys + threshholds = parameters.threshholds logging.info("Calculating Indicator for custom AOI ...") logging.info("Feature id: {0:4}".format(feature.get("id", 1))) @@ -207,7 +209,7 @@ async def _( logging.info("Layer name: {0:4}".format(layer.name)) indicator_class = name_to_class(class_type="indicator", name=name) - indicator = indicator_class(layer, feature) + indicator = indicator_class(layer, feature, threshholds) logging.info("Run preprocessing") await indicator.preprocess() @@ -364,6 +366,7 @@ async def create_all_indicators( elif indicator_name is not None and layer_key is not None: indicator_layer = [(indicator_name, layer_key)] else: + # TODO indicator_layer = INDICATOR_LAYER tasks: List[asyncio.Task] = [] diff --git a/workers/ohsome_quality_analyst/reports/minimal/metadata.yaml b/workers/ohsome_quality_analyst/reports/minimal/metadata.yaml index 5c32447d7..5977fa20d 100644 --- a/workers/ohsome_quality_analyst/reports/minimal/metadata.yaml +++ b/workers/ohsome_quality_analyst/reports/minimal/metadata.yaml @@ -2,7 +2,7 @@ Minimal: name: Minimal description: | - This report shows the quality for two indicators: + This report shows the quality for two indicators":" Mapping Saturation and Currentness. It's main function is to test the interactions between database, api and website.