Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: topic specific thresholds #384

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions workers/ohsome_quality_analyst/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from ohsome_quality_analyst.config import configure_logging
from ohsome_quality_analyst.definitions import (
ATTRIBUTION_URL,
INDICATOR_LAYER,
get_attribution,
get_dataset_names,
get_fid_fields,
Expand Down Expand Up @@ -280,14 +279,6 @@ async def get_available_regions(asGeoJSON: bool = False):
return response


@app.get("/indicator-layer-combinations")
async def get_indicator_layer_combinations():
"""Get names of available indicator-layer combinations."""
response = empty_api_response()
response["result"] = INDICATOR_LAYER
return response


@app.get("/indicators")
async def indicator_names():
"""Get names of available indicators."""
Expand Down
41 changes: 33 additions & 8 deletions workers/ohsome_quality_analyst/api/request_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@
"""

from enum import Enum
from typing import Optional, Union
from typing import Optional, Tuple, Union

import pydantic
from geojson import Feature, FeatureCollection
from pydantic import BaseModel

from ohsome_quality_analyst.base.layer import LayerData
from ohsome_quality_analyst.definitions import (
INDICATOR_LAYER,
get_dataset_names,
get_fid_fields,
get_indicator_names,
get_layer_keys,
get_report_names,
get_valid_layers,
)
from ohsome_quality_analyst.utils.helper import loads_geojson, snake_to_lower_camel

Expand All @@ -36,6 +36,14 @@ class BaseIndicator(BaseModel):
name: IndicatorEnum = pydantic.Field(
..., title="Indicator Name", example="GhsPopComparisonBuildings"
)
thresholds: Optional[
Tuple[
Union[float, str],
Union[str, float],
Union[str, float],
Union[str, float],
]
]
include_svg: bool = False
include_html: bool = False
include_data: bool = False
Expand All @@ -48,6 +56,17 @@ class Config:
allow_mutation = False
extra = "forbid"

@pydantic.root_validator
@classmethod
def validate_thresholds(cls, values):
    """Reject custom thresholds for every indicator except Currentness.

    Args:
        values: The field values collected for the model so far.

    Returns:
        The unchanged ``values`` if no custom thresholds are given or if the
        requested indicator is Currentness.

    Raises:
        ValueError: If custom thresholds are given for another indicator.
    """
    # Use `.get` instead of indexing: the sibling validators guard against
    # missing keys, so a field may be absent here (presumably because its own
    # validation already failed and was reported).
    if values.get("thresholds") is None:
        return values
    name = values.get("name")
    if name is None:
        # The name could not be resolved; do not pile a second, misleading
        # error on top of the one raised for the name itself.
        return values
    # `name` is declared as an enum member; compare its value, not the member,
    # because an enum member does not necessarily equal a plain string.
    indicator_key = getattr(name, "value", name)
    if indicator_key != "Currentness":
        raise ValueError(
            "Setting custom thresholds is only supported for the Currentness "
            + "Indicator.",
        )
    return values


class BaseReport(BaseModel):
name: ReportEnum = pydantic.Field(
Expand Down Expand Up @@ -132,12 +151,15 @@ class IndicatorBpolys(BaseIndicator, BaseLayerName, BaseBpolys):
@classmethod
def validate_indicator_layer(cls, values):
try:
indicator_layer = (values["name"].value, values["layer_key"].value)
indicator_key = values["name"].value
layer_key = values["layer_key"].value
except KeyError:
raise ValueError("An issue with the layer or indicator name occurred.")
if indicator_layer not in INDICATOR_LAYER:
if layer_key not in get_valid_layers(indicator_key):
raise ValueError(
"Indicator layer combination is invalid: " + str(indicator_layer)
"Layer ({0}) is not available for indicator ({1})".format(
layer_key, indicator_key
)
)
else:
return values
Expand All @@ -148,12 +170,15 @@ class IndicatorDatabase(BaseIndicator, BaseLayerName, BaseDatabase):
@classmethod
def validate_indicator_layer(cls, values):
try:
indicator_layer = (values["name"].value, values["layer_key"].value)
indicator_key = values["name"].value
layer_key = values["layer_key"].value
except KeyError:
raise ValueError("An issue with the layer or indicator name occurred.")
if indicator_layer not in INDICATOR_LAYER:
if layer_key not in get_valid_layers(indicator_key):
raise ValueError(
"Indicator layer combination is invalid: " + str(indicator_layer)
"Layer ({0}) is not available for indicator ({1})".format(
layer_key, indicator_key
)
)
else:
return values
Expand Down
26 changes: 22 additions & 4 deletions workers/ohsome_quality_analyst/base/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from io import StringIO
from typing import Dict, Literal, Optional
from typing import Dict, Literal, Optional, Tuple

import matplotlib.pyplot as plt
from dacite import from_dict
Expand Down Expand Up @@ -42,8 +42,8 @@ class Result:
value is determined by the result classes
value (float): The result value
class_ (int): The result class. An integer between 1 and 5. It maps to the
result labels. This value is used by the reports to determine an overall
result.
result labels (1 -> red; 5 -> green). This value is used by the reports to
determine an overall result.
description (str): The result description.
svg (str): Figure of the result as SVG
"""
Expand All @@ -63,17 +63,35 @@ def label(self) -> Literal["green", "yellow", "red", "undefined"]:


class BaseIndicator(metaclass=ABCMeta):
"""The base class of every indicator."""
"""The base class of every indicator.

Attributes:
thresholds (tuple): A tuple with four float values representing the thresholds
between the result classes. The first element is the threshold between the
result class 1 and 2, the second element is the threshold between the result
class 2 and 3 and so on.
"""

def __init__(
self,
layer: Layer,
feature: Feature,
thresholds: Optional[Tuple[float, float, float, float]] = None,
) -> None:
self.layer: Layer = layer
self.feature: Feature = feature

# setattr(object, key, value) could be used instead of relying on from_dict.
metadata = get_metadata("indicators", type(self).__name__)

layer_thresholds = metadata.pop("layer-thresholds")
if thresholds is not None:
self.thresholds = thresholds
elif layer_thresholds[layer.key] is not None:
self.thresholds = layer_thresholds[layer.key]
else:
self.thresholds = layer_thresholds["default"]

self.metadata: Metadata = from_dict(data_class=Metadata, data=metadata)
self.result: Result = Result(
description=self.metadata.label_description["undefined"],
Expand Down
13 changes: 1 addition & 12 deletions workers/ohsome_quality_analyst/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,7 @@
)
from ohsome_quality_analyst.cli import options
from ohsome_quality_analyst.config import configure_logging, get_config_value
from ohsome_quality_analyst.definitions import (
INDICATOR_LAYER,
load_layer_definitions,
load_metadata,
)
from ohsome_quality_analyst.definitions import load_layer_definitions, load_metadata
from ohsome_quality_analyst.geodatabase import client as db_client
from ohsome_quality_analyst.utils.helper import json_serialize, write_geojson

Expand Down Expand Up @@ -93,13 +89,6 @@ def get_available_regions():
click.echo(format_row.format(region["ogc_fid"], region["name"]))


@cli.command("list-indicator-layer-combination")
def get_indicator_layer_combination():
"""List all possible indicator-layer-combinations."""
for combination in INDICATOR_LAYER:
click.echo(combination)


@cli.command("create-indicator")
@cli_option(options.indicator_name)
@cli_option(options.layer_key)
Expand Down
93 changes: 22 additions & 71 deletions workers/ohsome_quality_analyst/definitions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Global Variables and Functions."""
from __future__ import annotations

import glob
import logging
import os
from dataclasses import dataclass
from types import MappingProxyType
from typing import Dict, List, Optional
from typing import Dict, List, Literal, Optional

import yaml

Expand Down Expand Up @@ -57,63 +59,6 @@ class RasterDataset:
),
)

# Possible indicator layer combinations
INDICATOR_LAYER = (
("BuildingCompleteness", "building_area"),
("GhsPopComparisonBuildings", "building_count"),
("GhsPopComparisonRoads", "jrc_road_length"),
("GhsPopComparisonRoads", "major_roads_length"),
("MappingSaturation", "building_count"),
("MappingSaturation", "major_roads_length"),
("MappingSaturation", "amenities"),
("MappingSaturation", "jrc_health_count"),
("MappingSaturation", "jrc_mass_gathering_sites_count"),
("MappingSaturation", "jrc_railway_length"),
("MappingSaturation", "jrc_road_length"),
("MappingSaturation", "jrc_education_count"),
("MappingSaturation", "mapaction_settlements_count"),
("MappingSaturation", "mapaction_major_roads_length"),
("MappingSaturation", "mapaction_rail_length"),
("MappingSaturation", "mapaction_lakes_area"),
("MappingSaturation", "mapaction_rivers_length"),
("MappingSaturation", "infrastructure_lines"),
("MappingSaturation", "poi"),
("MappingSaturation", "lulc"),
("Currentness", "major_roads_count"),
("Currentness", "building_count"),
("Currentness", "amenities"),
("Currentness", "jrc_health_count"),
("Currentness", "jrc_education_count"),
("Currentness", "jrc_road_count"),
("Currentness", "jrc_railway_count"),
("Currentness", "jrc_airport_count"),
("Currentness", "jrc_water_treatment_plant_count"),
("Currentness", "jrc_power_generation_plant_count"),
("Currentness", "jrc_cultural_heritage_site_count"),
("Currentness", "jrc_bridge_count"),
("Currentness", "jrc_mass_gathering_sites_count"),
("Currentness", "mapaction_settlements_count"),
("Currentness", "mapaction_major_roads_length"),
("Currentness", "mapaction_rail_length"),
("Currentness", "mapaction_lakes_count"),
("Currentness", "mapaction_rivers_length"),
("Currentness", "infrastructure_lines"),
("Currentness", "poi"),
("Currentness", "lulc"),
("PoiDensity", "poi"),
("TagsRatio", "building_count"),
("TagsRatio", "major_roads_length"),
("TagsRatio", "jrc_health_count"),
("TagsRatio", "jrc_education_count"),
("TagsRatio", "jrc_road_length"),
("TagsRatio", "jrc_airport_count"),
("TagsRatio", "jrc_power_generation_plant_count"),
("TagsRatio", "jrc_cultural_heritage_site_count"),
("TagsRatio", "jrc_bridge_count"),
("TagsRatio", "jrc_mass_gathering_sites_count"),
("Minimal", "minimal"),
)

ATTRIBUTION_TEXTS = MappingProxyType(
{
"OSM": "© OpenStreetMap contributors",
Expand All @@ -128,17 +73,16 @@ class RasterDataset:
)


def load_metadata(module_name: str) -> Dict:
"""Read metadata of all indicators or reports from YAML files.
def load_metadata(module_name: Literal["indicators", "reports"]) -> Dict:
"""Load metadata of all indicators or reports from YAML files.

Those text files are located in the directory of each indicator/report.
The YAML files are located in the directory of each individual indicator or report.

Args:
module_name: Either indicators or reports.
Returns:
A Dict with the class names of the indicators/reports
as keys and metadata as values.
A dictionary with the indicator or report keys as dictionary keys and the content
of the YAML file (metadata) as values.
"""
# TODO: Is this check needed if Literal is used in func declaration?
if module_name != "indicators" and module_name != "reports":
raise ValueError("module name value can only be 'indicators' or 'reports'.")

Expand Down Expand Up @@ -275,11 +219,18 @@ def get_attribution(data_keys: list) -> str:
return "; ".join([str(v) for v in filtered.values()])


def get_valid_layers(indcator_name: str) -> tuple:
"""Get valid Indicator/Layer combination of an Indicator."""
return tuple([tup[1] for tup in INDICATOR_LAYER if tup[0] == indcator_name])
def get_valid_layers(indicator_name: str) -> list:
    """Return the layer keys listed in the thresholds of an indicator.

    The keys of the "layer-thresholds" mapping in the metadata of the given
    indicator are returned, leaving out the "default" entry.
    """
    indicator_metadata = load_metadata("indicators")[indicator_name]
    valid_layers = []
    for layer_key in indicator_metadata["layer-thresholds"]:
        if layer_key == "default":
            # "default" is the fallback entry, not a layer.
            continue
        valid_layers.append(layer_key)
    return valid_layers


def get_valid_indicators(layer_key: str) -> tuple:
"""Get valid Indicator/Layer combination of a Layer."""
return tuple([tup[0] for tup in INDICATOR_LAYER if tup[1] == layer_key])
def get_valid_indicators(layer_key: str) -> list:
    """Return the keys of all indicators which are valid for a layer.

    An indicator is valid for a layer if the layer key is listed in the
    "layer-thresholds" mapping of the indicator metadata.

    Args:
        layer_key: Key of the layer to look up.

    Returns:
        A list of indicator keys. Empty if no indicator lists the layer.
    """
    # "default" is the fallback thresholds entry, not a layer
    # (`get_valid_layers` excludes it as well).
    if layer_key == "default":
        return []
    metadata = load_metadata("indicators")
    # A plain membership test is needed here. Wrapping the boolean result in
    # `any()` raises a TypeError since a bool is not iterable.
    return [
        indicator_key
        for indicator_key, metadata_ in metadata.items()
        if layer_key in metadata_["layer-thresholds"]
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from io import StringIO
from string import Template
from typing import Optional, Tuple

import dateutil.parser
import geojson
Expand Down Expand Up @@ -58,10 +59,14 @@ def __init__(
self,
layer: Layer,
feature: Feature,
thresholds: Optional[Tuple[float, float, float, float]] = None,
) -> None:
if thresholds is None:
thresholds = (0.2, 0.5, 0.8, 0.9)
super().__init__(
layer=layer,
feature=feature,
thresholds=thresholds,
)
self.model_name: str = "Random Forest Regressor"
# Lists of elements per hexagonal cell
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@ BuildingCompleteness:
average of the ratios per hex-cell between the building area mapped in OSM and the
predicted building area is $completeness_ratio %. The weight is the
predicted building area.
layer-thresholds:
default: [0.2, 0.5, 0.8, 0.9]
building_area: null
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from io import StringIO
from string import Template
from typing import Optional, Tuple

import dateutil.parser
import geojson
Expand Down Expand Up @@ -33,12 +34,13 @@ def __init__(
self,
layer: Layer,
feature: geojson.Feature,
thresholds: Optional[Tuple[float, float, float, float]] = None,
) -> None:
super().__init__(layer=layer, feature=feature)
self.threshold_4 = 2
self.threshold_3 = 3
self.threshold_2 = 4
self.threshold_1 = 8
self.threshold_4 = self.thresholds[0]
self.threshold_3 = self.thresholds[1]
self.threshold_2 = self.thresholds[2]
self.threshold_1 = self.thresholds[3]
self.element_count = None
self.contributions_sum = None
self.contributions_rel = {} # yearly interval
Expand Down
Loading