Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Find potential matches faster #61

Merged
merged 22 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
90a5a0d
Filter potential matches with CPCs too
patricoferris Sep 19, 2023
8b3cad2
Fix CPC ordering
patricoferris Sep 20, 2023
bec9e71
Upgrade rerunner to understand rescaled_cpcs and few other tweaks
robinmessage Sep 21, 2023
3b926b4
Add DRangedTree utility for fast finding of potential matches
robinmessage Sep 21, 2023
c8d06dc
WIP: noddling around to make tree work fast with new structure and co…
robinmessage Sep 21, 2023
6b7e91f
WIP: trying to eliminate overlaps faster but currently broken
robinmessage Sep 21, 2023
57455fe
WIP: working enough version of DRangedTree
robinmessage Sep 21, 2023
4f758f0
WIP: crashing in yirgacheffe trying to retrieve cpc pixels
robinmessage Sep 21, 2023
dd28d1c
Kind of working CPC matching, but CPC appears offset from Patrick's c…
robinmessage Sep 21, 2023
4d811ef
Tweaking CPC offsets
robinmessage Sep 21, 2023
69ed5cf
WIP: use coarse CPC; work around Yigracheffe issue
robinmessage Sep 22, 2023
69f69fd
Pull out number of divisions into a constant and increase to 40x40
robinmessage Sep 26, 2023
5ce1171
WIP: manually realign Gola CPCs
robinmessage Sep 26, 2023
c28e567
Improved test suite to account for new CPC columns and fraction of po…
robinmessage Sep 27, 2023
23cc3d0
Use expected_fraction to cut branches in DRangedTree early
robinmessage Sep 27, 2023
99a7b93
Fix DRangedTree bounding on left-hand side of values
robinmessage Sep 27, 2023
8cda030
Tweak DRangedTree self test
robinmessage Sep 27, 2023
3072cf7
Code review fixes, most specifically horizontal striping
robinmessage Oct 26, 2023
db1f4d8
Fix command line description of find_potential_matches
robinmessage Oct 26, 2023
13edeb4
Replace find_potential_matches with fast version
robinmessage Oct 26, 2023
72a0395
Lint
robinmessage Oct 26, 2023
a641f21
Types
robinmessage Oct 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions methods/matching/calculate_k.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import pandas as pd
from geopandas import gpd # type: ignore
from yirgacheffe.layers import TiledGroupLayer, RasterLayer, VectorLayer # type: ignore
from yirgacheffe.layers import GroupLayer, RasterLayer, VectorLayer # type: ignore
from yirgacheffe.window import PixelScale # type: ignore

from methods.common import LandUseClass
Expand Down Expand Up @@ -47,14 +47,14 @@ def build_layer_collection(
outline_layer = VectorLayer.layer_from_file(boundary_filename, None, pixel_scale, projection)

lucs = [
TiledGroupLayer([
GroupLayer([
RasterLayer.layer_from_file(os.path.join(jrc_directory_path, filename)) for filename in
glob.glob(f"*{year}*.tif", root_dir=jrc_directory_path)
], name=f"luc_{year}") for year in luc_years
]

cpcs = [
TiledGroupLayer([
GroupLayer([
RasterLayer.layer_from_file(
os.path.join(cpc_directory_path, filename)
) for filename in
Expand All @@ -65,21 +65,21 @@ def build_layer_collection(

# ecoregions is such a heavy layer it pays to just rasterize it once - we should possibly do this once
# as part of import of the ecoregions data
ecoregions = TiledGroupLayer([
ecoregions = GroupLayer([
RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
glob.glob("*.tif", root_dir=ecoregions_directory_path)
], name="ecoregions")

elevation = TiledGroupLayer([
elevation = GroupLayer([
RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in
glob.glob("srtm*.tif", root_dir=elevation_directory_path)
], name="elevation")
slopes = TiledGroupLayer([
slopes = GroupLayer([
RasterLayer.layer_from_file(os.path.join(slope_directory_path, filename)) for filename in
glob.glob("slope*.tif", root_dir=slope_directory_path)
], name="slopes")

access = TiledGroupLayer([
access = GroupLayer([
RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
glob.glob("*.tif", root_dir=access_directory_path)
], name="access")
Expand Down
43 changes: 41 additions & 2 deletions methods/matching/find_potential_matches.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def worker(
example_jrc_layer.pixel_scale,
example_jrc_layer.projection,
[start_year, start_year - 5, start_year - 10],
[], # CPC not needed at this stage
[start_year, start_year - 5, start_year - 10],
matching_zone_filename,
jrc_directory_path,
cpc_directory_path,
Expand Down Expand Up @@ -107,10 +107,49 @@ def worker(
filtered_luc5 = matching_collection.lucs[1].numpy_apply(lambda chunk: chunk == matching[luc5])
filtered_luc10 = matching_collection.lucs[2].numpy_apply(lambda chunk: chunk == matching[luc10])

# +/- 10% of CPC_U and CPC_D for t0, t-5 and t-10
filtered_cpc_u0 = matching_collection.cpcs[0].numpy_apply(
lambda chunk: np.logical_and(
chunk >= (matching["cpc0_u"] - matching["cpc0_u"] * 0.1),
chunk <= (matching["cpc0_u"] + matching["cpc0_u"] * 0.1)
)
)
filtered_cpc_d0 = matching_collection.cpcs[1].numpy_apply(
lambda chunk: np.logical_and(
chunk >= (matching["cpc0_d"] - matching["cpc0_d"] * 0.1),
chunk <= (matching["cpc0_d"] + matching["cpc0_d"] * 0.1)
)
)
filtered_cpc_u5 = matching_collection.cpcs[2].numpy_apply(
lambda chunk: np.logical_and(
chunk >= (matching["cpc5_u"] - matching["cpc5_u"] * 0.1),
chunk <= (matching["cpc5_u"] + matching["cpc5_u"] * 0.1)
)
)
filtered_cpc_d5 = matching_collection.cpcs[3].numpy_apply(
lambda chunk: np.logical_and(
chunk >= (matching["cpc5_d"] - matching["cpc5_d"] * 0.1),
chunk <= (matching["cpc5_d"] + matching["cpc5_d"] * 0.1)
)
)
filtered_cpc_u10 = matching_collection.cpcs[4].numpy_apply(
lambda chunk: np.logical_and(
chunk >= (matching["cpc10_u"] - matching["cpc10_u"] * 0.1),
chunk <= (matching["cpc10_u"] + matching["cpc10_u"] * 0.1)
)
)
filtered_cpc_d10 = matching_collection.cpcs[5].numpy_apply(
lambda chunk: np.logical_and(
chunk >= (matching["cpc10_d"] - matching["cpc10_d"] * 0.1),
chunk <= (matching["cpc10_d"] + matching["cpc10_d"] * 0.1)
)
)

filtered_countries = matching_collection.countries.numpy_apply(lambda chunk: chunk == matching.country)

calc = matching_collection.boundary * filtered_ecoregions * filtered_elevation * filtered_countries * \
filtered_luc0 * filtered_luc5 * filtered_luc10 * filtered_slopes * filtered_access
filtered_luc0 * filtered_luc5 * filtered_luc10 * filtered_slopes * filtered_access * filtered_cpc_u0 * \
filtered_cpc_u5 * filtered_cpc_u10 * filtered_cpc_d0 * filtered_cpc_d5 * filtered_cpc_d10
calc.save(matching_pixels)


Expand Down
Loading