From a12a5f6cca255159db509fb351d6e1fa6a7a3c4e Mon Sep 17 00:00:00 2001 From: Dan Baston Date: Tue, 14 Jan 2025 09:00:38 -0800 Subject: [PATCH] VRT Pixel Functions: Add function to evaluate arbitrary expression (#11209) Adds a C++ pixel function called "expression" that can evaluate an arbitrary expression (or indeed, a mini-program) using either: - the [exprtk library](https://www.partow.net/programming/exprtk/index.html). This is a single MIT-licensed header, that appears to be quite widely used. But which causes a significant increase in size of libgdal (8 MB) - or [muparser](https://github.com/beltoforion/muparser), that supports a reasonable variety of functions including a ternary conditional operator muparser is the deault when the "dialect" is not specified. An example VRT that is allowed is: ``` expression source_0.tif 1 source_1.tif 1 ``` --- .github/workflows/alpine/Dockerfile.ci | 1 + .github/workflows/cmake_builds.yml | 4 +- .../workflows/fedora_rawhide/Dockerfile.ci | 1 + .github/workflows/ubuntu_20.04/Dockerfile.ci | 6 + .github/workflows/ubuntu_20.04/build.sh | 1 + .github/workflows/ubuntu_24.04/Dockerfile.ci | 1 + .pre-commit-config.yaml | 2 + .../vrt/arraysource_derived_expression.vrt | 14 + autotest/gdrivers/vrtderived.py | 219 ++++++++ autotest/gdrivers/vrtprocesseddataset.py | 286 +++++++++++ autotest/pymod/gdaltest.py | 26 + cmake/helpers/CheckDependentLibraries.cmake | 3 + cmake/modules/packages/FindExprTk.cmake | 93 ++++ cmake/modules/packages/Findmuparser.cmake | 53 ++ .../development/building_from_source.rst | 36 +- doc/source/drivers/raster/vrt.rst | 484 ++++++++++-------- .../drivers/raster/vrt_processed_dataset.rst | 52 +- doc/source/spelling_wordlist.txt | 1 + frmts/vrt/CMakeLists.txt | 20 + frmts/vrt/pixelfunctions.cpp | 107 ++++ frmts/vrt/vrtdataset.h | 8 + frmts/vrt/vrtderivedrasterband.cpp | 25 + frmts/vrt/vrtexpression.h | 205 ++++++++ frmts/vrt/vrtexpression_exprtk.cpp | 380 ++++++++++++++ frmts/vrt/vrtexpression_muparser.cpp | 169 ++++++ frmts/vrt/vrtprocesseddataset.cpp | 8 +- frmts/vrt/vrtprocesseddatasetfunctions.cpp | 285 +++++++++++ frmts/vrt/vrtsources.cpp | 1 + port/cpl_known_config_options.h | 4 + 29 files changed, 2272 insertions(+), 223 deletions(-) create mode 100644 autotest/gdrivers/data/vrt/arraysource_derived_expression.vrt create mode 100644 cmake/modules/packages/FindExprTk.cmake create mode 100644 cmake/modules/packages/Findmuparser.cmake create mode 100644 frmts/vrt/vrtexpression.h create mode 100644 frmts/vrt/vrtexpression_exprtk.cpp create mode 100644 frmts/vrt/vrtexpression_muparser.cpp diff --git a/.github/workflows/alpine/Dockerfile.ci b/.github/workflows/alpine/Dockerfile.ci index ece744f9b120..291a18345ba0 100644 --- a/.github/workflows/alpine/Dockerfile.ci +++ b/.github/workflows/alpine/Dockerfile.ci @@ -41,6 +41,7 @@ RUN apk add \ lz4-dev \ make \ mariadb-connector-c-dev \ + muparser-dev \ netcdf-dev \ odbc-cpp-wrapper-dev \ ogdi-dev \ diff --git a/.github/workflows/cmake_builds.yml b/.github/workflows/cmake_builds.yml index eac11bb6d1b4..36b90f13a3d2 100644 --- a/.github/workflows/cmake_builds.yml +++ b/.github/workflows/cmake_builds.yml @@ -328,7 +328,7 @@ jobs: base-devel git mingw-w64-x86_64-toolchain mingw-w64-x86_64-cmake mingw-w64-x86_64-ccache mingw-w64-x86_64-pcre mingw-w64-x86_64-xerces-c mingw-w64-x86_64-zstd mingw-w64-x86_64-libarchive mingw-w64-x86_64-geos mingw-w64-x86_64-libspatialite mingw-w64-x86_64-proj - mingw-w64-x86_64-cgal mingw-w64-x86_64-libfreexl mingw-w64-x86_64-hdf5 mingw-w64-x86_64-netcdf mingw-w64-x86_64-poppler mingw-w64-x86_64-podofo mingw-w64-x86_64-postgresql + mingw-w64-x86_64-cgal mingw-w64-x86_64-libfreexl mingw-w64-x86_64-hdf5 mingw-w64-x86_64-muparser mingw-w64-x86_64-netcdf mingw-w64-x86_64-poppler mingw-w64-x86_64-podofo mingw-w64-x86_64-postgresql mingw-w64-x86_64-libgeotiff mingw-w64-x86_64-libpng mingw-w64-x86_64-libtiff mingw-w64-x86_64-openjpeg2 mingw-w64-x86_64-python-pip mingw-w64-x86_64-python-numpy mingw-w64-x86_64-python-pytest mingw-w64-x86_64-python-setuptools mingw-w64-x86_64-python-lxml mingw-w64-x86_64-swig mingw-w64-x86_64-python-psutil mingw-w64-x86_64-blosc mingw-w64-x86_64-libavif - name: Setup cache @@ -434,7 +434,7 @@ jobs: cfitsio freexl geotiff libjpeg-turbo libpq libspatialite libwebp-base pcre pcre2 postgresql \ sqlite tiledb zstd cryptopp cgal doxygen librttopo openssl liblzma-devel \ openjdk ant qhull armadillo blas blas-devel libblas libcblas liblapack liblapacke blosc libarchive \ - libarrow pyarrow libaec libheif libavif cmake fsspec + libarrow pyarrow libaec libheif libavif muparser cmake fsspec - name: Check CMake version shell: bash -l {0} run: | diff --git a/.github/workflows/fedora_rawhide/Dockerfile.ci b/.github/workflows/fedora_rawhide/Dockerfile.ci index 667192588774..4ab9c2eb94ee 100644 --- a/.github/workflows/fedora_rawhide/Dockerfile.ci +++ b/.github/workflows/fedora_rawhide/Dockerfile.ci @@ -19,6 +19,7 @@ RUN dnf install -y clang make diffutils ccache cmake \ mdbtools-devel mdbtools-odbc unixODBC-devel \ armadillo-devel qhull-devel \ hdf-devel hdf5-devel netcdf-devel \ + muParser-devel \ libpq-devel \ libavif-devel \ python3-setuptools python3-pip python3-devel python3-lxml swig \ diff --git a/.github/workflows/ubuntu_20.04/Dockerfile.ci b/.github/workflows/ubuntu_20.04/Dockerfile.ci index 483992d8f358..d58dff8cb451 100644 --- a/.github/workflows/ubuntu_20.04/Dockerfile.ci +++ b/.github/workflows/ubuntu_20.04/Dockerfile.ci @@ -49,6 +49,7 @@ RUN apt-get update -y \ liblz4-dev \ liblzma-dev \ libmono-system-drawing4.0-cil \ + libmuparser-dev \ libmysqlclient-dev \ libnetcdf-dev \ libogdi-dev \ @@ -274,6 +275,11 @@ RUN if test "${OPENDRIVE_VERSION}" != ""; then ( \ && rm -rf libOpenDRIVE-${OPENDRIVE_VERSION} \ ); fi +# Install exprtk +RUN wget -q https://www.partow.net/downloads/exprtk.zip && \ + unzip -j -d /usr/local/include exprtk.zip exprtk/exprtk.hpp && \ + rm exprtk.zip + RUN ldconfig COPY requirements.txt /tmp/ diff --git a/.github/workflows/ubuntu_20.04/build.sh b/.github/workflows/ubuntu_20.04/build.sh index 203c0856e286..f602c0019123 100755 --- a/.github/workflows/ubuntu_20.04/build.sh +++ b/.github/workflows/ubuntu_20.04/build.sh @@ -15,6 +15,7 @@ cmake "${GDAL_SOURCE_DIR:=..}" \ -DCMAKE_INSTALL_PREFIX=/tmp/install-gdal \ -DGDAL_USE_TIFF_INTERNAL=OFF \ -DGDAL_USE_GEOTIFF_INTERNAL=OFF \ + -DGDAL_USE_EXPRTK=ON \ -DECW_ROOT=/opt/libecwj2-3.3 \ -DMRSID_ROOT=/usr/local \ -DFileGDB_ROOT=/usr/local/FileGDB_API \ diff --git a/.github/workflows/ubuntu_24.04/Dockerfile.ci b/.github/workflows/ubuntu_24.04/Dockerfile.ci index f927052222d0..87a682a43eee 100644 --- a/.github/workflows/ubuntu_24.04/Dockerfile.ci +++ b/.github/workflows/ubuntu_24.04/Dockerfile.ci @@ -37,6 +37,7 @@ RUN apt-get update && \ liblcms2-2 \ liblz4-dev \ liblzma-dev \ + libmuparser-dev \ libmysqlclient-dev \ libnetcdf-dev \ libogdi-dev \ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 460236f83428..bafbb6dc5a84 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,9 @@ repos: autotest/cpp/data/| autotest/gdrivers/data/| swig/| + third_party/exprtk/| third_party/fast_float/| + third_party/muparser/| third_party/LercLib/| autotest/ogr/data/| alg/internal_libqhull/| diff --git a/autotest/gdrivers/data/vrt/arraysource_derived_expression.vrt b/autotest/gdrivers/data/vrt/arraysource_derived_expression.vrt new file mode 100644 index 000000000000..b0c95bbafde5 --- /dev/null +++ b/autotest/gdrivers/data/vrt/arraysource_derived_expression.vrt @@ -0,0 +1,14 @@ + + + expression + + + + Float64 + + + 10 + + + + diff --git a/autotest/gdrivers/vrtderived.py b/autotest/gdrivers/vrtderived.py index 0a07de55c24c..0547ca0e33a0 100755 --- a/autotest/gdrivers/vrtderived.py +++ b/autotest/gdrivers/vrtderived.py @@ -1055,6 +1055,225 @@ def identity(in_ar, out_ar, *args, **kwargs): assert vrt_ds.GetRasterBand(1).DataType == dtype +############################################################################### +# Test arbitrary expression pixel functions + + +def vrt_expression_xml(tmpdir, expression, dialect, sources): + + drv = gdal.GetDriverByName("GTiff") + + nx = 1 + ny = 1 + + expression = expression.replace("<", "<").replace(">", ">") + + xml = f""" + + expression + """ + + for i, source in enumerate(sources): + if type(source) is tuple: + source_name, source_value = source + else: + source_name = "" + source_value = source + + src_fname = tmpdir / f"source_{i}.tif" + + with drv.Create(src_fname, 1, 1, 1, gdal.GDT_Float64) as ds: + ds.GetRasterBand(1).Fill(source_value) + + xml += f""" + {src_fname} + 1 + """ + + xml += "" + + return xml + + +@pytest.mark.parametrize( + "expression,sources,result,dialects", + [ + pytest.param("A", [("A", 77)], 77, None, id="identity"), + pytest.param( + "(NIR-R)/(NIR+R)", + [("NIR", 77), ("R", 63)], + (77 - 63) / (77 + 63), + None, + id="simple expression", + ), + pytest.param( + "if (A > B) 1.5*C ; else A", + [("A", 77), ("B", 63), ("C", 18)], + 27, + ["exprtk"], + id="exprtk conditional (explicit)", + ), + pytest.param( + "(A > B) ? 1.5*C : A", + [("A", 77), ("B", 63), ("C", 18)], + 27, + ["muparser"], + id="muparser conditional (explicit)", + ), + pytest.param( + "(A > B)*(1.5*C) + (A <= B)*(A)", + [("A", 77), ("B", 63), ("C", 18)], + 27, + None, + id="conditional (implicit)", + ), + pytest.param( + "B2 * PopDensity", + [("PopDensity", 3), ("", 7)], + 21, + None, + id="implicit source name", + ), + pytest.param( + "B1 / sum(BANDS)", + [("", 3), ("", 5), ("", 31)], + 3 / (3 + 5 + 31), + None, + id="use of BANDS variable", + ), + pytest.param( + "B1 / sum(B2, B3) ", + [("", 3), ("", 5), ("", 31)], + 3 / (5 + 31), + None, + id="aggregate specified inputs", + ), + pytest.param( + "var q[2] := {B2, B3}; B1 * q", + [("", 3), ("", 5), ("", 31)], + 15, # First value in returned vector. This behavior doesn't seem desirable + # but I haven't figured out how to detect a vector return. + ["exprtk"], + id="return vector", + ), + pytest.param( + "B1 + B2 + B3", + (5, 9, float("nan")), + float("nan"), + None, + id="nan propagated via arithmetic", + ), + pytest.param( + "if (B3) B1 ; else B2", + (5, 9, float("nan")), + 5, + ["exprtk"], + id="exprtk nan = truth in conditional?", + ), + pytest.param( + "B3 ? B1 : B2", + (5, 9, float("nan")), + 5, + ["muparser"], + id="muparser nan = truth in conditional?", + ), + pytest.param( + "if (B3 > 0) B1 ; else B2", + (5, 9, float("nan")), + 9, + ["exprtk"], + id="exprtk nan comparison is false in conditional", + ), + pytest.param( + "(B3 > 0) ? B1 : B2", + (5, 9, float("nan")), + 9, + ["muparser"], + id="muparser nan comparison is false in conditional", + ), + pytest.param( + "if (B1 > 5) B1", + (1,), + float("nan"), + ["exprtk"], + id="expression returns nodata", + ), + ], +) +@pytest.mark.parametrize("dialect", ("exprtk", "muparser")) +def test_vrt_pixelfn_expression( + tmp_vsimem, expression, sources, result, dialect, dialects +): + pytest.importorskip("numpy") + + if not gdaltest.gdal_has_vrt_expression_dialect(dialect): + pytest.skip(f"Expression dialect {dialect} is not available") + + if dialects and dialect not in dialects: + pytest.skip(f"Expression not supported for dialect {dialect}") + + xml = vrt_expression_xml(tmp_vsimem, expression, dialect, sources) + + with gdal.Open(xml) as ds: + assert pytest.approx(ds.ReadAsArray()[0][0], nan_ok=True) == result + + +@pytest.mark.parametrize( + "expression,sources,dialect,exception", + [ + pytest.param( + "A*B + C", + [("A", 77), ("B", 63)], + "exprtk", + "Undefined symbol", + id="exprtk undefined variable", + ), + pytest.param( + "A*B + C", + [("A", 77), ("B", 63)], + "muparser", + "Unexpected token", + id="muparser undefined variable", + ), + pytest.param( + "(".join(["asin", "sin", "acos", "cos"] * 100) + "(X" + 100 * 4 * ")", + [("X", 0.5)], + "exprtk", + "exceeds maximum allowed stack depth", + id="expression is too complex", + ), + pytest.param( + " ".join(["sin(x) + cos(x)"] * 10000), + [("x", 0.5)], + "exprtk", + "exceeds maximum of 100000 set by GDAL_EXPRTK_MAX_EXPRESSION_LENGTH", + id="expression is too long", + ), + ], +) +def test_vrt_pixelfn_expression_invalid( + tmp_vsimem, expression, sources, dialect, exception +): + pytest.importorskip("numpy") + + if not gdaltest.gdal_has_vrt_expression_dialect(dialect): + pytest.skip(f"Expression dialect {dialect} is not available") + + messages = [] + + def handle(ecls, ecode, emsg): + messages.append(emsg) + + xml = vrt_expression_xml(tmp_vsimem, expression, dialect, sources) + + with gdaltest.error_handler(handle): + ds = gdal.Open(xml) + if ds: + assert ds.ReadAsArray() is None + + assert exception in "".join(messages) + + ############################################################################### # Cleanup. diff --git a/autotest/gdrivers/vrtprocesseddataset.py b/autotest/gdrivers/vrtprocesseddataset.py index cf6c5f038351..1a80043ebc3d 100755 --- a/autotest/gdrivers/vrtprocesseddataset.py +++ b/autotest/gdrivers/vrtprocesseddataset.py @@ -10,6 +10,7 @@ # SPDX-License-Identifier: MIT ############################################################################### +import math import os import gdaltest @@ -1215,6 +1216,291 @@ def test_vrtprocesseddataset_trimming_errors(tmp_vsimem): ) +############################################################################### +# Test expressions + + +@pytest.mark.parametrize( + "dialect,expression,src,expected,error,env", + [ + pytest.param( + "exprtk", + "return [BANDS[1], BANDS[2]]", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[[3, 4]], [[5, 6]]]), + None, + {}, + id="multiple bands in, multiple bands out (1)", + ), + pytest.param( + "exprtk", + "return [BANDS]", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + None, + {}, + id="multiple bands in, multiple bands out (2)", + ), + pytest.param( + "muparser", + "B2, B3", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[[3, 4]], [[5, 6]]]), + None, + {}, + id="multiple bands in, multiple bands out (3)", + ), + pytest.param( + "muparser", + "BANDS", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + None, + {}, + id="multiple bands in, multiple bands out (4)", + ), + pytest.param( + "exprtk", + """ + // Reduce every 10 bands of input into a single + // band of output, using avg() + const var chunksize := 10; + const var outsize := BANDS[] / chunksize; + + var chunk[chunksize]; + var out[outsize]; + + for (var i := 0; i < out[]; i += 1) { + for (var j := 0; j < chunk[]; j += 1) { + chunk[j] := BANDS[i * chunk[] + j]; + }; + out[i] := avg(chunk); + }; + return [out]; + """, + np.arange(100).reshape(50, 1, 2), + np.array([[[9, 10]], [[29, 30]], [[49, 50]], [[69, 70]], [[89, 90]]]), + None, + {}, + id="procedural", + ), + pytest.param( + "exprtk", + "B1", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[1, 2]]), + None, + {}, + id="multiple bands in, single band out (1)", + ), + pytest.param( + "muparser", + "B1", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[1, 2]]), + None, + {}, + id="multiple bands in, single band out (2)", + ), + pytest.param( + "exprtk", + "BANDS[0]", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[1, 2]]), + None, + {}, + id="multiple bands in, single band out (3)", + ), + pytest.param( + "exprtk", + "return [B1];", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[1, 2]]), + None, + {}, + id="multiple bands in, single band out (4)", + ), + pytest.param( + "exprtk", + "return [B1, B2]", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[1, 2]]), + "returned 2 values but 1 output band", + {}, + id="return wrong number of bands", + ), + pytest.param( + "exprtk", + "return [BANDS, B2]", + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[1, 2]]), + "must return a vector or a list of scalars", + {}, + id="return wrong number of bands", + ), + pytest.param( + "exprtk", + """ + var out[3]; + for (var i := 0; i < 100; i += 1) { + out[i] := i; + }; + return [out]; + """, + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[[1, 1]], [[2, 2]], [[3, 3]]]), + "Attempted to access index 3", + {}, + id="out of bounds vector access", + ), + pytest.param( + "exprtk", + """ + var out[5]; + return [out]; + """, + np.array([[[1, 2]]]), + np.array([[[1, 1]]]), + "Failed to parse expression", + {"GDAL_EXPRTK_MAX_VECTOR_LENGTH": "4"}, + id="vector too large", + ), + pytest.param( + "exprtk", + """ + var out[3]; + for (var i := 0; i < out[]; i += 1) { + out[i] := i; + }; + return [out]; + """, + np.array([[[1, 2]], [[3, 4]], [[5, 6]]]), + np.array([[[1, 1]], [[2, 2]], [[3, 3]]]), + "Failed to parse expression", + {"GDAL_EXPRTK_ENABLE_LOOPS": "NO"}, + id="loops disabled", + ), + pytest.param( + "exprtk", + """ + for (var i := 0; i < 20000; i += 1) { + sleep(0.2/20000); // we only check runtime every 10,000 iterations + }; + return [B1]; + """, + np.array([[[1, 2]]]), + np.array([[1, 2]]), + "time exceeded maximum", + {"GDAL_EXPRTK_TIMEOUT_SECONDS": "0.1"}, + id="loop evaluation timeout", + ), + ], +) +def test_vrtprocesseddataset_expression( + request, tmp_vsimem, expression, src, dialect, expected, env, error +): + if not gdaltest.gdal_has_vrt_expression_dialect(dialect): + pytest.skip(f"{dialect} not available") + + if "timeout" in request.node.name and "debug" not in gdal.VersionInfo(""): + pytest.skip("Timeout tests only work on debug builds") + + src_filename = tmp_vsimem / "src.tif" + + num_input_bands = 1 if len(src.shape) == 2 else src.shape[0] + expected_output_bands = 1 if len(expected.shape) == 2 else expected.shape[0] + + with gdal.GetDriverByName("GTiff").Create( + src_filename, 2, 1, num_input_bands + ) as src_ds: + src_ds.WriteArray(src) + src_ds.SetGeoTransform([0, 1, 0, 0, 0, 1]) + + output_band_xml = "".join( + f"""""" + for i in range(expected_output_bands) + ) + + vrt_xml = f""" + + {src_filename} + + + + Expression + {expression.replace('<', '<').replace('>', '>')} + {dialect} + + + {output_band_xml} + + """ + + with gdal.config_options(env): + if error: + with pytest.raises(Exception, match=error): + ds = gdal.Open(vrt_xml) + result = ds.ReadAsArray() + else: + ds = gdal.Open(vrt_xml) + result = ds.ReadAsArray() + np.testing.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "batch_size", + ( + pytest.param(13, id="batch size greater than input"), + pytest.param(1, id="batch size=1"), + pytest.param(3, id="input bands are multiple of batch size"), + pytest.param(5, id="input bands are not a multiple of batch size"), + ), +) +def test_vrtprocesseddataset_expression_batchsize(tmp_vsimem, batch_size): + + if not gdaltest.gdal_has_vrt_expression_dialect("muparser"): + pytest.skip("muparser not available") + + src_filename = tmp_vsimem / "in.tif" + + inputs = np.arange(12) + inputs = inputs.reshape(inputs.size, 1, 1) + + with gdal.GetDriverByName("GTiff").Create( + src_filename, + 1, + 1, + bands=inputs.size, + ) as src_ds: + src_ds.WriteArray(inputs) + src_ds.SetGeoTransform([0, 1, 0, 0, 0, 1]) + + num_chunks = math.ceil(inputs.size / batch_size) + chunks = [np.arange(batch_size) + r * batch_size for r in range(num_chunks)] + expected = np.array([inputs[chunk[chunk < inputs.size]].sum() for chunk in chunks]) + + vrt_xml = f""" + + {src_filename} + + + + Expression + sum(BANDS) + muparser + {batch_size} + + + + + """ + + with gdal.Open(vrt_xml) as ds: + actual = ds.ReadAsArray().flatten() + + np.testing.assert_equal(actual, expected) + + ############################################################################### # Test that serialization (for example due to statistics computation) properly # works diff --git a/autotest/pymod/gdaltest.py b/autotest/pymod/gdaltest.py index a17d6bce3ada..185e43d20872 100755 --- a/autotest/pymod/gdaltest.py +++ b/autotest/pymod/gdaltest.py @@ -13,6 +13,7 @@ ############################################################################### import contextlib +import functools import io import json import math @@ -2131,3 +2132,28 @@ def handler(lvl, no, msg): assert any( [err["level"] == type and match in err["message"] for err in errors] ), f'Did not receive an error of type {err_levels[type]} matching "{match}"' + + +############################################################################### +# Check VRT capabilities + + +@functools.lru_cache() +def gdal_has_vrt_expression_dialect(dialect): + with disable_exceptions(), gdal.quiet_errors(): + vrt = f""" + + expression + + + + Float64 + + + 10 + + + + """ + ds = gdal.Open(vrt) + return ds.ReadRaster() is not None diff --git a/cmake/helpers/CheckDependentLibraries.cmake b/cmake/helpers/CheckDependentLibraries.cmake index ca5d76ef1909..09cf2536d907 100644 --- a/cmake/helpers/CheckDependentLibraries.cmake +++ b/cmake/helpers/CheckDependentLibraries.cmake @@ -443,6 +443,9 @@ gdal_check_package(IDB "enable ogr_IDB driver" CAN_DISABLE) gdal_check_package(rdb "enable RIEGL RDB library" CONFIG CAN_DISABLE) include(CheckDependentLibrariesTileDB) +gdal_check_package(ExprTk "Enable C++ Mathematical Expression Tooklit Library (ExprTk) for VRT expressions" DISABLED_BY_DEFAULT) +gdal_check_package(muparser "Enable muparser library for VRT expressions" RECOMMENDED CAN_DISABLE) + gdal_check_package(OpenEXR "OpenEXR >=2.2" CAN_DISABLE) gdal_check_package(MONGOCXX "Enable MongoDBV3 driver" CAN_DISABLE) diff --git a/cmake/modules/packages/FindExprTk.cmake b/cmake/modules/packages/FindExprTk.cmake new file mode 100644 index 000000000000..b057721761c2 --- /dev/null +++ b/cmake/modules/packages/FindExprTk.cmake @@ -0,0 +1,93 @@ +# /*========================================================================= +# +# Program: Visualization Toolkit +# Module: Copyright.txt +# +# Copyright (c) 1993-2015 Ken Martin, Will Schroeder, Bill Lorensen +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither name of Ken Martin, Will Schroeder, or Bill Lorensen nor the names +# of any contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# =========================================================================*/ + +# - Try to find ExprTk lib +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(ExprTk 2.7) +# to require version 2.7 or newer of ExprTk. +# +# Once done this will define +# +# ExprTk_FOUND - system has exprtk with correct version +# ExprTk_INCLUDE_DIRS - the exprtk include directory +# ExprTk_VERSION - exprtk version +# +# And the following imported target: +# +# ExprTk::ExprTk + +find_path(ExprTk_INCLUDE_DIR + NAMES exprtk.hpp + DOC "Path to ExprTk header") +mark_as_advanced(ExprTk_INCLUDE_DIR) + +if (ExprTk_INCLUDE_DIR) + file(STRINGS "${ExprTk_INCLUDE_DIR}/exprtk.hpp" _exprtk_version_header REGEX "\"[0-9.]+\"") + set(ExprTk_VERSION) + foreach (_exprtk_version_line IN LISTS _exprtk_version_header) + if ("${ExprTk_VERSION}" STREQUAL "") + string(REGEX MATCH [[version = "(2\.7[0-9.]+)".*$]] _exprtk_version_match "${_exprtk_version_line}") + set(ExprTk_VERSION "${CMAKE_MATCH_1}") + else () + string(REGEX MATCH "\"([0-9.]+)\".*$" _exprtk_version_match "${_exprtk_version_line}") + set(ExprTk_VERSION "${ExprTk_VERSION}${CMAKE_MATCH_1}") + endif () + if (_exprtk_version_match MATCHES "\;") + break() + endif () + endforeach () + if (NOT ExprTk_VERSION) + # fallback: version in exprtk.hpp has always started with 2.7 + set(ExprTk_VERSION "2.7") + endif () + unset(_exprtk_version_header) + unset(_exprtk_version_line) + unset(_exprtk_version_match) +endif () + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ExprTk + REQUIRED_VARS ExprTk_INCLUDE_DIR + VERSION_VAR ExprTk_VERSION) + +if (ExprTk_FOUND) + set(ExprTk_INCLUDE_DIRS "${ExprTk_INCLUDE_DIR}") + if (NOT TARGET ExprTk::ExprTk) + add_library(ExprTk::ExprTk INTERFACE IMPORTED) + set_target_properties(ExprTk::ExprTk PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${ExprTk_INCLUDE_DIR}") + endif () +endif () diff --git a/cmake/modules/packages/Findmuparser.cmake b/cmake/modules/packages/Findmuparser.cmake new file mode 100644 index 000000000000..a284f57567a7 --- /dev/null +++ b/cmake/modules/packages/Findmuparser.cmake @@ -0,0 +1,53 @@ +# * Find muparser +# Find the muparser C++ math expression parser library +# +# muparser::muparser - Imported target to use +# MUPARSER_FOUND - True if muparser # was found. +# +# Original Author: 2019 Rylie Pavlik +# +# +# Copyright 2019, Collabora, Ltd. +# +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) +# +# SPDX-License-Identifier: BSL-1.0 + +set(MUPARSER_ROOT_DIR + "${MUPARSER_ROOT_DIR}" + CACHE PATH "Directory to search for mmuparser") + +find_package(PkgConfig QUIET) +pkg_check_modules(PC_MUPARSER QUIET muparser) + +find_path( + MUPARSER_INCLUDE_DIR + NAMES muParser.h + PATHS "${MUPARSER_ROOT_DIR}" + HINTS ${PC_MUPARSER_INCLUDEDIR} ${PC_MUPARSER_INCLUDE_DIRS}) +find_library( + MUPARSER_LIBRARY + NAMES muparser + PATHS "${MUPARSER_ROOT_DIR}" + HINTS ${PC_MUPARSER_LIBDIR} ${PC_MUPARSER_LIBRARY_DIRS}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(muparser DEFAULT_MSG MUPARSER_INCLUDE_DIR + MUPARSER_LIBRARY) + +if(MUPARSER_FOUND) + if(NOT TARGET muparser::muparser) + add_library(muparser::muparser UNKNOWN IMPORTED) + set_target_properties( + muparser::muparser + PROPERTIES IMPORTED_LOCATION "${MUPARSER_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${MUPARSER_INCLUDE_DIR}") + endif() + set(MUPARSER_INCLUDE_DIRS ${MUPARSER_INCLUDE_DIR}) + set(MUPARSER_LIBRARIES ${MUPARSER_LIBRARY}) + mark_as_advanced(MUPARSER_ROOT_DIR) +endif() + +mark_as_advanced(MUPARSER_INCLUDE_DIR MUPARSER_LIBRARY) diff --git a/doc/source/development/building_from_source.rst b/doc/source/development/building_from_source.rst index 2bf228ddcc4c..9103a013c444 100644 --- a/doc/source/development/building_from_source.rst +++ b/doc/source/development/building_from_source.rst @@ -277,7 +277,7 @@ CMake package dependent options .. Put packages in alphabetic order. Generally speaking, packages (external dependencies) will be automatically found if -they are in default locations used by CMake. This can be also tuned for example +they are in default locations used by CMake. This can also be tuned for example with the ``CMAKE_PREFIX_PATH`` variable. Starting with CMake 3.12, it is also possible to use a @@ -616,6 +616,22 @@ the XercesC library. Control whether to use EXPAT. Defaults to ON when EXPAT is found. +ExprTk +****** + +`ExprTk `__ is a +mathematical expression parser and evaluation engine. It can be used by the +:ref:`raster.vrt` driver. Building with ExprTk may increase the size of the +GDAL library by several megabytes. + +.. option:: GDAL_USE_EXPRTK=ON/OFF + + Control whether to use ExprTk. Defaults to OFF even if ExprTk is found. + +.. option:: EXPRTK_INCLUDE_DIR + + Path to the include directory with the :file:`exprtk.hpp` header file. + FileGDB ******* @@ -1378,6 +1394,24 @@ The library is normally found if installed in standard location, and at version Control whether to use MSSQL_ODBC. Defaults to ON when MSSQL_ODBC is found. +muparser +******** + +`muparser `__ is a mathematical expression +parser and evaluation engine. It can be used by the :ref:`raster.vrt` driver. + +.. option:: GDAL_USE_MUPARSER=ON/OFF + + Control whether to use muparser. Defaults to ON when muparser is found. + +.. option:: MUPARSER_INCLUDE_DIR + + Path to directory with muparser headers. + +.. option:: MUPARSER_LIBRARY + + Path to library to be linked. + MYSQL ***** diff --git a/doc/source/drivers/raster/vrt.rst b/doc/source/drivers/raster/vrt.rst index a976405f27ce..f0b1cb6bc70d 100644 --- a/doc/source/drivers/raster/vrt.rst +++ b/doc/source/drivers/raster/vrt.rst @@ -1037,91 +1037,69 @@ should be specified with the above :cpp:func:`GDALRasterBand::SetMetadataItem` e .. _vrt_derived_bands: -Using Derived Bands (with pixel functions in C/C++) ---------------------------------------------------- - -A specialized type of band is a 'derived' band which derives its pixel -information from its source bands. With this type of band you must also -specify a pixel function, which has the responsibility of generating the -output raster. Pixel functions are created by an application and then -registered with GDAL using a unique key. +Derived Bands (pixel functions) +------------------------------- +A "derived" band is a specialized type of band that derives its pixel +values from its source bands using a "pixel function" at the time +that values are read from the raster. Using derived bands you can create VRT datasets that manipulate bands on -the fly without having to create new band files on disk. For example, you -might want to generate a band using four source bands from a nine band input +the fly without having to create new band files on disk. +GDAL provides a number of +:ref:`built-in pixel functions `; +additional pixel functions can be defined in :ref:`C++ ` +or :ref:`Python ` and registered with GDAL using +a unique key. + +.. example:: + :title: Calculating a derived band + :id: vrt-derived-1 + +For example, you +might want to generate a band using four source bands from a nine-band input dataset (x0, x3, x4, and x8) and some constant y: .. code-block:: c band_value = sqrt((x3*x3+x4*x4)/(x0*x8)) + y; -You could write the pixel function to compute this value and then register -it with GDAL with the name "MyFirstFunction". Then, the following VRT XML -could be used to display this derived band: - +This can be accomplished using the built-in "expression" pixel function. +The following VRT XML can be used: .. code-block:: xml Magnitude - MyFirstFunction - - + expression + + nine_band.dat 1 - - - + nine_band.dat 4 - - - + nine_band.dat 5 - - - + nine_band.dat 9 - - -.. note:: - - PixelFunctionArguments can only be used with C++ pixel functions in GDAL versions 3.4 and greater. - - -In addition to the subclass specification (VRTDerivedRasterBand) and -the PixelFunctionType value, there is another new parameter that can come -in handy: SourceTransferType. Typically the source rasters are obtained -using the data type of the derived band. There might be times, -however, when you want the pixel function to have access to -higher resolution source data than the data type being generated. -For example, you might have a derived band of type "Float", which takes -a single source of type "CFloat32" or "CFloat64", and returns the imaginary -portion. To accomplish this, set the SourceTransferType to "CFloat64". -Otherwise the source would be converted to "Float" prior to -calling the pixel function, and the imaginary portion would be lost. +.. seealso:: + :ref:`VRT processed datasets `, which provide a means of calculating multiple bands at once. -.. code-block:: xml - - - Magnitude - MyFirstFunction - CFloat64 - ... +.. _builtin_pixel_functions: -Default Pixel Functions -+++++++++++++++++++++++ +Built-in Pixel Functions +++++++++++++++++++++++++ GDAL provides a set of default pixel functions that can be used without writing new code: @@ -1166,14 +1144,16 @@ GDAL provides a set of default pixel functions that can be used without writing - 2 - - - divide one raster band by another (``b1 / b2``) - * - **norm_diff** - - 2 - - - - - computes the normalized difference between two raster bands: ``(b1 - b2)/(b1 + b2)`` * - **exp** - 1 - ``base`` (optional), ``fact`` (optional) - computes the exponential of each element in the input band ``x`` (of real values): ``e ^ x``. The function also accepts two optional parameters: ``base`` and ``fact`` that allow to compute the generalized formula: ``base ^ ( fact * x )``. Note: this function is the recommended one to perform conversion form logarithmic scale (dB): `` 10. ^ (x / 20.)``, in this case ``base = 10.`` and ``fact = 0.05`` i.e. ``1. / 20`` + * - **expression** + - 1 + - ``expression`` + - evaluate a specified expression using `muparser `__ (default) or `ExprTk `__. The expression is specified using the "expression" argument; the dialect may be specified using the "dialect" argument. Within the expression, band values can be accessed through the variables ``B1``, ``B2``, etc. ; by giving a name to a source band (e.g., ````); or through the ``BANDS`` vector. With ExprTk, ``BANDS`` is exposed as a standard (0-indexed) vector. With muparser, it is expanded into a list of all input bands. + + ExprTk and muparser support a number of built-in functions and control structures. Refer to the documentation of those libraries for details. * - **imag** - 1 - - @@ -1214,6 +1194,10 @@ GDAL provides a set of default pixel functions that can be used without writing - >= 2 - ``k`` (optional) - multiply 2 or more raster bands. If the optional ``k`` parameter is provided then the result is multiplied by the scalar ``k``. + * - **norm_diff** + - 2 + - - + - computes the normalized difference between two raster bands: ``(b1 - b2)/(b1 + b2)`` * - **phase** - 1 - - @@ -1230,14 +1214,6 @@ GDAL provides a set of default pixel functions that can be used without writing - 1 - - - extract real part from a single raster band (just a copy if the input is non-complex) - * - **sqrt** - - 1 - - - - - perform the square root of a single raster band (real only) - * - **sum** - - >= 2 - - ``k`` (optional) - - sum 2 or more raster bands. If the optional ``k`` parameter is provided then it is added to each element of the result * - **replace_nodata** - = 1 - ``to`` (optional) @@ -1246,10 +1222,58 @@ GDAL provides a set of default pixel functions that can be used without writing - = 1 - - - perform scaling according to the ``offset`` and ``scale`` values of the raster band + * - **sqrt** + - 1 + - - + - perform the square root of a single raster band (real only) + * - **sum** + - >= 2 + - ``k`` (optional) + - sum 2 or more raster bands. If the optional ``k`` parameter is provided then it is added to each element of the result + +.. example:: + :title: VRT expression with a simple condition + + This example uses a simple ``if`` block to calculate the derived band + value depending on the value of an input band. Note the use of ``>`` + to escape the ``>`` character in the XML attribute. Using the same + VRT as :example:`vrt-derived-1`, the following expression can be + used: + + .. code-block:: xml + + expression + + + or + + .. code-block:: xml + + expression + + +.. example:: + :title: VRT expression using all bands + + The ``BANDS`` variable provides access to all of the input bands, either + individually (``BANDS[0]`` for the first band) or collectively. Here, + we concisely evaluate the value of a band relative to the sum of other + bands. Using the same VRT as :example:`vrt-derived-1`: + + .. code-block:: xml -Writing Pixel Functions -+++++++++++++++++++++++ + expression + + +.. _cpp_pixel_functions: + +Writing Pixel Functions in C/C++ +++++++++++++++++++++++++++++++++ + +If the built-in pixel functions are not suitable, a custom function can be written in C or C++. In this case, the +You could write the pixel function to compute this value and then +register it with GDAL with a name such as "MyFirstFunction". To register this function with GDAL (prior to accessing any VRT datasets with derived bands that use this function), an application calls :cpp:func:`GDALAddDerivedBandPixelFuncWithArgs` with a key and a :cpp:type:`GDALDerivedPixelFuncWithArgs`: @@ -1372,14 +1396,45 @@ The following is an implementation of the pixel function: return CE_None; } -Using Derived Bands (with pixel functions in Python) ----------------------------------------------------- +Setting the working data type +***************************** + +In addition to the subclass specification (VRTDerivedRasterBand) and +the PixelFunctionType value, there is another new parameter that can come +in handy: SourceTransferType. Typically the source rasters are obtained +using the data type of the derived band. There might be times, +however, when you want the pixel function to have access to +higher resolution source data than the data type being generated. +For example, you might have a derived band of type "Float", which takes +a single source of type "CFloat32" or "CFloat64", and returns the imaginary +portion. To accomplish this, set the SourceTransferType to "CFloat64". +Otherwise the source would be converted to "Float" prior to +calling the pixel function, and the imaginary portion would be lost. + +.. code-block:: xml -Starting with GDAL 2.2, in addition to pixel functions written in C/C++ as + + + Magnitude + MyFirstFunction + CFloat64 + ... + +.. _python_pixel_functions: + +Writing Pixel Functions in Python ++++++++++++++++++++++++++++++++++ + +In addition to pixel functions written in C/C++ as documented in the :ref:`vrt_derived_bands` section, it is possible to use pixel functions written in Python. Both `CPython `_ and `NumPy `_ are requirements at run-time. +.. note:: + + Python pixel functions are not enabled by default; see :ref:`raster_vrt_security_implications`. + + The subelements for VRTRasterBand (whose subclass specification must be set to VRTDerivedRasterBand) are : @@ -1415,157 +1470,156 @@ returned by the pixel function is ignored. If wanting to fill ``out_ar`` from another array, use the ``out_ar[:] = ...`` syntax. -Examples -++++++++ - -VRT that multiplies the values of the source file by a factor of 1.5 -******************************************************************** - -.. code-block:: xml - - - EPSG:26711 - 440720,60,0,3751320,0,-60 - - multiply - Python - - - - - byte.tif - - - +Python pixel function examples +++++++++++++++++++++++++++++++ + +.. example:: + :title: VRT that multiplies the values of the source file by a factor of 1.5 + + .. code-block:: xml + + + EPSG:26711 + 440720,60,0,3751320,0,-60 + + multiply + Python + + + + + byte.tif + + + + +.. example:: + :title: VRT that adds 2 (or more) rasters + + .. code-block:: xml + + + EPSG:26711 + 440720,60,0,3751320,0,-60 + + add + Python + + + + byte.tif + + + byte2.tif + + + + +.. example:: + :title: VRT that computes hillshading using an external library + + .. code-block:: xml + + + EPSG:4326 + -80.004166666666663,0.008333333333333,0, + 44.004166666666663,0,-0.008333333333333 + + Gray + + n43.dt0 + + Python + hillshading.hillshade + + 1 + Int16 + + + + with :file:`hillshading.py`: + + .. code-block:: python + + # Licence: MIT + # Copyright 2016, Even Rouault + import math + + def hillshade_int(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, + raster_ysize, radius, gt, z, scale): + ovr_scale_x = float(out_ar.shape[1] - 2 * radius) / xsize + ovr_scale_y = float(out_ar.shape[0] - 2 * radius) / ysize + ewres = gt[1] / ovr_scale_x + nsres = gt[5] / ovr_scale_y + inv_nsres = 1.0 / nsres + inv_ewres = 1.0 / ewres + + az = 315 + alt = 45 + degreesToRadians = math.pi / 180 + + sin_alt = math.sin(alt * degreesToRadians) + azRadians = az * degreesToRadians + z_scale_factor = z / (8 * scale) + cos_alt_mul_z_scale_factor = \ + math.cos(alt * degreesToRadians) * z_scale_factor + cos_az_mul_cos_alt_mul_z_scale_factor_mul_254 = \ + 254 * math.cos(azRadians) * cos_alt_mul_z_scale_factor + sin_az_mul_cos_alt_mul_z_scale_factor_mul_254 = \ + 254 * math.sin(azRadians) * cos_alt_mul_z_scale_factor + square_z_scale_factor = z_scale_factor * z_scale_factor + sin_alt_mul_254 = 254.0 * sin_alt + + for j in range(radius, out_ar.shape[0]-radius): + win_line = in_ar[0][j-radius:j+radius+1,:] + for i in range(radius, out_ar.shape[1]-radius): + win = win_line[:,i-radius:i+radius+1].tolist() + x = inv_ewres * ((win[0][0] + win[1][0] + win[1][0] + win[2][0])-\ + (win[0][2] + win[1][2] + win[1][2] + win[2][2])) + y = inv_nsres * ((win[2][0] + win[2][1] + win[2][1] + win[2][2])-\ + (win[0][0] + win[0][1] + win[0][1] + win[0][2])) + xx_plus_yy = x * x + y * y + cang_mul_254 = (sin_alt_mul_254 - \ + (y * cos_az_mul_cos_alt_mul_z_scale_factor_mul_254 - \ + x * sin_az_mul_cos_alt_mul_z_scale_factor_mul_254)) / \ + math.sqrt(1 + square_z_scale_factor * xx_plus_yy) + if cang_mul_254 < 0: + out_ar[j,i] = 1 + else: + out_ar[j,i] = 1 + round(cang_mul_254) + + def hillshade(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, + raster_ysize, radius, gt, **kwargs): + z = float(kwargs['z_factor']) + scale= float(kwargs['scale']) + hillshade_int(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, + raster_ysize, radius, gt, z, scale) -VRT that adds 2 (or more) rasters -********************************* - -.. code-block:: xml - - - EPSG:26711 - 440720,60,0,3751320,0,-60 - - add - Python - - - - byte.tif - - - byte2.tif - - - - -VRT that computes hillshading using an external library -******************************************************* - -.. code-block:: xml - - - EPSG:4326 - -80.004166666666663,0.008333333333333,0, - 44.004166666666663,0,-0.008333333333333 - - Gray - - n43.dt0 - - Python - hillshading.hillshade - - 1 - Int16 - - - -with hillshading.py: - -.. code-block:: python +.. note:: - # Licence: MIT - # Copyright 2016, Even Rouault - import math - - def hillshade_int(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, - raster_ysize, radius, gt, z, scale): - ovr_scale_x = float(out_ar.shape[1] - 2 * radius) / xsize - ovr_scale_y = float(out_ar.shape[0] - 2 * radius) / ysize - ewres = gt[1] / ovr_scale_x - nsres = gt[5] / ovr_scale_y - inv_nsres = 1.0 / nsres - inv_ewres = 1.0 / ewres - - az = 315 - alt = 45 - degreesToRadians = math.pi / 180 - - sin_alt = math.sin(alt * degreesToRadians) - azRadians = az * degreesToRadians - z_scale_factor = z / (8 * scale) - cos_alt_mul_z_scale_factor = \ - math.cos(alt * degreesToRadians) * z_scale_factor - cos_az_mul_cos_alt_mul_z_scale_factor_mul_254 = \ - 254 * math.cos(azRadians) * cos_alt_mul_z_scale_factor - sin_az_mul_cos_alt_mul_z_scale_factor_mul_254 = \ - 254 * math.sin(azRadians) * cos_alt_mul_z_scale_factor - square_z_scale_factor = z_scale_factor * z_scale_factor - sin_alt_mul_254 = 254.0 * sin_alt - - for j in range(radius, out_ar.shape[0]-radius): - win_line = in_ar[0][j-radius:j+radius+1,:] - for i in range(radius, out_ar.shape[1]-radius): - win = win_line[:,i-radius:i+radius+1].tolist() - x = inv_ewres * ((win[0][0] + win[1][0] + win[1][0] + win[2][0])-\ - (win[0][2] + win[1][2] + win[1][2] + win[2][2])) - y = inv_nsres * ((win[2][0] + win[2][1] + win[2][1] + win[2][2])-\ - (win[0][0] + win[0][1] + win[0][1] + win[0][2])) - xx_plus_yy = x * x + y * y - cang_mul_254 = (sin_alt_mul_254 - \ - (y * cos_az_mul_cos_alt_mul_z_scale_factor_mul_254 - \ - x * sin_az_mul_cos_alt_mul_z_scale_factor_mul_254)) / \ - math.sqrt(1 + square_z_scale_factor * xx_plus_yy) - if cang_mul_254 < 0: - out_ar[j,i] = 1 - else: - out_ar[j,i] = 1 + round(cang_mul_254) - - def hillshade(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, - raster_ysize, radius, gt, **kwargs): - z = float(kwargs['z_factor']) - scale= float(kwargs['scale']) - hillshade_int(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, - raster_ysize, radius, gt, z, scale) - -Python module path -++++++++++++++++++ - -When importing modules from inline Python code or when relying on out-of-line -code (PixelFunctionType of the form "module_name.function_name"), you need -to make sure the modules are accessible through the python path. Note that -contrary to the Python interactive interpreter, the current path is not -automatically added when used from GDAL. So you may need to define the -**PYTHONPATH** environment variable if you get ModuleNotFoundError exceptions. + When importing modules from inline Python code or when relying on out-of-line + code (PixelFunctionType of the form "module_name.function_name"), you need + to make sure the modules are accessible through the python path. Note that + contrary to the Python interactive interpreter, the current path is not + automatically added when used from GDAL. So you may need to define the + **PYTHONPATH** environment variable if you get ModuleNotFoundError exceptions. .. _raster_vrt_security_implications: Security implications -********************* ++++++++++++++++++++++ The ability to run Python code potentially opens the door to many potential vulnerabilities if the user of GDAL may process untrusted datasets. To avoid diff --git a/doc/source/drivers/raster/vrt_processed_dataset.rst b/doc/source/drivers/raster/vrt_processed_dataset.rst index b3383126763b..823718ba148d 100644 --- a/doc/source/drivers/raster/vrt_processed_dataset.rst +++ b/doc/source/drivers/raster/vrt_processed_dataset.rst @@ -9,8 +9,9 @@ VRT processed dataset A VRT processed dataset is a specific variant of the :ref:`raster.vrt` format, to apply chained processing steps that may apply to several bands at the same time. -The following built-in algorithms are introduced, and may typically be applied -in the following order: +The following built-in algorithms are available: + +- Expression: evaluate a specified expression (e.g., "(B1+B2)/B3"). Available since GDAL 3.11. - LocalScaleOffset: apply per-pixel gain and offset coming (typically subsampled) from auxiliary datasets. Can be used for dehazing processing. @@ -307,3 +308,50 @@ The following optional arguments may be specified: - ``src_nodata``: Override the input nodata value coming from the previous step (or the input dataset for the first step). - ``dst_nodata``: Set the output nodata value. + +Expression +---------- + +Evaluate an expression using `muparser `__ or, if enabled at compile-time, `ExprTk `__. + +The following argument must be specified: + +- ``expression``: An expression to be evaluated. Band values can be accessed by the variables ``B1``, ``B2``, etc. + Although muparser does not support vector data types, the name ``BANDS`` will be expanded into a list of the + available bands, allowing the use of expressions such as ``sum(BANDS)``. When using ExprTk, the name ``BANDS`` + will be a vector whose (0-indexed) elements can be accessed individually. Expressions may return more than one + value; one output band will be created for each value. The expression must return the same number of values + each time it is invoked. + +The following optional arguments may be specified: + +- ``dialect``: Indicates the library that should be used to evaluate the expression. Defaults to "muparser". + +- ``batch_size``: When set to a value ``N``, the expression will be evaluated independently in groups of + up to ``N`` bands. The result of the expression will be the concatenation of the results from each batch. + +When using ExprTk, the following configuration options are available: + +- .. config:: GDAL_EXPRTK_MAX_EXPRESSION_LENGTH + :default: 100000 + + Indicates the maximum number of characters expression that will be passed to the ExprTk parser. + +- .. config:: GDAL_EXPRTK_MAX_VECTOR_LENGTH + :default: 100000 + + Indicates the maximum length of a vector variables declared within an expression. + +- .. config:: GDAL_EXPRTK_ENABLE_LOOPS + :choices: YES, NO + :default: YES + + Indicates whether looping constructs (for, while, etc.) may be used within an expression. + +- .. config:: GDAL_EXPRTK_TIMEOUT_SECONDS + :default: 1 + + Indicates the maximum per-pixel runtime of an ExprTk expression. ExprTk performs runtime + checks only when loops are used. + + diff --git a/doc/source/spelling_wordlist.txt b/doc/source/spelling_wordlist.txt index f39a583c6268..6d2ed460c8a5 100644 --- a/doc/source/spelling_wordlist.txt +++ b/doc/source/spelling_wordlist.txt @@ -1873,6 +1873,7 @@ multithreaded multithreading Multithreading mundialis +muparser Mutatie mutex Mutex diff --git a/frmts/vrt/CMakeLists.txt b/frmts/vrt/CMakeLists.txt index 8e423afd6d3a..acea9ced4ef2 100644 --- a/frmts/vrt/CMakeLists.txt +++ b/frmts/vrt/CMakeLists.txt @@ -1,3 +1,4 @@ + add_gdal_driver( TARGET gdal_vrt BUILTIN @@ -5,6 +6,7 @@ add_gdal_driver( vrtdataset.h vrtderivedrasterband.cpp vrtdriver.cpp + vrtexpression.h vrtfilters.cpp vrtrasterband.cpp vrtrawrasterband.cpp @@ -22,6 +24,23 @@ gdal_standard_includes(gdal_vrt) target_include_directories(gdal_vrt PRIVATE ${GDAL_RASTER_FORMAT_SOURCE_DIR}/raw $) +if (GDAL_USE_EXPRTK) + target_sources(gdal_vrt PRIVATE vrtexpression_exprtk.cpp) + if (MSVC) + set_source_files_properties(vrtexpression_exprtk.cpp PROPERTIES COMPILE_FLAGS "/bigobj") + elseif(MINGW) + set_source_files_properties(vrtexpression_exprtk.cpp PROPERTIES COMPILE_FLAGS "-Wa,-mbig-obj") + endif() + target_compile_definitions(gdal_vrt PRIVATE GDAL_VRT_ENABLE_EXPRTK) + target_link_libraries(gdal_vrt PRIVATE ExprTk::ExprTk) +endif() + +if (GDAL_USE_MUPARSER) + target_sources(gdal_vrt PRIVATE vrtexpression_muparser.cpp) + gdal_target_link_libraries(gdal_vrt PRIVATE muparser::muparser) + target_compile_definitions(gdal_vrt PRIVATE GDAL_VRT_ENABLE_MUPARSER) +endif() + set(GDAL_DATA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/data/gdalvrt.xsd ) @@ -44,3 +63,4 @@ set_property(SOURCE vrtwarped.cpp PROPERTY SKIP_UNITY_BUILD_INCLUSION ON) if (NOT GDAL_ENABLE_DRIVER_VRT) target_compile_definitions(gdal_vrt PRIVATE -DNO_OPEN) endif() + diff --git a/frmts/vrt/pixelfunctions.cpp b/frmts/vrt/pixelfunctions.cpp index 72e028719691..da919aef1a0f 100644 --- a/frmts/vrt/pixelfunctions.cpp +++ b/frmts/vrt/pixelfunctions.cpp @@ -14,6 +14,7 @@ #include #include "gdal.h" #include "vrtdataset.h" +#include "vrtexpression.h" #include @@ -1617,6 +1618,110 @@ static CPLErr MaxPixelFunc(void **papoSources, int nSources, void *pData, nPixelSpace, nLineSpace, papszArgs); } +static const char pszExprPixelFuncMetadata[] = + "" + " " + " " + " exprtk" + " muparser" + " " + ""; + +static CPLErr ExprPixelFunc(void **papoSources, int nSources, void *pData, + int nXSize, int nYSize, GDALDataType eSrcType, + GDALDataType eBufType, int nPixelSpace, + int nLineSpace, CSLConstList papszArgs) +{ + /* ---- Init ---- */ + + if (GDALDataTypeIsComplex(eSrcType)) + { + CPLError(CE_Failure, CPLE_AppDefined, + "expression cannot by applied to complex data types"); + return CE_Failure; + } + + std::unique_ptr poExpression; + + const char *pszExpression = CSLFetchNameValue(papszArgs, "expression"); + + const char *pszSourceNames = CSLFetchNameValue(papszArgs, "SOURCE_NAMES"); + const CPLStringList aosSourceNames( + CSLTokenizeString2(pszSourceNames, "|", 0)); + + std::vector adfValuesForPixel(nSources); + + const char *pszDialect = CSLFetchNameValue(papszArgs, "dialect"); + if (!pszDialect) + { + pszDialect = "muparser"; + } + + poExpression = gdal::MathExpression::Create(pszExpression, pszDialect); + + // cppcheck-suppress knownConditionTrueFalse + if (!poExpression) + { + return CE_Failure; + } + + { + int iSource = 0; + for (const auto &osName : aosSourceNames) + { + poExpression->RegisterVariable(osName, + &adfValuesForPixel[iSource++]); + } + } + + if (strstr(pszExpression, "BANDS")) + { + poExpression->RegisterVector("BANDS", &adfValuesForPixel); + } + + std::unique_ptr padfResults( + static_cast(VSI_MALLOC2_VERBOSE(nXSize, sizeof(double)))); + if (!padfResults) + return CE_Failure; + + /* ---- Set pixels ---- */ + size_t ii = 0; + for (int iLine = 0; iLine < nYSize; ++iLine) + { + for (int iCol = 0; iCol < nXSize; ++iCol, ++ii) + { + for (int iSrc = 0; iSrc < nSources; iSrc++) + { + // cppcheck-suppress unreadVariable + adfValuesForPixel[iSrc] = + GetSrcVal(papoSources[iSrc], eSrcType, ii); + } + + if (auto eErr = poExpression->Evaluate(); eErr != CE_None) + { + return CE_Failure; + } + else + { + padfResults.get()[iCol] = poExpression->Results()[0]; + } + } + + GDALCopyWords(padfResults.get(), GDT_Float64, sizeof(double), + static_cast(pData) + + static_cast(nLineSpace) * iLine, + eBufType, nPixelSpace, nXSize); + } + + /* ---- Return success ---- */ + return CE_None; +} // ExprPixelFunc + /************************************************************************/ /* GDALRegisterDefaultPixelFunc() */ /************************************************************************/ @@ -1738,5 +1843,7 @@ CPLErr GDALRegisterDefaultPixelFunc() pszMinMaxFuncMetadataNodata); GDALAddDerivedBandPixelFuncWithArgs("max", MaxPixelFunc, pszMinMaxFuncMetadataNodata); + GDALAddDerivedBandPixelFuncWithArgs("expression", ExprPixelFunc, + pszExprPixelFuncMetadata); return CE_None; } diff --git a/frmts/vrt/vrtdataset.h b/frmts/vrt/vrtdataset.h index aa6852b6b80c..daac98c71e7f 100644 --- a/frmts/vrt/vrtdataset.h +++ b/frmts/vrt/vrtdataset.h @@ -190,6 +190,11 @@ class CPL_DLL VRTSource return false; } + virtual const CPLString &GetName() const + { + return m_osName; + } + /** Returns a string with the VRTSource class type. * This method must be implemented in all subclasses */ @@ -199,6 +204,9 @@ class CPL_DLL VRTSource { return CE_None; } + + protected: + CPLString m_osName{}; }; typedef VRTSource *(*VRTSourceParser)(const CPLXMLNode *, const char *, diff --git a/frmts/vrt/vrtderivedrasterband.cpp b/frmts/vrt/vrtderivedrasterband.cpp index 4c58ce962ee1..dfc7816c520c 100644 --- a/frmts/vrt/vrtderivedrasterband.cpp +++ b/frmts/vrt/vrtderivedrasterband.cpp @@ -1364,6 +1364,31 @@ CPLErr VRTDerivedRasterBand::IRasterIO( papszArgs = CSLSetNameValue(papszArgs, pszKey, pszValue); } + CPLString osSourceNames; + for (int iBuffer = 0; iBuffer < nBufferCount; iBuffer++) + { + int iSource = anMapBufferIdxToSourceIdx[iBuffer]; + const VRTSource *poSource = papoSources[iSource]; + + if (iBuffer > 0) + { + osSourceNames += "|"; + } + + const auto &osName = poSource->GetName(); + if (osName.empty()) + { + osSourceNames += "B" + std::to_string(iBuffer + 1); + } + else + { + osSourceNames += osName; + } + } + + papszArgs = + CSLSetNameValue(papszArgs, "SOURCE_NAMES", osSourceNames.c_str()); + eErr = (poPixelFunc->first)( static_cast(pBuffers), nBufferCount, pData, nBufXSize, nBufYSize, eSrcType, eBufType, static_cast(nPixelSpace), diff --git a/frmts/vrt/vrtexpression.h b/frmts/vrt/vrtexpression.h new file mode 100644 index 000000000000..c0919a35750c --- /dev/null +++ b/frmts/vrt/vrtexpression.h @@ -0,0 +1,205 @@ +/****************************************************************************** + * + * Project: Virtual GDAL Datasets + * Purpose: Implementation of MathExpression + * Author: Daniel Baston + * + ****************************************************************************** + * Copyright (c) 2024, ISciences LLC + * + * SPDX-License-Identifier: MIT + ****************************************************************************/ + +#pragma once + +#include "cpl_error.h" + +#include +#include + +namespace gdal +{ + +/** + * Class to support evaluation of a mathematical expression + */ +class MathExpression +{ + public: + virtual ~MathExpression() = default; + + /** + * Create a MathExpression using a specified dialect. + * @param pszExpression The body of the expression, e.g. "X + 3" + * @param pszDialect The expression dialect, e.g. "muparser" + * @return a MathExpression using the specified dialect, or nullptr on error. + */ + static std::unique_ptr Create(const char *pszExpression, + const char *pszDialect); + + /** + * Register a variable to be used in the expression. + * + * The value of the variable may be changed during repeated evaluations of + * the expression, but its location in memory may not. + * + * @param osVariable The name of the variable + * @param pdfLocation The location of the variable's value + * + * @since 3.11 + */ + virtual void RegisterVariable(std::string_view osVariable, + double *pdfLocation) = 0; + + /** + * Register a vector to be used in the expression. + * + * The values and size of the vector may be changed during repeated evaluations + * of the expression, but its location in memory may not. + * + * @param osVariable The name of the vector + * @param padfLocation The location of the vector + * + * @since 3.11 + */ + virtual void RegisterVector(std::string_view osVariable, + std::vector *padfLocation) = 0; + + /** + * Compile the expression. + * + * If not called explicitly, the expression will be compiled the first time + * the expression is evaluated. + * + * @return CE_None if the expression can be successfully parsed and all + * symbols have been registe vrtexpression_exprtk.cpp + vrtexpression_muparser.cppred, CE_Failure otherwise. + * + * @since 3.11 + */ + virtual CPLErr Compile() = 0; + + /** + * Evaluate the expression. + * + * @return CE_None if the expression was successfully evaluated, CE_Failure otherwise. + * + * @since 3.11 + */ + virtual CPLErr Evaluate() = 0; + + /** + * Access the results from the last time the expression was evaluated. + * + * The returned vector may be reused on subsequent evaluations of the expression. + * + * @return a reference to the vector in which results are stored. + * + * @since 3.11 + */ + virtual const std::vector &Results() const = 0; +}; + +/*! @cond Doxygen_Suppress */ + +#if GDAL_VRT_ENABLE_EXPRTK + +/** + * Class to support evaluation of an expression using the exprtk library. + */ +class ExprtkExpression : public MathExpression +{ + public: + explicit ExprtkExpression(std::string_view osExpression); + + virtual ~ExprtkExpression(); + + void RegisterVariable(std::string_view osVariable, + double *pdfLocation) override; + + void RegisterVector(std::string_view osVariable, + std::vector *padfLocation) override; + + CPLErr Compile() override; + + CPLErr Evaluate() override; + + const std::vector &Results() const override; + + private: + class Impl; + + std::unique_ptr m_pImpl; +}; + +#endif + +#if GDAL_VRT_ENABLE_MUPARSER + +/** + * Class to support evaluation of an expression using the muparser library. + */ +class MuParserExpression : public MathExpression +{ + public: + explicit MuParserExpression(std::string_view osExpression); + + virtual ~MuParserExpression(); + + void RegisterVariable(std::string_view osVariable, + double *pdfLocation) override; + + void RegisterVector(std::string_view osVariable, + std::vector *padfLocation) override; + + CPLErr Compile() override; + + CPLErr Evaluate() override; + + const std::vector &Results() const override; + + private: + class Impl; + + std::unique_ptr m_pImpl; +}; + +#endif + +inline std::unique_ptr +MathExpression::Create([[maybe_unused]] const char *pszExpression, + const char *pszDialect) +{ + if (EQUAL(pszDialect, "exprtk")) + { +#if GDAL_VRT_ENABLE_EXPRTK + return std::make_unique(pszExpression); +#else + CPLError(CE_Failure, CPLE_IllegalArg, + "Dialect '%s' is not supported by this GDAL build. A GDAL " + "build with ExprTk is be needed.", + pszDialect); +#endif + } + else if (EQUAL(pszDialect, "muparser")) + { +#if GDAL_VRT_ENABLE_MUPARSER + return std::make_unique(pszExpression); +#else + CPLError(CE_Failure, CPLE_IllegalArg, + "Dialect '%s' is not supported by this GDAL build. A GDAL " + "build with muparser is be needed.", + pszDialect); +#endif + } + else + { + CPLError(CE_Failure, CPLE_IllegalArg, "Unknown expression dialect: %s", + pszDialect); + } + return nullptr; +} + +/*! @endcond */ + +} // namespace gdal diff --git a/frmts/vrt/vrtexpression_exprtk.cpp b/frmts/vrt/vrtexpression_exprtk.cpp new file mode 100644 index 000000000000..f3de3feab295 --- /dev/null +++ b/frmts/vrt/vrtexpression_exprtk.cpp @@ -0,0 +1,380 @@ +/****************************************************************************** + * + * Project: Virtual GDAL Datasets + * Purpose: Implementation of GDALExpressionEvaluator. + * Author: Daniel Baston + * + ****************************************************************************** + * Copyright (c) 2024, ISciences LLC + * + * SPDX-License-Identifier: MIT + ****************************************************************************/ + +#include "cpl_conv.h" +#include "cpl_error.h" +#include "cpl_string.h" +#include "vrtexpression.h" + +#define exprtk_disable_caseinsensitivity +#define exprtk_disable_rtl_io +#define exprtk_disable_rtl_io_file +#define exprtk_disable_rtl_vecops +#define exprtk_disable_string_capabilities + +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnull-dereference" +#endif + +#include + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +#include +#include +#include +#include +#include + +namespace gdal +{ + +/*! @cond Doxygen_Suppress */ +struct vector_access_check final : public exprtk::vector_access_runtime_check +{ + bool handle_runtime_violation(violation_context &context) override + { + auto nElements = (static_cast(context.end_ptr) - + static_cast(context.base_ptr)) / + context.type_size; + auto nIndexAccessed = (static_cast(context.access_ptr) - + static_cast(context.base_ptr)) / + context.type_size; + + std::ostringstream oss; + oss << "Attempted to access index " << nIndexAccessed + << " in a vector of " << nElements + << " elements when evaluating VRT expression."; + throw std::runtime_error(oss.str()); + } +}; + +struct loop_timeout_check final : public exprtk::loop_runtime_check +{ + using time_point_t = std::chrono::time_point; + + loop_timeout_check() : exprtk::loop_runtime_check() + { + double dfMaxLoopIterationSeconds = + CPLAtofM(CPLGetConfigOption("GDAL_EXPRTK_TIMEOUT_SECONDS", "1")); + max_duration = std::chrono::microseconds( + static_cast(dfMaxLoopIterationSeconds * 1e6)); + } + + void start_timer() + { + timeout_t = std::chrono::steady_clock::now() + max_duration; + } + + bool check() override + { + + if (++iterations >= max_iters_per_check) + { + if (std::chrono::steady_clock::now() > timeout_t) + { + timeout = true; + return false; + } + + iterations = 0; + } + + return true; + } + + void handle_runtime_violation(const violation_context &) override + { + std::ostringstream oss; + + // current version of exprtk does not report the correct + // violation in case of timeout, so we track the error category + // ourselves + if (timeout) + { + oss << "Expression evaluation time exceeded maximum of " + << static_cast(max_duration.count() / 1e6) + << " seconds. You can increase this threshold by setting the " + << "GDAL_EXPRTK_TIMEOUT_SECONDS configuration " + << "option."; + } + else + { + oss << "Exceeded maximum of " << max_loop_iterations + << " loop iterations."; + } + + throw std::runtime_error(oss.str()); + } + + static constexpr size_t max_iters_per_check = 10000; + size_t iterations = 0; + time_point_t timeout_t{}; + std::chrono::microseconds max_duration{}; + bool timeout{false}; +}; + +class ExprtkExpression::Impl +{ + public: + exprtk::expression m_oExpression{}; + exprtk::parser m_oParser{}; + exprtk::symbol_table m_oSymbolTable{}; + std::string m_osExpression{}; + + std::vector> m_aoVariables{}; + std::vector *>> m_aoVectors{}; + std::vector m_adfResults{}; + vector_access_check m_oVectorAccessCheck{}; + loop_timeout_check m_oLoopRuntimeCheck{}; + + bool m_bIsCompiled{false}; + + explicit Impl() + { + using settings_t = std::decay_t; + + m_oLoopRuntimeCheck.loop_set = loop_timeout_check::e_all_loops; + m_oLoopRuntimeCheck.max_loop_iterations = std::numeric_limits< + decltype(m_oLoopRuntimeCheck.max_loop_iterations)>::max(); + m_oParser.register_vector_access_runtime_check(m_oVectorAccessCheck); + m_oParser.register_loop_runtime_check(m_oLoopRuntimeCheck); + +#ifndef NDEBUG + // Only used for automated testing of GDAL_EXPRTK_TIMEOUT_SECONDS + m_oSymbolTable.add_function("sleep", sleep); +#endif + + int nMaxVectorLength = std::atoi( + CPLGetConfigOption("GDAL_EXPRTK_MAX_VECTOR_LENGTH", "100000")); + + if (nMaxVectorLength > 0) + { + m_oParser.settings().set_max_local_vector_size(nMaxVectorLength); + } + + bool bEnableLoops = + CPLTestBool(CPLGetConfigOption("GDAL_EXPRTK_ENABLE_LOOPS", "YES")); + if (!bEnableLoops) + { + m_oParser.settings().disable_control_structure( + settings_t::e_ctrl_for_loop); + m_oParser.settings().disable_control_structure( + settings_t::e_ctrl_while_loop); + m_oParser.settings().disable_control_structure( + settings_t::e_ctrl_repeat_loop); + } + } + + CPLErr compile() + { + int nMaxExpressionLength = std::atoi( + CPLGetConfigOption("GDAL_EXPRTK_MAX_EXPRESSION_LENGTH", "100000")); + if (m_osExpression.size() > + static_cast(nMaxExpressionLength)) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Expression length of %d exceeds maximum of %d set by " + "GDAL_EXPRTK_MAX_EXPRESSION_LENGTH", + static_cast(m_osExpression.size()), + nMaxExpressionLength); + return CE_Failure; + } + + for (const auto &[osVariable, pdfValueLoc] : m_aoVariables) + { + m_oSymbolTable.add_variable(osVariable, *pdfValueLoc); + } + + for (const auto &[osVariable, padfVectorLoc] : m_aoVectors) + { + m_oSymbolTable.add_vector(osVariable, *padfVectorLoc); + } + + m_oExpression.register_symbol_table(m_oSymbolTable); + bool bSuccess = m_oParser.compile(m_osExpression, m_oExpression); + + if (!bSuccess) + { + for (size_t i = 0; i < m_oParser.error_count(); i++) + { + const auto &oError = m_oParser.get_error(i); + + CPLError(CE_Warning, CPLE_AppDefined, + "Position: %02d " + "Type: [%s] " + "Message: %s\n", + static_cast(oError.token.position), + exprtk::parser_error::to_str(oError.mode).c_str(), + oError.diagnostic.c_str()); + } + + CPLError(CE_Failure, CPLE_AppDefined, + "Failed to parse expression."); + return CE_Failure; + } + + m_bIsCompiled = true; + + return CE_None; + } + + CPLErr evaluate() + { + if (!m_bIsCompiled) + { + auto eErr = compile(); + if (eErr != CE_None) + { + return eErr; + } + } + + m_adfResults.clear(); + double value; + try + { + value = m_oExpression.value(); // force evaluation + } + catch (const std::exception &e) + { + CPLError(CE_Failure, CPLE_AppDefined, "%s", e.what()); + return CE_Failure; + } + + m_oLoopRuntimeCheck.start_timer(); + const auto &results = m_oExpression.results(); + + // We follow a different method to get the result depending on + // how the expression was formed. If a "return" statement was + // used, the result will be accessible via the "result" object. + // If no "return" statement was used, the result is accessible + // from the "value" variable (and must not be a vector.) + if (results.count() == 0) + { + m_adfResults.resize(1); + m_adfResults[0] = value; + } + else if (results.count() == 1) + { + + if (results[0].type == exprtk::type_store::e_scalar) + { + m_adfResults.resize(1); + results.get_scalar(0, m_adfResults[0]); + } + else if (results[0].type == exprtk::type_store::e_vector) + { + results.get_vector(0, m_adfResults); + } + else + { + CPLError(CE_Failure, CPLE_AppDefined, + "Expression returned an unexpected type."); + return CE_Failure; + } + } + else + { + m_adfResults.resize(results.count()); + for (size_t i = 0; i < results.count(); i++) + { + if (results[i].type != exprtk::type_store::e_scalar) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Expression must return a vector or a list of " + "scalars."); + return CE_Failure; + } + else + { + results.get_scalar(i, m_adfResults[i]); + } + } + } + + return CE_None; + } + + private: +#ifndef NDEBUG + struct sleep_fn : public exprtk::ifunction + { + sleep_fn() : exprtk::ifunction(1) + { + } + + using exprtk::ifunction::operator(); + + double operator()(const double &seconds) override + { + std::this_thread::sleep_for( + std::chrono::microseconds(static_cast(seconds * 1e6))); + return 0; + } + }; + + sleep_fn sleep{}; +#endif +}; + +/** + * Define an expression to be evaluated using the exprtk library. + * + * @param osExpression the expression to evaluate. Refer to exprtk library documentation + * for details of the allowable syntax. + * + * @since 3.11 + */ +ExprtkExpression::ExprtkExpression(std::string_view osExpression) + : m_pImpl(std::make_unique()) +{ + m_pImpl->m_osExpression = osExpression; +} + +ExprtkExpression::~ExprtkExpression() +{ +} + +void ExprtkExpression::RegisterVariable(std::string_view osVariable, + double *pdfValue) +{ + m_pImpl->m_aoVariables.emplace_back(osVariable, pdfValue); +} + +void ExprtkExpression::RegisterVector(std::string_view osVariable, + std::vector *padfValue) +{ + m_pImpl->m_aoVectors.emplace_back(osVariable, padfValue); +} + +CPLErr ExprtkExpression::Compile() +{ + return m_pImpl->compile(); +} + +const std::vector &ExprtkExpression::Results() const +{ + return m_pImpl->m_adfResults; +} + +CPLErr ExprtkExpression::Evaluate() +{ + return m_pImpl->evaluate(); +} + +/*! @endcond Doxygen_Suppress */ + +} // namespace gdal diff --git a/frmts/vrt/vrtexpression_muparser.cpp b/frmts/vrt/vrtexpression_muparser.cpp new file mode 100644 index 000000000000..6245f7115aea --- /dev/null +++ b/frmts/vrt/vrtexpression_muparser.cpp @@ -0,0 +1,169 @@ +/****************************************************************************** + * + * Project: Virtual GDAL Datasets + * Purpose: Implementation of GDALExpressionEvaluator. + * Author: Daniel Baston + * + ****************************************************************************** + * Copyright (c) 2024, ISciences LLC + * + * SPDX-License-Identifier: MIT + ****************************************************************************/ + +#include "vrtexpression.h" +#include "cpl_string.h" + +#include +#include + +namespace gdal +{ + +/*! @cond Doxygen_Suppress */ +class MuParserExpression::Impl +{ + public: + explicit Impl(std::string_view osExpression) + : m_osExpression(std::string(osExpression)), m_oVectors{}, m_oParser{}, + m_adfResults{1}, m_bIsCompiled{false} + { + } + + void Register(std::string_view osVariable, double *pdfValue) + { + m_oParser.DefineVar(std::string(osVariable), pdfValue); + } + + CPLErr Compile() + { + try + { + CPLString tmpExpression(m_osExpression); + + for (const auto &[osVec, osElems] : m_oVectors) + { + tmpExpression.replaceAll(osVec, osElems); + } + + m_oParser.SetExpr(tmpExpression); + } + catch (const mu::Parser::exception_type &e) + { + CPLError(CE_Failure, CPLE_AppDefined, "%s", e.GetMsg().c_str()); + return CE_Failure; + } + catch (const std::exception &e) + { + CPLError(CE_Failure, CPLE_AppDefined, "%s", e.what()); + return CE_Failure; + } + + return CE_None; + } + + CPLErr Evaluate() + { + if (!m_bIsCompiled) + { + if (auto eErr = Compile(); eErr != CE_None) + { + return eErr; + } + + m_bIsCompiled = true; + } + + try + { + int nResults; + const double *dfResults = m_oParser.Eval(nResults); + m_adfResults.resize(nResults); + std::copy(dfResults, dfResults + nResults, m_adfResults.begin()); + } + catch (const mu::Parser::exception_type &e) + { + CPLError(CE_Failure, CPLE_AppDefined, "%s", e.GetMsg().c_str()); + return CE_Failure; + } + catch (const std::exception &e) + { + CPLError(CE_Failure, CPLE_AppDefined, "%s", e.what()); + return CE_Failure; + } + + return CE_None; + } + + CPLString m_osExpression; + std::map m_oVectors; + mu::Parser m_oParser; + std::vector m_adfResults; + bool m_bIsCompiled; +}; + +MuParserExpression::MuParserExpression(std::string_view osExpression) + : m_pImpl{std::make_unique(osExpression)} + +{ +} + +MuParserExpression::~MuParserExpression() +{ +} + +CPLErr MuParserExpression::Compile() +{ + return m_pImpl->Compile(); +} + +void MuParserExpression::RegisterVariable(std::string_view osVariable, + double *pdfValue) +{ + m_pImpl->Register(osVariable, pdfValue); +} + +void MuParserExpression::RegisterVector(std::string_view osVariable, + std::vector *padfValues) +{ + // muparser does not support vector variables, so we simulate them + // by creating a scalar variable for each element, and then replacing + // the name of the vector by a list of its elements before compiling + // the expression. + CPLString osElementVarName; + CPLString osElementsList; + std::string osVectorVarName(osVariable); + + int nElementVarNameLength = static_cast( + 4 + osVectorVarName.size() + std::log10(padfValues->size())); + osElementsList.reserve(padfValues->size() * + (1 + nElementVarNameLength)); // +1 for commas + + for (std::size_t i = 0; i < padfValues->size(); i++) + { + osElementVarName.Printf("__%s_%d", osVectorVarName.c_str(), + static_cast(i)); + RegisterVariable(osElementVarName, padfValues->data() + i); + + if (i > 0) + { + osElementsList += ","; + } + osElementsList += osElementVarName; + } + + m_pImpl->m_oVectors[std::string(osVariable)] = osElementsList; +} + +CPLErr MuParserExpression::Evaluate() +{ + return m_pImpl->Evaluate(); +} + +const std::vector &MuParserExpression::Results() const +{ + return m_pImpl->m_adfResults; +} + +/*! @endcond Doxygen_Suppress */ + +} // namespace gdal diff --git a/frmts/vrt/vrtprocesseddataset.cpp b/frmts/vrt/vrtprocesseddataset.cpp index ce84a6ac5c57..08ab4bc878c1 100644 --- a/frmts/vrt/vrtprocesseddataset.cpp +++ b/frmts/vrt/vrtprocesseddataset.cpp @@ -683,9 +683,11 @@ CPLErr VRTProcessedDataset::Init(const CPLXMLNode *psTree, // Should not happen frequently as pixel init functions are expected // to validate that they can accept the number of output bands provided // to them - CPLError(CE_Failure, CPLE_AppDefined, - "Number of output bands of last step is not consistent with " - "number of VRTProcessedRasterBand's"); + CPLError( + CE_Failure, CPLE_AppDefined, + "Number of output bands of last step (%d) is not consistent with " + "number of VRTProcessedRasterBand's (%d)", + nCurrentBandCount, nBands); return CE_Failure; } diff --git a/frmts/vrt/vrtprocesseddatasetfunctions.cpp b/frmts/vrt/vrtprocesseddatasetfunctions.cpp index f6fb184fb1f6..1f8465b979b2 100644 --- a/frmts/vrt/vrtprocesseddatasetfunctions.cpp +++ b/frmts/vrt/vrtprocesseddatasetfunctions.cpp @@ -13,10 +13,13 @@ #include "cpl_minixml.h" #include "cpl_string.h" #include "vrtdataset.h" +#include "vrtexpression.h" #include +#include #include #include +#include #include #include @@ -1468,6 +1471,275 @@ static CPLErr TrimmingProcess( return CE_None; } +/************************************************************************/ +/* ExpressionInit() */ +/************************************************************************/ + +namespace +{ + +class ExpressionData +{ + public: + ExpressionData(int nInBands, int nBatchSize, std::string_view osExpression, + std::string_view osDialect) + : m_nInBands(nInBands), m_nNominalBatchSize(nBatchSize), + m_nBatchCount(DIV_ROUND_UP(nInBands, nBatchSize)), m_adfResults{}, + m_osExpression(std::string(osExpression)), + m_osDialect(std::string(osDialect)), m_oNominalBatchEnv{}, + m_oPartialBatchEnv{} + { + } + + CPLErr Compile() + { + auto eErr = m_oNominalBatchEnv.Initialize(m_osExpression, m_osDialect, + m_nNominalBatchSize); + if (eErr != CE_None) + { + return eErr; + } + + const auto nPartialBatchSize = m_nInBands % m_nNominalBatchSize; + if (nPartialBatchSize) + { + eErr = m_oPartialBatchEnv.Initialize(m_osExpression, m_osDialect, + nPartialBatchSize); + } + + return eErr; + } + + CPLErr Evaluate(const double *padfInputs, size_t nExpectedOutBands) + { + m_adfResults.clear(); + + for (int iBatch = 0; iBatch < m_nBatchCount; iBatch++) + { + const auto nBandsRemaining = + static_cast(m_nInBands - (m_nNominalBatchSize * iBatch)); + const auto nBatchSize = + std::min(m_nNominalBatchSize, nBandsRemaining); + + auto &oEnv = GetEnv(nBatchSize); + + const double *pdfStart = padfInputs + iBatch * m_nNominalBatchSize; + const double *pdfEnd = pdfStart + nBatchSize; + + std::copy(pdfStart, pdfEnd, oEnv.m_adfValuesForPixel.begin()); + + if (auto eErr = oEnv.m_poExpression->Evaluate(); eErr != CE_None) + { + return eErr; + } + + const auto &adfResults = oEnv.m_poExpression->Results(); + if (m_nBatchCount > 1) + { + std::copy(adfResults.begin(), adfResults.end(), + std::back_inserter(m_adfResults)); + } + } + + if (nExpectedOutBands > 0) + { + if (Results().size() != static_cast(nExpectedOutBands)) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Expression returned %d values but " + "%d output bands were expected.", + static_cast(Results().size()), + static_cast(nExpectedOutBands)); + return CE_Failure; + } + } + + return CE_None; + } + + const std::vector &Results() const + { + if (m_nBatchCount == 1) + { + return m_oNominalBatchEnv.m_poExpression->Results(); + } + else + { + return m_adfResults; + } + } + + private: + const int m_nInBands; + const int m_nNominalBatchSize; + const int m_nBatchCount; + std::vector m_adfResults; + + const CPLString m_osExpression; + const CPLString m_osDialect; + + struct InvocationEnv + { + std::vector m_adfValuesForPixel; + std::unique_ptr m_poExpression; + + CPLErr Initialize(const CPLString &osExpression, + const CPLString &osDialect, int nBatchSize) + { + m_poExpression = + gdal::MathExpression::Create(osExpression, osDialect.c_str()); + // cppcheck-suppress knownConditionTrueFalse + if (m_poExpression == nullptr) + { + return CE_Failure; + } + + m_adfValuesForPixel.resize(nBatchSize); + + for (int i = 0; i < nBatchSize; i++) + { + std::string osVar = "B" + std::to_string(i + 1); + m_poExpression->RegisterVariable(osVar, + &m_adfValuesForPixel[i]); + } + + if (osExpression.ifind("BANDS") != std::string::npos) + { + m_poExpression->RegisterVector("BANDS", &m_adfValuesForPixel); + } + + return m_poExpression->Compile(); + } + }; + + InvocationEnv &GetEnv(int nBatchSize) + { + if (nBatchSize == m_nNominalBatchSize) + { + return m_oNominalBatchEnv; + } + else + { + return m_oPartialBatchEnv; + } + } + + InvocationEnv m_oNominalBatchEnv; + InvocationEnv m_oPartialBatchEnv; +}; + +} // namespace + +static CPLErr ExpressionInit(const char * /*pszFuncName*/, void * /*pUserData*/, + CSLConstList papszFunctionArgs, int nInBands, + GDALDataType eInDT, double * /* padfInNoData */, + int *pnOutBands, GDALDataType *peOutDT, + double ** /* ppadfOutNoData */, + const char * /* pszVRTPath */, + VRTPDWorkingDataPtr *ppWorkingData) +{ + CPLAssert(eInDT == GDT_Float64); + + *peOutDT = eInDT; + *ppWorkingData = nullptr; + + const char *pszBatchSize = + CSLFetchNameValue(papszFunctionArgs, "batch_size"); + auto nBatchSize = nInBands; + + if (pszBatchSize != nullptr) + { + nBatchSize = std::min(nInBands, std::atoi(pszBatchSize)); + } + + if (nBatchSize < 1) + { + CPLError(CE_Failure, CPLE_IllegalArg, "batch_size must be at least 1"); + return CE_Failure; + } + + const char *pszDialect = CSLFetchNameValue(papszFunctionArgs, "dialect"); + if (pszDialect == nullptr) + { + pszDialect = "muparser"; + } + + const char *pszExpression = + CSLFetchNameValue(papszFunctionArgs, "expression"); + + auto data = std::make_unique(nInBands, nBatchSize, + pszExpression, pszDialect); + + if (auto eErr = data->Compile(); eErr != CE_None) + { + return eErr; + } + + if (*pnOutBands == 0) + { + std::vector aDummyValues(nInBands); + if (auto eErr = data->Evaluate(aDummyValues.data(), 0); eErr != CE_None) + { + return eErr; + } + + *pnOutBands = static_cast(data->Results().size()); + } + + *ppWorkingData = data.release(); + + return CE_None; +} + +static void ExpressionFree(const char * /* pszFuncName */, + void * /* pUserData */, + VRTPDWorkingDataPtr pWorkingData) +{ + ExpressionData *data = static_cast(pWorkingData); + delete data; +} + +static CPLErr ExpressionProcess( + const char * /* pszFuncName */, void * /* pUserData */, + VRTPDWorkingDataPtr pWorkingData, CSLConstList /* papszFunctionArgs */, + int nBufXSize, int nBufYSize, const void *pInBuffer, + size_t /* nInBufferSize */, GDALDataType eInDT, int nInBands, + const double *CPL_RESTRICT /* padfInNoData */, void *pOutBuffer, + size_t /* nOutBufferSize */, GDALDataType eOutDT, int nOutBands, + const double *CPL_RESTRICT /* padfOutNoData */, double /* dfSrcXOff */, + double /* dfSrcYOff */, double /* dfSrcXSize */, double /* dfSrcYSize */, + const double /* adfSrcGT */[], const char * /* pszVRTPath "*/, + CSLConstList /* papszExtra */) +{ + ExpressionData *expr = static_cast(pWorkingData); + + const size_t nElts = static_cast(nBufXSize) * nBufYSize; + + CPL_IGNORE_RET_VAL(eInDT); + CPLAssert(eInDT == GDT_Float64); + const double *CPL_RESTRICT padfSrc = static_cast(pInBuffer); + + CPLAssert(eOutDT == GDT_Float64); + CPL_IGNORE_RET_VAL(eOutDT); + double *CPL_RESTRICT padfDst = static_cast(pOutBuffer); + + for (size_t i = 0; i < nElts; i++) + { + if (auto eErr = expr->Evaluate(padfSrc, nOutBands); eErr != CE_None) + { + return eErr; + } + + const auto &adfResults = expr->Results(); + std::copy(adfResults.begin(), adfResults.end(), padfDst); + + padfDst += nOutBands; + padfSrc += nInBands; + } + + return CE_None; +} + /************************************************************************/ /* GDALVRTRegisterDefaultProcessedDatasetFuncs() */ /************************************************************************/ @@ -1573,4 +1845,17 @@ void GDALVRTRegisterDefaultProcessedDatasetFuncs() "", GDT_Float64, nullptr, 0, nullptr, 0, TrimmingInit, TrimmingFree, TrimmingProcess, nullptr); + + GDALVRTRegisterProcessedDatasetFunc( + "Expression", nullptr, + "" + " " + " " + " " + "", + GDT_Float64, nullptr, 0, nullptr, 0, ExpressionInit, ExpressionFree, + ExpressionProcess, nullptr); } diff --git a/frmts/vrt/vrtsources.cpp b/frmts/vrt/vrtsources.cpp index c6dbbc2ec4e6..4418920ce14d 100644 --- a/frmts/vrt/vrtsources.cpp +++ b/frmts/vrt/vrtsources.cpp @@ -568,6 +568,7 @@ CPLErr VRTSimpleSource::XMLInit(const CPLXMLNode *psSrc, const char *pszVRTPath, m_poMapSharedSources = &oMapSharedSources; m_osResampling = CPLGetXMLValue(psSrc, "resampling", ""); + m_osName = CPLGetXMLValue(psSrc, "name", ""); /* -------------------------------------------------------------------- */ /* Prepare filename. */ diff --git a/port/cpl_known_config_options.h b/port/cpl_known_config_options.h index c866bad5b3b5..2239afa9be24 100644 --- a/port/cpl_known_config_options.h +++ b/port/cpl_known_config_options.h @@ -261,6 +261,10 @@ constexpr static const char* const apszKnownConfigOptions[] = "GDAL_ENABLE_TIFF_SPLIT", // from gtiffdataset_read.cpp "GDAL_ENABLE_WMS_CACHE", // from gdalwmsdataset.cpp "GDAL_ERROR_ON_LIBJPEG_WARNING", // from jpgdataset.cpp + "GDAL_EXPRTK_ENABLE_LOOPS", // from vrtexpression_exprtk.cpp + "GDAL_EXPRTK_MAX_EXPRESSION_LENGTH", // from vrtexpression_exprtk.cpp + "GDAL_EXPRTK_MAX_VECTOR_LENGTH", // from vrtexpression_exprtk.cpp + "GDAL_EXPRTK_TIMEOUT_SECONDS", // from vrtexpression_exprtk.cpp "GDAL_FILENAME_IS_UTF8", // from cpl_getexecpath.cpp, cpl_odbc.cpp, cpl_vsil_win32.cpp, cpl_vsisimple.cpp, cplgetsymbol.cpp, ecwcreatecopy.cpp, ecwdataset.cpp, gdalpython.cpp, netcdfdataset.cpp, netcdfmultidim.cpp, ogrxlsdatasource.cpp "GDAL_FORCE_CACHING", // from gdaldataset.cpp, gdalrasterband.cpp "GDAL_GCPS_TO_GEOTRANSFORM_APPROX_OK", // from gdal_misc.cpp