From 65ed3828d1d3decb9960a77123ac4e447f791cee Mon Sep 17 00:00:00 2001 From: Redmer Kronemeijer <12477216+redmer@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:29:58 +0200 Subject: [PATCH 1/4] feat: rename bbox parameters fixes: Make `--bounding-box` less ideosyncratic #18 --- README.md | 10 ++++---- src/bounding-box.ts | 61 +++++++++++++++++++++++++++++++++++++-------- src/cli.ts | 12 ++++----- 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 1ff7720..1c80343 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ Options: --version Show version number [boolean] -i, --input GeoPackage file [string] -o, --output Output quads file [string] - --bounding-box Limit features to bounding box [string] - --bounding-box-crs Coordinate Reference System code [string] + --bbox Limit features to bounding box [string] + --bbox-crs Coordinate Reference System code [string] --only-layers Only output named feature layers and attribute ta bles [array] --base-iri Base IRI [string] @@ -32,9 +32,9 @@ Options: ## Options -Limit **large GeoPackages** with `--bounding-box`. +Limit **large GeoPackages** with `--bbox`. Supply a space separated list of coordinates as a string to limit the Features returned. -Provide the bounding box as WGS84 (GeoJSON default) or supply a CRS code (lookup via EPSG.io) or Projection WKT with `--bounding-box-crs`. +Provide the bounding box as WGS84 (GeoJSON default) or supply a CRS code (lookup via EPSG.io) or Projection WKT with `--bbox-crs`. You can also **limit** which feature **layers** (or attribute tables) are output with `--only-layers`. **NULL values** are never output and **binary values** are skipped, unless `--include-binary-values` is provided. @@ -60,7 +60,7 @@ That Geometry in turn has a `geo:asGeoJSON` and `geo:asWKT` representations of t Column metadata is very limited and most values are not typed properly. 
Example data abridged [from NGA][example.gpkg]: -the table `media`is a feature table, `nga_properties` is an attribute table. +the table `media` is a feature table, `nga_properties` is an attribute table. ```trig xyz:media { diff --git a/src/bounding-box.ts b/src/bounding-box.ts index 462d7b1..89f500b 100644 --- a/src/bounding-box.ts +++ b/src/bounding-box.ts @@ -42,20 +42,61 @@ export async function getWGS84Converter( } } -export function suppliedBoundingBox( +function spaceSepBbox( + bbstring: string, + srs: proj4.Converter | string, +): BoundingBox { + const [west, east, south, north] = bbstring + .split(" ", 4) + .map((c) => Number(c)); + const bb = new BoundingBox(west, east, south, north); + return bb.projectBoundingBox(srs, WGS84_CODE); +} + +function commaSepBbox( bbstring: string, - inCRS: proj4.Converter | string, -) { + srs: proj4.Converter | string, +): BoundingBox { + const parts = bbstring.split(","); + let west: string, + east: string, + __1: string, + south: string, + north: string, + __2: string; + if (parts.length == 4) [west, east, south, north] = parts; + else [west, east, __1, south, north, __2] = parts; + + const bb = new BoundingBox( + Number(west), + Number(east), + Number(south), + Number(north), + ); + return bb.projectBoundingBox(srs, WGS84_CODE); +} + +/** + * Convert a supplied bbox definition string to a {BoundingBox}. + * + * There are two types of bbox definition strings: + * 1. Four parts, space separated (deprecated) + * 2. Four or six parts, comma separated. 
(3rd axis ignored) + * + * @param bboxString Bounding box provided string + * @param srs The SRS in which to interpret this bboxstring + */ +export function suppliedBoundingBox( + bboxString: string, + srs: proj4.Converter | string, +): BoundingBox { try { - const [west, east, south, north] = bbstring - .split(" ", 4) - .map((c) => Number(c)); - const bb = new BoundingBox(west, east, south, north); - return bb.projectBoundingBox(inCRS, WGS84_CODE); + if (bboxString.includes(" ")) return spaceSepBbox(bboxString, srs); + return commaSepBbox(bboxString, srs); } catch (e) { Bye( - `Bounding box could not be parsed. Provide as a single space-separated string:`, - `"{min long (west)} {max long (east)} {min lat (south)} {max lat (north)}".`, + `Bounding box could not be parsed. Provide a single comma-separated string:`, + `"{min long (west)},{max long (east)},{min lat (south)},{max lat (north)}".`, ); } } diff --git a/src/cli.ts b/src/cli.ts index 5b52f89..51b868d 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -41,11 +41,11 @@ async function cli() { }) .option("output", { alias: "o", type: "string", desc: "Output quads file" }) .normalize(["input", "output"]) - .option("bounding-box", { + .option("bbox", { type: "string", desc: "Limit features to bounding box", }) - .option("bounding-box-crs", { + .option("bbox-crs", { type: "string", desc: `Coordinate Reference System code`, }) @@ -87,11 +87,11 @@ async function cli() { // If there's a bounding box CRS defined, first check if we can parse it. // This is less expensive than converting quads etc. // TODO: Can we remove this reference to WGS84? - const bboxConverter = argv.boundingBoxCrs - ? await getWGS84Converter(argv.boundingBoxCrs) + const bboxConverter = argv.bboxCrs + ? await getWGS84Converter(argv.bboxCrs) : await getWGS84Converter(WGS84_CODE); - const boundingBox = argv.boundingBox - ? suppliedBoundingBox(argv.boundingBox, bboxConverter) + const boundingBox = argv.bbox + ? 
suppliedBoundingBox(argv.bbox, bboxConverter) : undefined; // If there's a target file, open a write stream and determine the mimetype off of it. From f104558669882c3a4308d7d57797650a18a7d0b3 Mon Sep 17 00:00:00 2001 From: Redmer Kronemeijer <12477216+redmer@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:38:31 +0200 Subject: [PATCH 2/4] 1.3.0 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 6e9d32b..681092f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@rdmr-eu/rdf-geopackage", - "version": "1.2.1", + "version": "1.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@rdmr-eu/rdf-geopackage", - "version": "1.2.1", + "version": "1.3.0", "license": "MPL-2.0", "dependencies": { "@ngageoint/geopackage": "^4.2.4", diff --git a/package.json b/package.json index 682eb50..87415bd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@rdmr-eu/rdf-geopackage", - "version": "1.2.1", + "version": "1.3.0", "description": "Generate RDF out of a GeoPackage (for further processing)", "repository": "https://github.com/redmer/rdf-geopackage.git", "main": "dist/rdf-geopackage.js", From f12c14323965185cc85b80d17ac87da8ff19d745 Mon Sep 17 00:00:00 2001 From: Redmer Kronemeijer <12477216+redmer@users.noreply.github.com> Date: Tue, 31 Oct 2023 10:16:48 +0100 Subject: [PATCH 3/4] refactor: improve module interfacing (#25) This PR refactors 1. the data meta-model module interface 2. the default Facade-X data meta-model according to this interface 3. the GeoSPARQL output to be according to this interface (1) prepares for future modules by enabling more encapsulation of a module class. (2) improves the locality of the Facade-X output. That in turn makes it easier to make new modules based off the Facade-X module. 
(3) makes clearer how geometries are generated, when they're reprojected and how they're output. --- README.md | 105 +++++++----- package-lock.json | 34 +++- package.json | 3 +- src/cli-error.ts | 15 ++ src/cli.ts | 27 +-- src/geopackage.ts | 51 +++--- src/interfaces.ts | 46 ++++++ src/models/facade-x/facade-x.ts | 183 +++++++++++++++++++++ src/models/facade-x/rdf-attribute-table.ts | 27 --- src/models/facade-x/rdf-feature-table.ts | 108 ------------ src/models/facade-x/rdf-geopackage.ts | 56 ------- src/models/facade-x/rdf-table-common.ts | 70 -------- src/models/geosparql/geojson.ts | 57 +++++++ src/models/geosparql/wkt.ts | 43 +++++ src/models/models-registry.ts | 97 +++++++++++ src/models/models.ts | 42 ----- src/models/utils.ts | 20 +++ src/rdf-stream-override.ts | 4 +- 18 files changed, 595 insertions(+), 393 deletions(-) create mode 100644 src/interfaces.ts create mode 100644 src/models/facade-x/facade-x.ts delete mode 100644 src/models/facade-x/rdf-attribute-table.ts delete mode 100644 src/models/facade-x/rdf-feature-table.ts delete mode 100644 src/models/facade-x/rdf-geopackage.ts delete mode 100644 src/models/facade-x/rdf-table-common.ts create mode 100644 src/models/geosparql/geojson.ts create mode 100644 src/models/geosparql/wkt.ts create mode 100644 src/models/models-registry.ts delete mode 100644 src/models/models.ts create mode 100644 src/models/utils.ts diff --git a/README.md b/README.md index 1ff7720..acfda5f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Generate RDF out of a GeoPackage (for further processing) Install using NPM locally `npm install --global @rdmr-eu/rdf-geopackage` as a command line tool. Check if it's installed correctly with `rdf-geopackage --help`. -That should return the following help info. 
+That should return the following help info: ```man Generate RDF from an OGC GeoPackage with rdf-geopackage @@ -19,76 +19,101 @@ Options: --version Show version number [boolean] -i, --input GeoPackage file [string] -o, --output Output quads file [string] + --format Override output format (default: nquads) + [choices: "nq", "nquads", "trig", "nt", "ntriples", "ttl", "turtle"] --bounding-box Limit features to bounding box [string] --bounding-box-crs Coordinate Reference System code [string] --only-layers Only output named feature layers and attribute ta bles [array] - --base-iri Base IRI [string] - --format Override output format (default: nquads) - [choices: "nq", "nquads", "trig", "nt", "ntriples", "ttl", "turtle"] --include-binary-values Output binary values [boolean] + --base-iri Base IRI [string] --model Data meta model [choices: "facade-x"] ``` ## Options -Limit **large GeoPackages** with `--bounding-box`. -Supply a space separated list of coordinates as a string to limit the Features returned. -Provide the bounding box as WGS84 (GeoJSON default) or supply a CRS code (lookup via EPSG.io) or Projection WKT with `--bounding-box-crs`. +Basic input and output serializations can be set with the following options: + +- `--input`: the path to the input GeoPackage file (required). With `-`, it reads the GeoPackage from stdin, e.g., piping a file with curl +- `--output`: path to the file output. By default, `rdf-geopackage` outputs _nquads_ to stdout. Its extension sets the serialization format, optionally with `.gz` to GZip the output. E.g., `--output myfile.ttl.gz` +- `--format`: set the output format explicitly. Provide a file extension with `.gz` to GZip the output. -You can also **limit** which feature **layers** (or attribute tables) are output with `--only-layers`. -**NULL values** are never output and **binary values** are skipped, unless `--include-binary-values` is provided. -Binary values are Base64-encoded string values with a `xsd:base64Binary` datatype. 
+Work with large GeoPackages by limiting the output features, output tables and binary values: -By default, **output** is directed to stdout as N-Quads. Provide `--output` to save the triples or quads to a file. -The **serialization format** is recognized from the file extension but can be overriden with `--format`. -Add `.gz` after the extension (e.g. `mydata.ttls.gz`) to **GZip** the output. +- `--bounding-box` limits the output features to those in this area (default CRS: WGS84) +- `--bounding-box-crs` indicates the CRS for the aforementioned bounding box. Supply an EPSG code (web lookup with EPSG.io) or a projection WKT. +- `--only-layers` limits which feature layers (or attribute tables!) are output. +- `--include-binary-values` overrides the default of skipping binary values. These will be base64 encoded string values with a `^^xsd:base64Binary` data type. NULL values are never output. -Provide the path to the **input file** with `--input`. -You may also pipe in a file to rdf-geopackage. +Modify the model and types of the output triples or quads: -The generated quads follow a **data meta-model**, supplied by `--model` and by default `facade-x` with GeoSPARQL. -Override the **base IRI** with `--base-iri` to let subject-URLs not be derived from the present working directory. +- `--base-iri`: set the relative base for the output RDF data. By default, this value is derived from the present working directory. +- `--model`: the GeoPackage tables are not natively RDF data, so a module is programmed to generate triples according to a data meta-model. Included modules: + - default: [`facade-x`](#model-facade-x) -## Model: Facade-X +## RDF output -Facade-X is a data meta-model from the SPARQL-Anything project, that can represent tabular data easily. -The built-in data meta-model `facade-x` extends the tabular representation with [GeoSPARQL][geosparql] for geographical information from feature tables. 
+#### Model: Facade-X + +Facade-X is a data meta-model from the SPARQL-Anything project, that can easily represent tabular data. Facade-X uses RDF containers and blank nodes to represent tables and rows. -Features are `geo:Feature`s with a `geo:hasDefaultGeometry` that refers to a `geo:Geometry`. -That Geometry in turn has a `geo:asGeoJSON` and `geo:asWKT` representations of their geometry in WGS84 (GeoJSON-default). +Column metadata is currently very limited ([GH-24]) and many values are not typed properly. + +[GH-24]: https://github.com/redmer/rdf-geopackage/issues/24 + +#### Features, geometries and CRS’s + +Features and their geometries are represented using [GeoSPARQL][geosparql]. +Only rows from feature tables are a `geo:Feature`. + +A feature has zero or more geometries predicated with `geo:hasDefaultGeometry`. +There might be no geometry if the underlying library does not support the geometry type. +There may be multiple geometries if the feature is from a layer not in EPSG:4326. + +That's because a GeoJSON serialization (`geo:asGeoJSON`) is always (reprojected) in EPSG:4326. +A `geo:Geometry` can be in only one CRS, meaning that when the feature is not originally in EPSG:4326, other serializations should also be reprojected. +That is undesirable, so in these cases, `rdf-geopackage` generates a second `geo:Geometry` for the WKT serialization (`geo:asWKT`). + +[geosparql]: https://www.ogc.org/standard/geosparql/ + +#### Example RDF output -Column metadata is very limited and most values are not typed properly. Example data abridged [from NGA][example.gpkg]: -the table `media`is a feature table, `nga_properties` is an attribute table. +the table `media` is a feature table, `nga_properties` is an attribute table. 
+ +[example.gpkg]: https://github.com/ngageoint/GeoPackage/blob/master/docs/examples/java/example.gpkg + +```turtle +prefix fx: <http://sparql.xyz/facade-x/ns/> +prefix geo: <http://www.opengis.net/ont/geosparql#> +prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> +prefix xyz: <http://sparql.xyz/facade-x/data/> + +xyz:nga_properties { # representing a table +xyz:nga_properties a fx:root ; # representing a table + rdf:_1 [ # the first row + xyz:id 14; + xyz:property "subject"; + xyz:value "Examples" + ] . +} -```trig xyz:media { xyz:media a fx:root ; rdf:_1 [ - a geo:Feature ; + a geo:Feature ; # a row from a feature table xyz:text "BIT Systems"; xyz:date "2023-01-23"; - geo:hasDefaultGeometry [ + geo:hasDefaultGeometry [ # single geometry as CRS is EPSG:4326 a geo:Geometry ; - geo:asWKT "POINT (-104.801918 39.720014)"^^geo:wktLiteral + geo:asWKT "POINT (-104.801918 39.720014)"^^geo:wktLiteral ; + geo:asGeoJSON "{\"coordinates\":[-104.801918,39.720014],\"type\":\"Point\"}"^^geo:geoJSONLiteral ] ] . } - -xyz:nga_properties { -xyz:nga_properties a fx:root ; - rdf:_1 [ - xyz:id 14; - xyz:property "subject"; - xyz:value "Examples" - ] . 
-} ``` -[geosparql]: https://www.ogc.org/standard/geosparql/ -[example.gpkg]: https://github.com/ngageoint/GeoPackage/blob/master/docs/examples/java/example.gpkg - # Acknowledgements This tool was developed for a project funded by the [_City Deal Openbare ruimte_][cdor], diff --git a/package-lock.json b/package-lock.json index 681092f..769c6cd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,14 +11,15 @@ "dependencies": { "@ngageoint/geopackage": "^4.2.4", "better-sqlite3": "^8.7.0", - "geojson": "^0.5.0", "json-stable-stringify": "^1.0.2", "n3": "^1.17.1", "node-fetch": "^3.3.2", "proj4": "^2.9.0", "rdf-data-factory": "^1.1.2", "rdf-literal": "^1.3.1", + "reproject": "^1.2.7", "supports-color": "^9.4.0", + "wkx": "^0.5.0", "yargs": "^17.7.2" }, "bin": { @@ -4010,14 +4011,6 @@ "node": ">=6.9.0" } }, - "node_modules/geojson": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/geojson/-/geojson-0.5.0.tgz", - "integrity": "sha512-/Bx5lEn+qRF4TfQ5aLu6NH+UKtvIv7Lhc487y/c8BdludrCTpiWf9wyI0RTyqg49MFefIAvFDuEi5Dfd/zgNxQ==", - "engines": { - "node": ">= 0.10" - } - }, "node_modules/geojson-rbush": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/geojson-rbush/-/geojson-rbush-3.2.0.tgz", @@ -6280,6 +6273,21 @@ "regexp-tree": "bin/regexp-tree" } }, + "node_modules/reproject": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/reproject/-/reproject-1.2.7.tgz", + "integrity": "sha512-x3wGpoHBsXDa1iyZZA3Nn52iTgzfLLPBLvsj1j4v3Kwcy3if5IRgi/s/Ayy/qAIm6BsjCwnuojShpfYG+FBAjA==", + "dependencies": { + "concat-stream": "^2.0.0", + "event-stream": "^4.0.0", + "geojson-stream": "0.1.0", + "minimist": "^1.2.5", + "proj4": "^2.6.2" + }, + "bin": { + "reproject": "cli.js" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -7188,6 +7196,14 @@ "resolved": "https://registry.npmjs.org/wkt-parser/-/wkt-parser-1.3.3.tgz", "integrity": 
"sha512-ZnV3yH8/k58ZPACOXeiHaMuXIiaTk1t0hSUVisbO0t4RjA5wPpUytcxeyiN2h+LZRrmuHIh/1UlrR9e7DHDvTw==" }, + "node_modules/wkx": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/wkx/-/wkx-0.5.0.tgz", + "integrity": "sha512-Xng/d4Ichh8uN4l0FToV/258EjMGU9MGcA0HV2d9B/ZpZB3lqQm7nkOdZdm5GhKtLLhAE7PiVQwN4eN+2YJJUg==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/wrap-ansi": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", diff --git a/package.json b/package.json index 87415bd..5ae5e67 100644 --- a/package.json +++ b/package.json @@ -69,14 +69,15 @@ "dependencies": { "@ngageoint/geopackage": "^4.2.4", "better-sqlite3": "^8.7.0", - "geojson": "^0.5.0", "json-stable-stringify": "^1.0.2", "n3": "^1.17.1", "node-fetch": "^3.3.2", "proj4": "^2.9.0", "rdf-data-factory": "^1.1.2", "rdf-literal": "^1.3.1", + "reproject": "^1.2.7", "supports-color": "^9.4.0", + "wkx": "^0.5.0", "yargs": "^17.7.2" }, "jest": { diff --git a/src/cli-error.ts b/src/cli-error.ts index 725f1d5..93f2a0a 100644 --- a/src/cli-error.ts +++ b/src/cli-error.ts @@ -26,3 +26,18 @@ export function Warn(message: string, ...optionalParams: any[]): void { ); else console.warn(`# Warning: ${message}`, ...optionalParams); } + +let WARNINGS: Record<string, number> = {}; + +/** Collect warnings and output with call counts with OutputWarnCounts() */ +export function CountWarn(message: string): void { + const value = WARNINGS[message]; + WARNINGS[message] = value === undefined ? 
1 : value + 1; +} + +/** Output collected warnings (CountWarn) with call counts */ +export function OutputWarnCounts(): void { + for (const [message, count] of Object.entries(WARNINGS)) + Warn(`${message}: ${count}`); + WARNINGS = {}; +} diff --git a/src/cli.ts b/src/cli.ts index 5b52f89..acb6505 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -20,11 +20,11 @@ import { Bye } from "./cli-error.js"; import { GeoPackageParser } from "./geopackage.js"; import { EXTENSION_MIMETYPES, - MimetypeValues, mimetypeForExtension, supportsGraphs, + type MimetypeValues, } from "./mimetypes.js"; -import { ModelRegistry } from "./models/models.js"; +import { ModuleRegistry, Registry } from "./models/models-registry.js"; import { FX, GEO, RDFNS, XSD, XYZ } from "./prefixes.js"; import { MergeGraphsStream } from "./rdf-stream-override.js"; @@ -41,6 +41,10 @@ async function cli() { }) .option("output", { alias: "o", type: "string", desc: "Output quads file" }) .normalize(["input", "output"]) + .option("format", { + desc: "Override output format (default: nquads)", + }) + .choices("format", Object.keys(EXTENSION_MIMETYPES)) .option("bounding-box", { type: "string", desc: "Limit features to bounding box", @@ -54,19 +58,15 @@ async function cli() { array: true, desc: "Only output named feature layers and attribute tables", }) - .option("base-iri", { type: "string", desc: "Base IRI" }) - .option("format", { - desc: "Override output format (default: nquads)", - }) .option("include-binary-values", { type: "boolean", desc: "Output binary values", }) - .choices("format", Object.keys(EXTENSION_MIMETYPES)) + .option("base-iri", { type: "string", desc: "Base IRI" }) .option("model", { desc: "Data meta model", }) - .choices("model", ModelRegistry.knownModels()) + .choices("model", ModuleRegistry.knownModels(Registry.Generic)) .strict(); const argv = await options.parse(); @@ -106,8 +106,11 @@ async function cli() { : mimetypeForExtension("nq"); // If no valid extension, fallback to nquads. 
const inTriples = !supportsGraphs(mimetype); - const wantsGzip: boolean = argv.output?.endsWith(".gz"); - const model: string = argv.model ?? ModelRegistry.knownModels()[0]; + const wantsGzip: boolean = + argv.output?.endsWith(".gz") ?? argv.format?.endsWith(".gz") ?? false; + const model: string = + argv.model ?? ModuleRegistry.knownModels(Registry.Generic)[0]; + const DF = new DataFactory(); const parser = new GeoPackageParser(input, { model, @@ -115,7 +118,10 @@ async function cli() { allowedLayers: argv.onlyLayers, baseIRI, includeBinaryValues: Boolean(argv.includeBinaryValues), + geoSPARQLModels: ModuleRegistry.knownModels(Registry.Geometry), + factory: DF, }); + const writer = new StreamWriter({ format: mimetype, prefixes: { @@ -130,7 +136,6 @@ async function cli() { // `Error parsing geometry`: the GeoPackage may output errors to console.log. // This line disables console.log by hackily overriding it. console.log = function () {}; - const DF = new DataFactory(); try { pipeline( diff --git a/src/geopackage.ts b/src/geopackage.ts index ef2819c..63d5dad 100644 --- a/src/geopackage.ts +++ b/src/geopackage.ts @@ -1,40 +1,34 @@ -import { BoundingBox, GeoPackage, GeoPackageAPI } from "@ngageoint/geopackage"; +import { GeoPackage, GeoPackageAPI } from "@ngageoint/geopackage"; import type * as RDF from "@rdfjs/types"; import { Readable } from "node:stream"; import { DataFactory } from "rdf-data-factory"; -import { quadsFromGeoPackage } from "./models/facade-x/rdf-geopackage.js"; -import { ModelRegistry, QuadsGeneratorFunc } from "./models/models.js"; +import type { CLIContext, RDFContext, RDFOptions } from "./interfaces.js"; +import { FacadeXWithGeoSparql } from "./models/facade-x/facade-x.js"; +import { GeoJSONSerializer } from "./models/geosparql/geojson.js"; +import { WktSerialization } from "./models/geosparql/wkt.js"; +import { + ModuleRegistry, + Registry, + type QuadsGen, +} from "./models/models-registry.js"; // Register known quad generating modules here. 
// I don't know how to make this a true plugin (but that's not really necessary either) // The order of models is important: the first model is the default. -const WellKnownModels = { "facade-x": quadsFromGeoPackage }; -for (const [modelName, func] of Object.entries(WellKnownModels)) - ModelRegistry.add(modelName, func); +for (const model of [new WktSerialization(), new GeoJSONSerializer()]) + ModuleRegistry.add(Registry.Geometry, model.id, model); -export interface GeoPackageOptions { - /** Pass a data factory or rdf-data-factory is used */ - dataFactory?: RDF.DataFactory; - /** The URL base for local URLs in this GeoPackage */ - baseIRI?: string; - /** Limit the processed feature layers and attribute tables */ - allowedLayers?: string[]; - /** Only process features within this EPSG:4326 bounding box. By default, all - * features are processed. */ - boundingBox?: BoundingBox; - /** Generate quads where the object/value is a binary (Base-64 encoded). */ - includeBinaryValues?: boolean; - /** Data meta model by which triples are generated */ - model: string; -} +for (const model of [new FacadeXWithGeoSparql()]) + ModuleRegistry.add(Registry.Generic, model.id, model); +/** Helper class to parse */ export class GeoPackageParser extends Readable implements RDF.Stream { - options: GeoPackageOptions; + options: CLIContext & RDFContext & RDFOptions; filepathOrBuffer: string | Buffer | Uint8Array; iterQuad: Generator<RDF.Quad>; gpkg: GeoPackage; shouldRead: boolean; - generator: QuadsGeneratorFunc; + generator: QuadsGen; /** * Read a GeoPackage and output a stream of RDF.Quads @@ -43,13 +37,16 @@ export class GeoPackageParser extends Readable implements RDF.Stream { */ constructor( filepathOrBuffer: string | Buffer | Uint8Array, - options: GeoPackageOptions, + options: CLIContext & RDFContext & RDFOptions, ) { super({ objectMode: true }); this.filepathOrBuffer = filepathOrBuffer; - this.options = { dataFactory: new DataFactory(), ...options }; - this.generator = 
ModelRegistry.get(this.options.model); + this.options = { + ...options, + factory: options.factory ?? new DataFactory(), + }; + this.generator = ModuleRegistry.get(Registry.Generic, this.options.model); this.shouldRead = false; } @@ -57,7 +54,7 @@ export class GeoPackageParser extends Readable implements RDF.Stream { GeoPackageAPI.open(this.filepathOrBuffer) .then((gpkg) => { this.gpkg = gpkg; - this.iterQuad = this.generator(this.gpkg, this.options); + this.iterQuad = this.generator.getQuads(this.gpkg, this.options); callback(); }) .catch(callback); diff --git a/src/interfaces.ts b/src/interfaces.ts new file mode 100644 index 0000000..0ce3b1b --- /dev/null +++ b/src/interfaces.ts @@ -0,0 +1,46 @@ +import type { + BoundingBox, + SpatialReferenceSystem, +} from "@ngageoint/geopackage"; +import type * as RDF from "@rdfjs/types"; + +/** Info on a table */ +export interface TableContext { + /** Name of the originating table */ + tableName: string; + /** Columns that are unique ID columns */ + tableIDColumns?: string[]; +} + +/** Info on a feature table */ +export interface FeatureTableContext extends TableContext { + /** Spatial Reference System of this table */ + srs: SpatialReferenceSystem; +} + +/** Info on the CLI parameters */ +export interface CLIContext { + /** Limit the processed feature layers and attribute tables */ + allowedLayers?: string[]; + /** Only process features within this EPSG:4326 bounding box. By default, all + * features are processed. 
*/ + boundingBox?: BoundingBox; +} + +/** Context info for the generation of RDF */ +export interface RDFContext { + /** The base IRI for generated RDF */ + baseIRI: string; + /** An optional RDF/JS data factory */ + factory: RDF.DataFactory; +} + +/** Info on how to generate RDF */ +export interface RDFOptions { + /** Generate quads where the object/value is a (base64 encoded) binary */ + includeBinaryValues: boolean; + /** Data meta model identifier by which triples are generated */ + model: string; + /** Limit the generated GeoSPARQL serializations */ + geoSPARQLModels: string[]; +} diff --git a/src/models/facade-x/facade-x.ts b/src/models/facade-x/facade-x.ts new file mode 100644 index 0000000..cd4fbd2 --- /dev/null +++ b/src/models/facade-x/facade-x.ts @@ -0,0 +1,183 @@ +import type { GeoPackage, GeometryData } from "@ngageoint/geopackage"; +import type { DBValue } from "@ngageoint/geopackage/dist/lib/db/dbAdapter.js"; +import type { FeatureRow } from "@ngageoint/geopackage/dist/lib/features/user/featureRow.js"; +import type * as RDF from "@rdfjs/types"; +import { WGS84_CODE } from "../../bounding-box.js"; +import { CountWarn, OutputWarnCounts } from "../../cli-error.js"; +import type { + CLIContext, + FeatureTableContext, + RDFContext, + RDFOptions, + TableContext, +} from "../../interfaces.js"; +import { FX, GEO, RDFNS, XYZ } from "../../prefixes.js"; +import { enumerate } from "../../py-enumerate.js"; +import { ModuleRegistry, Registry, type QuadsGen } from "../models-registry.js"; +import { valueToTerm } from "../utils.js"; +import { queryAllFeatures } from "./featuredao-helper.js"; + +export class FacadeXWithGeoSparql implements QuadsGen { + DF: RDF.DataFactory<RDF.Quad>; + + get id() { + return "facade-x"; + } + + /** + * Iterate through all attribute tables and feature tables of a GeoPackage. + * Filter tables (allowedLayers) and features (boundingBox) and passes + * iterators on to specialized quad generating methods. 
+ */ + *getQuads(geopackage: GeoPackage, ctx: CLIContext & RDFContext & RDFOptions) { + const { allowedLayers, baseIRI, boundingBox } = ctx; + this.DF = ctx.factory; + + for (const tableName of geopackage.getAttributesTables()) { + if (allowedLayers && !allowedLayers.includes(tableName)) continue; + + const dao = geopackage.getAttributeDao(tableName); + const iter = dao.queryForEach(); + + yield* this.quadsForAttributeTable(iter, { + ...ctx, + tableIDColumns: dao.idColumns, + tableName, + baseIRI, + includeBinaryValues: ctx.includeBinaryValues, + }); + } + + for (const tableName of geopackage.getFeatureTables()) { + if (allowedLayers && !allowedLayers.includes(tableName)) continue; + + const dao = geopackage.getFeatureDao(tableName); + + // The bounding box is optional, but useful for large GeoPackages + const it = boundingBox + ? dao.fastQueryBoundingBox(boundingBox, WGS84_CODE) + : queryAllFeatures(dao); + + yield* this.quadsForFeatureTable(it, { + ...ctx, + tableName, + baseIRI, + includeBinaryValues: ctx.includeBinaryValues, + srs: dao.srs, // table SRS + }); + } + + OutputWarnCounts(); + } + + /** A Facade-X node for each row is a blank node */ + getNodeForRow() { + return this.DF.blankNode(); + } + + /** A Facade-X node for the table is based off its baseIRI or else `xyz:` */ + getNodeForTable(tableName: string, base?: string): RDF.NamedNode { + const baseURL = base ?? 
XYZ("").value; + const tableURL = new URL(encodeURIComponent(tableName), baseURL); + return this.DF.namedNode(tableURL.href); + } + + /** Generate quads that represent the table */ + *quadsForTable(tableAndGraph: RDF.Quad_Subject & RDF.Quad_Graph) { + yield this.DF.quad(tableAndGraph, RDFNS("type"), FX("root"), tableAndGraph); + } + + /** Generate quads that represent table rows */ + *quadsForRowOfTable( + row: RDF.Quad_Subject, + tableAndGraph: RDF.Quad_Subject & RDF.Quad_Graph, + i: number, + ) { + yield this.DF.quad(tableAndGraph, RDFNS(`_${i}`), row, tableAndGraph); + } + + /** Iterate properties and generate Facade-X quads */ + *quadsForAttributes( + entry: Record<string, any>, + subject: RDF.Quad_Subject, + graph: RDF.Quad_Graph, + options: RDFOptions, + ) { + for (const [k, v] of Object.entries(entry)) { + const value = valueToTerm(v, options.includeBinaryValues, this.DF); + if (value) yield this.DF.quad(subject, XYZ(encodeURI(k)), value, graph); + } + } + + /** + * Check if there's a geometry and then get the geoSPARQLModels to generate + * the quads with feature geometries. + */ + *quadsForGeometry( + data: GeometryData, + feature: RDF.Quad_Subject, + graph: RDF.Quad_Graph, + options: FeatureTableContext & RDFOptions, + ) { + const geometry = data.geometry; + + // The underlying libraries (as of writing) do not support all + // types of geometries. {geoJSONData} and {origData.geometry} + // can therefore be empty. + // Still, the feature is a geo:Feature and should be output as such + yield this.DF.quad(feature, RDFNS("type"), GEO("Feature"), graph); + if (geometry === undefined || data.geometryError) + return CountWarn( + `Table "${options.tableName}": "${data.geometryError}"; skipped`, + ); + + const geom = this.DF.blankNode(); + for (const modelName of options.geoSPARQLModels) { + const geomCls = ModuleRegistry.get(Registry.Geometry, modelName); + yield* geomCls.getQuads( + geometry, + feature, + geomCls.requiresSeparateGeomSubject?.(options) + ? 
this.DF.blankNode() + : geom, + graph, + options, + this.DF, + ); + } + } + + /** Generate RDF quads from a GeoPackage attribute table */ + *quadsForAttributeTable( + iterator: IterableIterator<Record<string, DBValue>>, + options: TableContext & RDFOptions & RDFContext, + ) { + const graph = this.getNodeForTable(options.tableName, options.baseIRI); + + yield* this.quadsForTable(graph); + for (const [i, row] of enumerate(iterator, 1)) { + const subject = this.getNodeForRow(); + + yield* this.quadsForRowOfTable(subject, graph, i); + yield* this.quadsForAttributes(row, subject, graph, options); + } + } + + /** Quads that represent the contents of a feature table */ + *quadsForFeatureTable( + iterator: IterableIterator<FeatureRow>, + options: TableContext & FeatureTableContext & RDFOptions & RDFContext, + ) { + const graph = this.getNodeForTable(options.tableName, options.baseIRI); + + yield* this.quadsForTable(graph); + for (const [i, feature] of enumerate(iterator, 1)) { + const subject = this.getNodeForRow(); + + yield* this.quadsForRowOfTable(subject, graph, i); + if (feature.values) + yield* this.quadsForAttributes(feature.values, subject, graph, options); + yield* this.quadsForGeometry(feature.geometry, subject, graph, options); + } + } +} diff --git a/src/models/facade-x/rdf-attribute-table.ts b/src/models/facade-x/rdf-attribute-table.ts deleted file mode 100644 index 6b5da8c..0000000 --- a/src/models/facade-x/rdf-attribute-table.ts +++ /dev/null @@ -1,27 +0,0 @@ -import type { DBValue } from "@ngageoint/geopackage/dist/lib/db/dbAdapter.js"; -import { enumerate } from "../../py-enumerate.js"; -import { - QuadsFromTableOptions, - getRowNode, - getTableNode, - quadsForAttributes, - quadsForTableAndRow, -} from "./rdf-table-common.js"; - -/** Generate RDF quads from a GeoPackage attribute table */ -export function* quadsFromAttributeTable( - iterator: IterableIterator<Record<string, DBValue>>, - options: QuadsFromTableOptions, -) { - const graph = 
getTableNode(options.tableName); - - for (const [i, row] of enumerate(iterator, 1)) { - const subject = getRowNode( - `${options.tableName}_${row[options.tableIDColumns[0]] ?? i}`, - options.baseIRI, - ); - - yield* quadsForTableAndRow(graph, subject, i); - yield* quadsForAttributes(row, subject, graph, options); - } -} diff --git a/src/models/facade-x/rdf-feature-table.ts b/src/models/facade-x/rdf-feature-table.ts deleted file mode 100644 index ba8ef2a..0000000 --- a/src/models/facade-x/rdf-feature-table.ts +++ /dev/null @@ -1,108 +0,0 @@ -import { - GeoPackage, - GeometryData, - SpatialReferenceSystem, -} from "@ngageoint/geopackage"; -import type { FeatureRow } from "@ngageoint/geopackage/dist/lib/features/user/featureRow.js"; -import type * as RDF from "@rdfjs/types"; -import type { Feature } from "geojson"; -import stringify from "json-stable-stringify"; -import { DataFactory } from "rdf-data-factory"; -import { Warn } from "../../cli-error.js"; -import { GEO, RDFNS } from "../../prefixes.js"; -import { enumerate } from "../../py-enumerate.js"; -import { - QuadsFromTableOptions, - getRowNode, - getTableNode, - quadsForAttributes, - quadsForTableAndRow, -} from "./rdf-table-common.js"; - -const DF = new DataFactory(); - -function srsOpengisUrl(srs: SpatialReferenceSystem) { - const { organization, organization_coordsys_id: id } = srs; - - return `http://www.opengis.net/def/crs/${organization.toUpperCase()}/0/${id}`; -} - -/** Generate GeoSPARQL quads from a feature's geometry */ -export function* quadsForGeometry( - origData: GeometryData, - geoJSONData: Feature | undefined, - subject: RDF.Quad_Subject, - graph: RDF.Quad_Graph, - options: QuadsFromTableOptions, -) { - // The underlying libraries (as of writing) do not support all - // types of geometries. {geoJSONData} and {origData.geometry} - // can therefore be empty. 
- - yield DF.quad(subject, RDFNS("type"), GEO("Feature"), graph); - const geometry = origData.geometry; - - if ( - geometry === undefined || - origData.geometryError || - geoJSONData === undefined - ) - return Warn( - `Feature geometry type not supported in ${options.tableName} (_:${subject.value}) (skipped)`, - ); - - const geo = DF.blankNode(); - yield DF.quad(subject, GEO("hasDefaultGeometry"), geo, graph); - yield DF.quad(geo, RDFNS("type"), GEO("Geometry"), graph); - - const { srs } = options; - const wktLiteral = `<${srsOpengisUrl(srs)}> ${geometry.toWkt()}`; - - yield DF.quad( - geo, - GEO("asWKT"), - DF.literal(wktLiteral, GEO("wktLiteral")), - graph, - ); - - // Q: Is this the only identifier of WGS84 herein? - const isWGS84 = - `${srs.organization}:${srs.organization_coordsys_id}` == "EPSG:4326"; - - // See issue https://github.com/redmer/rdf-geopackage/issues/19 - const wgs84Geom = isWGS84 ? geo : DF.blankNode(); - yield DF.quad(subject, GEO("hasDefaultGeometry"), wgs84Geom, graph); - yield DF.quad(wgs84Geom, RDFNS("type"), GEO("Geometry"), graph); - - yield DF.quad( - wgs84Geom, - GEO("asGeoJSON"), - DF.literal(stringify(geoJSONData.geometry), GEO("geoJSONLiteral")), - graph, - ); -} - -/** Generate RDF quads from a GeoPackage feature table */ -export function* quadsFromFeatureTable( - iterator: IterableIterator<FeatureRow>, - options: QuadsFromTableOptions, -) { - const graph = getTableNode(options.tableName); - - for (const [i, feature] of enumerate(iterator, 1)) { - const subject = getRowNode( - `${options.tableName}_${feature.id ?? 
i}`, - options.baseIRI, - ); - - yield* quadsForTableAndRow(graph, subject, i); - yield* quadsForAttributes(feature.values, subject, graph, options); - yield* quadsForGeometry( - feature.geometry, - GeoPackage.parseFeatureRowIntoGeoJSON(feature, options.srs), - subject, - graph, - options, - ); - } -} diff --git a/src/models/facade-x/rdf-geopackage.ts b/src/models/facade-x/rdf-geopackage.ts deleted file mode 100644 index 0796036..0000000 --- a/src/models/facade-x/rdf-geopackage.ts +++ /dev/null @@ -1,56 +0,0 @@ -import { GeoPackage } from "@ngageoint/geopackage"; -import type * as RDF from "@rdfjs/types"; -import { WGS84_CODE } from "../../bounding-box.js"; -import { GeoPackageOptions } from "../../geopackage.js"; -import { queryAllFeatures } from "./featuredao-helper.js"; -import { quadsFromAttributeTable } from "./rdf-attribute-table.js"; -import { quadsFromFeatureTable } from "./rdf-feature-table.js"; - -/** - * Generate RDF quads from the GeoPackage. The quads are in a Facade-X-like model for their - * generic attributes; feature geometries are modelled with GeoSPARQL. - * - * @param filepath Path to the GeoPackage - * @param boundingBox Only process features that are within this EPSG:4326 bounding box. - * If not provided, all features are processed. - * @param allowedLayers If provided, only these layers are processed - */ -export function* quadsFromGeoPackage( - geopackage: GeoPackage, - options: GeoPackageOptions, -): Generator<RDF.Quad> { - const { boundingBox, baseIRI, allowedLayers } = options; - - for (const tableName of geopackage.getAttributesTables()) { - if (allowedLayers && !allowedLayers.includes(tableName)) continue; - - // The Data Access Object can query iteratively and provide metadata - const dao = geopackage.getAttributeDao(tableName); - // TODO: I can't seem to find table definitions. ColumnDao.Mimetype are empty... 
- const iter = dao.queryForEach(); - - yield* quadsFromAttributeTable(iter, { - tableIDColumns: dao.idColumns, - tableName, - baseIRI, - includeBinaryValues: options.includeBinaryValues, - }); - } - - for (const tableName of geopackage.getFeatureTables()) { - if (allowedLayers && !allowedLayers.includes(tableName)) continue; - // The bounding box is optional, but useful for large GeoPackages - const dao = geopackage.getFeatureDao(tableName); - - const it = boundingBox - ? dao.fastQueryBoundingBox(boundingBox, WGS84_CODE) - : queryAllFeatures(dao); - - yield* quadsFromFeatureTable(it, { - tableName, - baseIRI, - includeBinaryValues: options.includeBinaryValues, - srs: dao.srs, // table SRS - }); - } -} diff --git a/src/models/facade-x/rdf-table-common.ts b/src/models/facade-x/rdf-table-common.ts deleted file mode 100644 index df921ef..0000000 --- a/src/models/facade-x/rdf-table-common.ts +++ /dev/null @@ -1,70 +0,0 @@ -import type { SpatialReferenceSystem } from "@ngageoint/geopackage"; -import type { DBValue } from "@ngageoint/geopackage/dist/lib/db/dbAdapter.js"; -import type * as RDF from "@rdfjs/types"; -import { DataFactory } from "rdf-data-factory"; -import { toRdf } from "rdf-literal"; -import { FX, RDFNS, XSD, XYZ } from "../../prefixes.js"; - -export interface QuadsFromTableOptions { - /** Name of the originating table */ - tableName: string; - /** Columns that are unique ID columns */ - tableIDColumns?: string[]; - baseIRI: string; - /** See {@link GeoPackageOptions} */ - includeBinaryValues?: boolean; - /** Spatial Reference System of this table */ - srs?: SpatialReferenceSystem; -} - -const DF = new DataFactory(); - -/** Generate an RDF Literal from a value */ -export function valueToTerm( - value: DBValue, - includeBinaryValue: boolean, -): RDF.Quad_Object { - if (value == null) return undefined; - - if (value instanceof Buffer) - if (includeBinaryValue) - return DF.literal(value.toString("base64"), XSD("base64Binary")); - else return undefined; - - 
return toRdf(value); -} - -/** Generate the RDF NamedNode for the attribute or feature table */ -export function getTableNode(tableName: string, base?: string): RDF.NamedNode { - const baseURL = base ?? XYZ("").value; - const tableURL = new URL(encodeURIComponent(tableName), baseURL); - return DF.namedNode(tableURL.href); -} - -/** Generate the RDF Node for the row / feature */ -export function getRowNode(rowIdValue: string, base?: string) { - return DF.blankNode(); -} - -/** Generate Facade-X quads that represent the table its rows */ -export function* quadsForTableAndRow( - tableAndGraph: RDF.NamedNode, - row: RDF.NamedNode | RDF.BlankNode, - i: number, -) { - yield DF.quad(tableAndGraph, RDFNS("type"), FX("root"), tableAndGraph); - yield DF.quad(tableAndGraph, RDFNS(`_${i}`), row, tableAndGraph); -} - -/** Iterate properties and generate Facade-X quads */ -export function* quadsForAttributes( - entry: Record<string, any>, - subject: RDF.Quad_Subject, - graph: RDF.Quad_Graph, - options: QuadsFromTableOptions, -) { - for (const [k, v] of Object.entries(entry)) { - const value = valueToTerm(v, options.includeBinaryValues); - if (value) yield DF.quad(subject, XYZ(encodeURI(k)), value, graph); - } -} diff --git a/src/models/geosparql/geojson.ts b/src/models/geosparql/geojson.ts new file mode 100644 index 0000000..e3c383b --- /dev/null +++ b/src/models/geosparql/geojson.ts @@ -0,0 +1,57 @@ +import type * as RDF from "@rdfjs/types"; +import stringify from "json-stable-stringify"; +import reproject from "reproject"; +import type * as wkx from "wkx"; +import type { FeatureTableContext } from "../../interfaces.js"; +import { GEO, RDFNS } from "../../prefixes.js"; +import type { GeomQuadsGen } from "../models-registry.js"; + +export class GeoJSONSerializer implements GeomQuadsGen { + get id() { + return "geojson"; + } + + requiresSeparateGeomSubject(ctx: FeatureTableContext) { + return !this.isInEPSG4326(ctx); + } + + /** EPSG:4326 is the only CRS allowed for the serialization of 
GeoJSON */ + isInEPSG4326(ctx: FeatureTableContext): boolean { + return ( + ctx.srs.organization.toLowerCase() == "epsg" && + ctx.srs.organization_coordsys_id == 4326 + ); + } + + *getQuads( + data: wkx.Geometry, + feature: RDF.Quad_Subject, + geom: RDF.Quad_Subject, + graph: RDF.Quad_Graph, + ctx: FeatureTableContext, + factory: RDF.DataFactory, + ) { + const { srs } = ctx; + const { literal, quad } = factory; + + yield quad(feature, GEO("hasDefaultGeometry"), geom, graph); + yield quad(geom, RDFNS("type"), GEO("Geometry"), graph); + + const payload = this.isInEPSG4326(ctx) + ? data.toGeoJSON() + : reproject.reproject( + data.toGeoJSON(), + // Ref: The line after <http://www.geopackage.org/spec121/#r117> + srs.definition_12_063 ?? srs.definition, + // GeoJSON is always in EPSG:4326 + "EPSG:4326", + ); + + yield quad( + geom, + GEO("asGeoJSON"), + literal(stringify(payload), GEO("geoJSONLiteral")), + graph, + ); + } +} diff --git a/src/models/geosparql/wkt.ts b/src/models/geosparql/wkt.ts new file mode 100644 index 0000000..0bdc049 --- /dev/null +++ b/src/models/geosparql/wkt.ts @@ -0,0 +1,43 @@ +import type { SpatialReferenceSystem } from "@ngageoint/geopackage"; +import type * as RDF from "@rdfjs/types"; +import type * as wkx from "wkx"; +import type { FeatureTableContext } from "../../interfaces.js"; +import { GEO, RDFNS } from "../../prefixes.js"; +import type { GeomQuadsGen } from "../models-registry.js"; + +export class WktSerialization implements GeomQuadsGen { + get id() { + return "wkt"; + } + + /** Calculate wktLiteral CRS prefix */ + srsOpengisUrl(srs: SpatialReferenceSystem) { + const { organization, organization_coordsys_id: id } = srs; + + // TODO: Determine if this is always valid. 
Issue GH-23 + return `http://www.opengis.net/def/crs/${organization.toUpperCase()}/0/${id}`; + } + + *getQuads( + data: wkx.Geometry, + feature: RDF.Quad_Subject, + geom: RDF.Quad_Subject, + graph: RDF.Quad_Graph, + ctx: FeatureTableContext, + factory: RDF.DataFactory, + ) { + const { literal, quad } = factory; + yield quad(feature, GEO("hasDefaultGeometry"), geom, graph); + yield quad(geom, RDFNS("type"), GEO("Geometry"), graph); + + const { srs } = ctx; + const wktLiteral = `<${this.srsOpengisUrl(srs)}> ${data.toWkt()}`; + + yield quad( + geom, + GEO("asWKT"), + literal(wktLiteral, GEO("wktLiteral")), + graph, + ); + } +} diff --git a/src/models/models-registry.ts b/src/models/models-registry.ts new file mode 100644 index 0000000..8059857 --- /dev/null +++ b/src/models/models-registry.ts @@ -0,0 +1,97 @@ +import type { GeoPackage } from "@ngageoint/geopackage"; +import type * as RDF from "@rdfjs/types"; +import type * as wkx from "wkx"; +import type { + CLIContext, + FeatureTableContext, + RDFContext, + RDFOptions, +} from "../interfaces.js"; + +/** Generic quads generator for anything but geometry literals */ +export interface QuadsGen { + id: string; + getQuads( + from: GeoPackage, + ctx: CLIContext & RDFContext & RDFOptions, + ): Generator<RDF.Quad>; +} + +/** Quads generator for geometry literals */ +export interface GeomQuadsGen { + id: string; + + /** + * The type signature of a quads generation function for different RDF + * geometry serializations. 
+ * + * @param data The WKX Geometry + * @param feature The RDF term of the feature + * @param geom The RDF term of the geometry + * @param graph The RDF term of the graph of the quads + * @param ctx Any table context + * @param factory An RDF/JS DataFactory + */ + getQuads( + data: wkx.Geometry, + feature: RDF.Quad_Subject, + geom: RDF.Quad_Subject, + graph: RDF.Quad_Graph, + ctx: FeatureTableContext, + factory: RDF.DataFactory, + ): Generator<RDF.Quad>; + + /** Can this geo:Geometry not be combined with other serializations? */ + requiresSeparateGeomSubject?(ctx: FeatureTableContext): boolean; +} + +export enum Registry { + /** The registry for RDF geometry literal serializations */ + Geometry = "geom", + /** The registry for generic RDF attribute data models */ + Generic = "alg", +} + +// type RegistryType = "geom" | "alg"; + +/** Singleton registry of Quad generating models */ +export class ModuleRegistry { + private static MODEL_REGISTRY: { + geom: Record<string, GeomQuadsGen>; + alg: Record<string, QuadsGen>; + } = { geom: {}, alg: {} }; + + /** + * Register a quads generator + * + * @param name Name to register the model by + * @param cls Function that returns a quads generator + * @param type The type of registry. + */ + static add(type: Registry.Generic, name: string, cls: QuadsGen): void; + static add(type: Registry.Geometry, name: string, cls: GeomQuadsGen): void; + static add(type: Registry, name: string, cls: QuadsGen | GeomQuadsGen) { + this.MODEL_REGISTRY[type][name] = cls; + } + + /** + * Get a registered quads generator + * + * @param modelName Name of registered generator + * @param type The type of registry. 
+ */ + static get(type: Registry.Generic, modelName: string): QuadsGen; + static get(type: Registry.Geometry, modelName: string): GeomQuadsGen; + static get(type: Registry, modelName: string): QuadsGen | GeomQuadsGen { + return this.MODEL_REGISTRY[type][modelName]; + } + + /** + * Return a list of known models for the registry type + * + * @param type The type of registry. + * */ + static knownModels(type: Registry): string[] { + return Object.keys(this.MODEL_REGISTRY[type]); + } +} diff --git a/src/models/models.ts b/src/models/models.ts deleted file mode 100644 index a236684..0000000 --- a/src/models/models.ts +++ /dev/null @@ -1,42 +0,0 @@ -import type { GeoPackage } from "@ngageoint/geopackage"; -import type * as RDF from "@rdfjs/types"; -import type { GeoPackageOptions } from "../geopackage.js"; - -/** - * The type signature of a quads generating function - * - * @param geopackage The GeoPackage instance - * @param options Options that may guide the generation of the quads. - */ -export type QuadsGeneratorFunc = ( - geopackage: GeoPackage, - options: GeoPackageOptions, -) => Generator<RDF.Quad>; - -/** Singleton registry of Quad generating models */ -export class ModelRegistry { - private static MODEL_REGISTRY: Record<string, QuadsGeneratorFunc> = {}; - /** - * Register a quads generator - * - * @param modelName Name to register the model by - * @param mainFunc Function that returns a quads generator - */ - static add(modelName: string, mainFunc: QuadsGeneratorFunc) { - this.MODEL_REGISTRY[modelName] = mainFunc; - } - - /** - * Get a registered quads generator - * - * @param modelName Name of registered generator - */ - static get(modelName: string): QuadsGeneratorFunc { - return this.MODEL_REGISTRY[modelName]; - } - - /** Return a list of known models */ - static knownModels(): string[] { - return Object.keys(this.MODEL_REGISTRY); - } -} diff --git a/src/models/utils.ts b/src/models/utils.ts new file mode 100644 index 0000000..13a79d5 --- /dev/null +++ 
b/src/models/utils.ts @@ -0,0 +1,20 @@ +import type { DBValue } from "@ngageoint/geopackage/dist/lib/db/dbAdapter.js"; +import type * as RDF from "@rdfjs/types"; +import { toRdf } from "rdf-literal"; +import { XSD } from "../prefixes.js"; + +/** Generate an RDF Literal from a value */ +export function valueToTerm( + value: DBValue, + includeBinaryValue: boolean, + factory: RDF.DataFactory, +): RDF.Quad_Object | undefined { + if (value == null) return undefined; + + if (value instanceof Uint8Array) + if (includeBinaryValue) + return factory.literal(value.toString("base64"), XSD("base64Binary")); + else return undefined; + + return toRdf(value); +} diff --git a/src/rdf-stream-override.ts b/src/rdf-stream-override.ts index bf26b5c..d9bc0c4 100644 --- a/src/rdf-stream-override.ts +++ b/src/rdf-stream-override.ts @@ -1,6 +1,6 @@ -import * as RDF from "@rdfjs/types"; +import type * as RDF from "@rdfjs/types"; import { DataFactory } from "rdf-data-factory"; -import { Transform, TransformCallback } from "stream"; +import { Transform, type TransformCallback } from "stream"; export interface OverrideGraphOptions { /** Override the context/graph of the quad into */ From 382afb73cead430b8c09e1e4fd69f775a7e7c7ca Mon Sep 17 00:00:00 2001 From: Redmer Kronemeijer <12477216+redmer@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:29:58 +0200 Subject: [PATCH 4/4] feat: rename bbox parameters fixes: Make `--bounding-box` less ideosyncratic #18 --- README.md | 4 +-- src/bounding-box.ts | 61 +++++++++++++++++++++++++++++++++++++-------- src/cli.ts | 12 ++++----- 3 files changed, 59 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index acfda5f..d5f4a0a 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ Options: -o, --output Output quads file [string] --format Override output format (default: nquads) [choices: "nq", "nquads", "trig", "nt", "ntriples", "ttl", "turtle"] - --bounding-box Limit features to bounding box [string] - --bounding-box-crs Coordinate 
Reference System code [string] + --bbox Limit features to bounding box [string] + --bbox-crs Coordinate Reference System code [string] --only-layers Only output named feature layers and attribute ta bles [array] --include-binary-values Output binary values [boolean] diff --git a/src/bounding-box.ts b/src/bounding-box.ts index 462d7b1..89f500b 100644 --- a/src/bounding-box.ts +++ b/src/bounding-box.ts @@ -42,20 +42,61 @@ export async function getWGS84Converter( } } -export function suppliedBoundingBox( +function spaceSepBbox( + bbstring: string, + srs: proj4.Converter | string, +): BoundingBox { + const [west, east, south, north] = bbstring + .split(" ", 4) + .map((c) => Number(c)); + const bb = new BoundingBox(west, east, south, north); + return bb.projectBoundingBox(srs, WGS84_CODE); +} + +function commaSepBbox( bbstring: string, - inCRS: proj4.Converter | string, -) { + srs: proj4.Converter | string, +): BoundingBox { + const parts = bbstring.split(","); + let west: string, + east: string, + __1: string, + south: string, + north: string, + __2: string; + if (parts.length == 4) [west, east, south, north] = parts; + else [west, east, __1, south, north, __2] = parts; + + const bb = new BoundingBox( + Number(west), + Number(east), + Number(south), + Number(north), + ); + return bb.projectBoundingBox(srs, WGS84_CODE); +} + +/** + * Convert a supplied bbox definition string to a {BoundingBox}. + * + * There are two types of bbox definition strings: + * 1. Four parts, space separated (deprecated) + * 2. Four or six parts, comma separated. 
(3rd axis ignored) + * + * @param bboxString Bounding box string as supplied + * @param srs The SRS in which to interpret bboxString + */ +export function suppliedBoundingBox( + bboxString: string, + srs: proj4.Converter | string, +): BoundingBox { try { - const [west, east, south, north] = bbstring - .split(" ", 4) - .map((c) => Number(c)); - const bb = new BoundingBox(west, east, south, north); - return bb.projectBoundingBox(inCRS, WGS84_CODE); + if (bboxString.includes(" ")) return spaceSepBbox(bboxString, srs); + return commaSepBbox(bboxString, srs); } catch (e) { Bye( - `Bounding box could not be parsed. Provide as a single space-separated string:`, - `"{min long (west)} {max long (east)} {min lat (south)} {max lat (north)}".`, + `Bounding box could not be parsed. Provide a single comma-separated string:`, + `"{min long (west)},{max long (east)},{min lat (south)},{max lat (north)}".`, ); } } diff --git a/src/cli.ts b/src/cli.ts index acb6505..6b3ec7c 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -45,11 +45,11 @@ async function cli() { desc: "Override output format (default: nquads)", }) .choices("format", Object.keys(EXTENSION_MIMETYPES)) - .option("bounding-box", { + .option("bbox", { type: "string", desc: "Limit features to bounding box", }) - .option("bounding-box-crs", { + .option("bbox-crs", { type: "string", desc: `Coordinate Reference System code`, }) @@ -87,11 +87,11 @@ async function cli() { // If there's a bounding box CRS defined, first check if we can parse it. // This is less expensive than converting quads etc. // TODO: Can we remove this reference to WGS84? - const bboxConverter = argv.boundingBoxCrs - ? await getWGS84Converter(argv.boundingBoxCrs) + const bboxConverter = argv.bboxCrs + ? await getWGS84Converter(argv.bboxCrs) : await getWGS84Converter(WGS84_CODE); - const boundingBox = argv.boundingBox - ? suppliedBoundingBox(argv.boundingBox, bboxConverter) + const boundingBox = argv.bbox + ? 
suppliedBoundingBox(argv.bbox, bboxConverter) : undefined; // If there's a target file, open a write stream and determine the mimetype off of it.