From 966157cfdddb355740f6a0a8d05dd3abddc05b30 Mon Sep 17 00:00:00 2001 From: Binh Vu Date: Tue, 3 Mar 2020 12:09:22 -0800 Subject: [PATCH] fix drepr inference; minor change in the backend interface --- drepr/Cargo.lock | 2 +- drepr/README.md | 12 +- drepr/engine/Cargo.toml | 2 +- drepr/engine/src/alignments/dfs.rs | 13 +- drepr/engine/src/alignments/inference.rs | 20 +- drepr/engine/src/executors/classes_map/mod.rs | 6 +- drepr/engine/src/lang/alignment/mod.rs | 2 +- .../src/lang/alignment/range_alignment.rs | 4 +- .../src/lang/alignment/value_alignment.rs | 2 +- drepr/engine/src/writers/stream_writer/mod.rs | 3 + .../engine/tests/alignments/inference/mod.rs | 1 + .../inference/resources/s01/assertion.json | 46 ++ .../inference/resources/s01/model.json | 741 ++++++++++++++++++ .../inference/resources/s01/model.meta | 17 + .../inference/resources/s01/model.yml | 85 ++ .../alignments/inference/test_infer_func.rs | 71 ++ drepr/engine/tests/alignments/mod.rs | 3 +- drepr/engine/tests/{resources => }/setup.py | 9 +- drepr/engine/tests/test_executors.rs | 1 + drepr/{ => old_code}/tests/__init__.py | 0 drepr/{ => old_code}/tests/alignments/mod.rs | 0 .../alignments/test_dimensional_alignment.rs | 0 .../tests/alignments/test_value_alignment.rs | 0 drepr/{ => old_code}/tests/conftest.py | 0 drepr/{ => old_code}/tests/helpers.rs | 0 drepr/{ => old_code}/tests/iterators/mod.rs | 0 .../tests/iterators/test_insert_iterator.rs | 0 .../iterators/test_known_size_iterator.rs | 0 .../iterators/test_unknown_size_iterator.rs | 0 .../{ => old_code}/tests/pydrepr/__init__.py | 0 .../tests/pydrepr/test_engine.py | 0 .../{ => old_code}/tests/pydrepr/test_repr.py | 0 .../tests/pydrepr/test_repr_builder.py | 0 drepr/{ => old_code}/tests/readers/mod.rs | 0 drepr/{ => old_code}/tests/runner.rs | 0 pydrepr/drepr/__init__.py | 2 +- pydrepr/drepr/engine.py | 6 +- .../drepr/{graph.py => graph_deprecated.py} | 0 .../outputs/array_backend/array_backend.py | 2 +- pydrepr/drepr/outputs/base_output_sm.py | 4 +- .../outputs/graph_backend/graph_backend.py | 8 +- pydrepr/drepr/version.py | 6 +- .../examples/climate_data/geospatial_nb.ipynb | 23 +- pydrepr/tests/drepr/engine/__init__.py | 1 + pydrepr/tests/drepr/engine/conftest.py | 25 + .../drepr/engine/test_complete_description.py | 27 + pydrepr/tests/drepr/outputs/conftest.py | 6 +- .../{synthesis_s1 => s01_synthesis}/model.yml | 0 .../resource.json | 0 .../{synthesis_s2 => s02_synthesis}/model.yml | 0 .../resource.json | 0 .../{synthesis_s3 => s03_synthesis}/model.yml | 0 .../resource.json | 0 .../resources/s04_shorten_gldas/model.yml | 106 +++ version_manager.py | 6 +- 55 files changed, 1208 insertions(+), 54 deletions(-) create mode 100644 drepr/engine/tests/alignments/inference/mod.rs create mode 100644 drepr/engine/tests/alignments/inference/resources/s01/assertion.json create mode 100644 drepr/engine/tests/alignments/inference/resources/s01/model.json create mode 100644 drepr/engine/tests/alignments/inference/resources/s01/model.meta create mode 100644 drepr/engine/tests/alignments/inference/resources/s01/model.yml create mode 100644 drepr/engine/tests/alignments/inference/test_infer_func.rs rename drepr/engine/tests/{resources => }/setup.py (65%) rename drepr/{ => old_code}/tests/__init__.py (100%) rename drepr/{ => old_code}/tests/alignments/mod.rs (100%) rename drepr/{ => old_code}/tests/alignments/test_dimensional_alignment.rs (100%) rename drepr/{ => old_code}/tests/alignments/test_value_alignment.rs (100%) rename drepr/{ => old_code}/tests/conftest.py (100%) rename drepr/{ => old_code}/tests/helpers.rs (100%) rename drepr/{ => old_code}/tests/iterators/mod.rs (100%) rename drepr/{ => old_code}/tests/iterators/test_insert_iterator.rs (100%) rename drepr/{ => old_code}/tests/iterators/test_known_size_iterator.rs (100%) rename drepr/{ => old_code}/tests/iterators/test_unknown_size_iterator.rs (100%) rename drepr/{ => old_code}/tests/pydrepr/__init__.py (100%) rename drepr/{ => old_code}/tests/pydrepr/test_engine.py (100%) rename drepr/{ => old_code}/tests/pydrepr/test_repr.py (100%) rename drepr/{ => old_code}/tests/pydrepr/test_repr_builder.py (100%) rename drepr/{ => old_code}/tests/readers/mod.rs (100%) rename drepr/{ => old_code}/tests/runner.rs (100%) rename pydrepr/drepr/{graph.py => graph_deprecated.py} (100%) create mode 100644 pydrepr/tests/drepr/engine/__init__.py create mode 100644 pydrepr/tests/drepr/engine/conftest.py create mode 100644 pydrepr/tests/drepr/engine/test_complete_description.py rename pydrepr/tests/resources/{synthesis_s1 => s01_synthesis}/model.yml (100%) rename pydrepr/tests/resources/{synthesis_s1 => s01_synthesis}/resource.json (100%) rename pydrepr/tests/resources/{synthesis_s2 => s02_synthesis}/model.yml (100%) rename pydrepr/tests/resources/{synthesis_s2 => s02_synthesis}/resource.json (100%) rename pydrepr/tests/resources/{synthesis_s3 => s03_synthesis}/model.yml (100%) rename pydrepr/tests/resources/{synthesis_s3 => s03_synthesis}/resource.json (100%) create mode 100644 pydrepr/tests/resources/s04_shorten_gldas/model.yml diff --git a/drepr/Cargo.lock b/drepr/Cargo.lock index 4119519..8159a9d 100644 --- a/drepr/Cargo.lock +++ b/drepr/Cargo.lock @@ -234,7 +234,7 @@ dependencies = [ [[package]] name = "engine" -version = "1.0.7" +version = "1.0.8" dependencies = [ "chrono 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/drepr/README.md b/drepr/README.md index 5e7eeb9..8bbd976 100644 --- a/drepr/README.md +++ b/drepr/README.md @@ -62,12 +62,20 @@ On MacOS, cargo produces dynamic linking for libstd (`@rpath\libstd...`) You can check linking on MacOS using otool -### Awared Issues +### Known Issues 1. It is very slow to build Rust-cpython is very slow to build. You can disable the python feature if you are in the debug mode using `--features "disable-python readers/disable-python"` flag. -However, it only works if you are in the crate folder, cargo does not support passing `features` flag in the workspace folder yet ([see more](https://github.com/rust-lang/cargo/issues/5015)) +However, it only works if you are in the crate folder, cargo does not support passing `features` flag in the workspace folder yet ([see more](https://github.com/rust-lang/cargo/issues/5015)). +For example, you need to be in the engine folder to set the flag. + +Note: if you run test, disable python features will also significantly improve the building time but remember to change the working directory. Here is one example of the command: + +```.env +cargo test --features "disable-python readers/disable-python" --package engine --test main alignments::inference::test_infer_func::smoke_test -- --exact +``` + # Useful commands diff --git a/drepr/engine/Cargo.toml b/drepr/engine/Cargo.toml index a87c569..78df9a0 100644 --- a/drepr/engine/Cargo.toml +++ b/drepr/engine/Cargo.toml @@ -1,7 +1,7 @@ # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [package] name = "engine" -version = "1.0.7" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! +version = "1.0.8" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! authors = ["Binh Vu "] edition = "2018" diff --git a/drepr/engine/src/alignments/dfs.rs b/drepr/engine/src/alignments/dfs.rs index a8a6766..9db750b 100644 --- a/drepr/engine/src/alignments/dfs.rs +++ b/drepr/engine/src/alignments/dfs.rs @@ -1,7 +1,9 @@ use petgraph::visit::{GraphRef, IntoNeighbors, Visitable, VisitMap}; +use fnv::FnvHashSet; +use std::hash::Hash; /// THE FOLLOWING CODE IS COPIED FROM: https://docs.rs/petgraph/0.4.13/src/petgraph/visit/traversal.rs.html#38-43 -/// +/// so that DFS also yield the parent node /// /// Visit nodes of a graph in a depth-first-search (DFS) emitting nodes in /// preorder (when they are first discovered). @@ -42,7 +44,7 @@ pub struct CustomedDfs { } impl CustomedDfs - where N: Copy + PartialEq, + where N: Copy + PartialEq + Eq + Hash, VM: VisitMap, { /// Create a new **Dfs**, using the graph's visitor map, and put **start** @@ -75,18 +77,19 @@ impl CustomedDfs } /// Return the next node in the dfs, or **None** if the traversal is done. - pub fn next(&mut self, graph: G) -> Option<(N, N)> + pub fn next(&mut self, graph: G, revisit: &FnvHashSet) -> Option<(N, N)> where G: IntoNeighbors, { if let Some((parent_node, node)) = self.stack.pop() { for succ in graph.neighbors(node) { - if self.discovered.visit(succ) { + if self.discovered.visit(succ) || revisit.contains(&succ) { self.stack.push((node, succ)); } } - return Some((parent_node, node)); } None } + + } diff --git a/drepr/engine/src/alignments/inference.rs b/drepr/engine/src/alignments/inference.rs index 9f35c14..e27d304 100644 --- a/drepr/engine/src/alignments/inference.rs +++ b/drepr/engine/src/alignments/inference.rs @@ -4,6 +4,7 @@ use petgraph::prelude::*; use crate::lang::{AlignedDim, Alignment, Cardinality, Description, RangeAlignment}; use super::dfs::CustomedDfs; +use fnv::FnvHashSet; pub struct AlignmentInference<'a> { desc: &'a Description, @@ -161,28 +162,33 @@ impl<'a> AlignmentInference<'a> { let mut new_incoming_edges = vec![]; let mut dfs = CustomedDfs::new(&mg, u0); - if dfs.next(&mg).is_some() { + let mut revisit = FnvHashSet::default(); + + if dfs.next(&mg, &revisit).is_some() { // call next first to skip the u0 loop { // recording the length of the current stack // so that we know if we need to stop from exploring further from the next node // we can pop all of its children let stack_len = dfs.stack.len(); - let (u1, u2) = match dfs.next(&mg) { + let (u1, u2) = match dfs.next(&mg, &revisit) { None => break, Some((u1, u2)) => (u1, u2) }; - - if !mg.contains_edge(u0, u2) { + + if mg.contains_edge(u0, u1) && !mg.contains_edge(u0, u2) { // try to infer alignment function between u0 and u2 match self.infer_func(u0, u1, u2) { None => { - // don't haven't find any, hence we have to stop from exploring u2 + // haven't found any, hence we have to stop from exploring u2 // plus 1 because we take into account the u2 node, which was popped for _ in 0..(dfs.stack.len() + 1 - stack_len) { // remove all children of u2 dfs.stack.pop(); } + // mark this u2 as re-visited because it may be discovered from other nodes + // we should not have infinite recursive loop here + revisit.insert(u2); continue; } Some(afuncs) => { @@ -198,7 +204,7 @@ impl<'a> AlignmentInference<'a> { } } } - + n_new_edges += new_incoming_edges.len() + new_outgoing_edges.len(); for ui in new_outgoing_edges { @@ -219,7 +225,7 @@ impl<'a> AlignmentInference<'a> { /// Infer an alignment function of xid and zid given alignments between (xid, yid) and (yid, zid) /// /// If there is only one way to join values of xid and zid, then the chain join will be the correct one - fn infer_func(&self, xid: usize, yid: usize, zid: usize) -> Option> { + pub fn infer_func(&self, xid: usize, yid: usize, zid: usize) -> Option> { let f = &self.aligns[xid][yid]; let g = &self.aligns[yid][zid]; diff --git a/drepr/engine/src/executors/classes_map/mod.rs b/drepr/engine/src/executors/classes_map/mod.rs index 9379072..4dcc788 100644 --- a/drepr/engine/src/executors/classes_map/mod.rs +++ b/drepr/engine/src/executors/classes_map/mod.rs @@ -7,7 +7,9 @@ use crate::executors::classes_map::generic_algo::generic_class_map; use crate::executors::preprocessing::exec_preprocessing; use crate::lang::{Description, Resource}; use crate::writers::stream_writer::{OutputFormat}; -use crate::writers::stream_writer::{GraphJSONWriter, TTLStreamWriter, GraphPyWriter}; +use crate::writers::stream_writer::{GraphJSONWriter, TTLStreamWriter}; +#[cfg(not(feature = "disable-python"))] +use crate::writers::stream_writer::GraphPyWriter; use crate::writers::stream_writer::stream_writer::{StreamWriterResult, WriteResult}; use crate::execution_plans::classes_map_plan::class_map_plan::ClassMapExecStrategy; #[cfg(feature = "enable-exec-macro-cls-map")] @@ -85,6 +87,7 @@ pub fn classes_map(resource_files: &[PhysicalResource], desc: &Description, plan &format!("{}.edge", fpath), &desc.semantic_model)) } + #[cfg(not(feature = "disable-python"))] PhysicalOutput::File { fpath: _, format: OutputFormat::GraphPy } => { unimplemented!() } @@ -94,6 +97,7 @@ pub fn classes_map(resource_files: &[PhysicalResource], desc: &Description, plan PhysicalOutput::Memory { format: OutputFormat::GraphJSON } => { Box::new(GraphJSONWriter::write2str(&desc.semantic_model)) } + #[cfg(not(feature = "disable-python"))] PhysicalOutput::Memory { format: OutputFormat::GraphPy } => { Box::new(GraphPyWriter::write2mem(&desc.semantic_model)) } diff --git a/drepr/engine/src/lang/alignment/mod.rs b/drepr/engine/src/lang/alignment/mod.rs index d00a788..f893655 100644 --- a/drepr/engine/src/lang/alignment/mod.rs +++ b/drepr/engine/src/lang/alignment/mod.rs @@ -8,7 +8,7 @@ use readers::{is_enum_type_impl, as_enum_type_impl, into_enum_type_impl}; pub mod range_alignment; pub mod value_alignment; -#[derive(Deserialize, Debug, Clone, Serialize)] +#[derive(Deserialize, Debug, Clone, PartialEq, Eq, Serialize)] #[serde(tag = "type")] pub enum Alignment { #[serde(rename = "range")] diff --git a/drepr/engine/src/lang/alignment/range_alignment.rs b/drepr/engine/src/lang/alignment/range_alignment.rs index cabce7c..b5d7027 100644 --- a/drepr/engine/src/lang/alignment/range_alignment.rs +++ b/drepr/engine/src/lang/alignment/range_alignment.rs @@ -4,7 +4,7 @@ use crate::lang::alignment::Cardinality; use hashbrown::HashSet; use std::iter::FromIterator; -#[derive(Debug, Clone, Deserialize, PartialEq, Serialize)] +#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Serialize)] pub struct AlignedDim { #[serde(rename = "source")] pub source_dim: usize, @@ -12,7 +12,7 @@ pub struct AlignedDim { pub target_dim: usize, } -#[derive(Deserialize, Debug, Clone, Serialize)] +#[derive(Deserialize, Debug, Clone, PartialEq, Eq, Serialize)] pub struct RangeAlignment { pub source: usize, pub target: usize, diff --git a/drepr/engine/src/lang/alignment/value_alignment.rs b/drepr/engine/src/lang/alignment/value_alignment.rs index 8bc61d1..0ba3c15 100644 --- a/drepr/engine/src/lang/alignment/value_alignment.rs +++ b/drepr/engine/src/lang/alignment/value_alignment.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; use crate::lang::alignment::Cardinality; use crate::lang::description::Description; -#[derive(Deserialize, Serialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)] pub struct ValueAlignment { pub source: usize, pub target: usize, diff --git a/drepr/engine/src/writers/stream_writer/mod.rs b/drepr/engine/src/writers/stream_writer/mod.rs index 28a039d..208f05a 100644 --- a/drepr/engine/src/writers/stream_writer/mod.rs +++ b/drepr/engine/src/writers/stream_writer/mod.rs @@ -4,10 +4,12 @@ pub use self::stream_class_writer::StreamClassWriter; pub use self::stream_writer::{StreamWriter}; pub use self::turtle::TTLStreamWriter; pub use self::graph_json::GraphJSONWriter; +#[cfg(not(feature = "disable-python"))] pub use self::graph_py::GraphPyWriter; pub mod turtle; pub mod graph_json; +#[cfg(not(feature = "disable-python"))] pub mod graph_py; pub mod stream_writer; pub mod stream_class_writer; @@ -58,6 +60,7 @@ pub enum OutputFormat { TTL, #[serde(rename = "graph_json")] GraphJSON, + #[cfg(not(feature = "disable-python"))] #[serde(rename = "graph_py")] GraphPy } diff --git a/drepr/engine/tests/alignments/inference/mod.rs b/drepr/engine/tests/alignments/inference/mod.rs new file mode 100644 index 0000000..195226f --- /dev/null +++ b/drepr/engine/tests/alignments/inference/mod.rs @@ -0,0 +1 @@ +pub mod test_infer_func; \ No newline at end of file diff --git a/drepr/engine/tests/alignments/inference/resources/s01/assertion.json b/drepr/engine/tests/alignments/inference/resources/s01/assertion.json new file mode 100644 index 0000000..92d16c3 --- /dev/null +++ b/drepr/engine/tests/alignments/inference/resources/s01/assertion.json @@ -0,0 +1,46 @@ +{ + "alignments": [ + { + "pair": [9, 8], + "aligns": [ + {"type": "range", "source": 9, "target": 8, "aligned_dims": [ + {"source": 3, "target": 3}, {"source": 4, "target": 4} + ]} + ] + }, + { + "pair": [9, 0], + "aligns": [ + {"type": "range", "source": 9, "target": 0, "aligned_dims": [ + {"source": 3, "target": 2} + ]} + ] + }, + { + "pair": [8, 1], + "aligns": [ + {"type": "range", "source": 8, "target": 1, "aligned_dims": [ + {"source": 4, "target": 2} + ]} + ] + }, + { + "pair": [9, 1], + "aligns": [ + {"type": "range", "source": 9, "target": 1, "aligned_dims": [ + {"source": 4, "target": 2} + ]} + ] + } + ], + "infer_funcs": [ + { + "triple": [9, 8, 1], + "aligns": [ + {"type": "range", "source": 9, "target": 1, "aligned_dims": [ + {"source": 4, "target": 2} + ]} + ] + } + ] +} \ No newline at end of file diff --git a/drepr/engine/tests/alignments/inference/resources/s01/model.json b/drepr/engine/tests/alignments/inference/resources/s01/model.json new file mode 100644 index 0000000..25e33b5 --- /dev/null +++ b/drepr/engine/tests/alignments/inference/resources/s01/model.json @@ -0,0 +1,741 @@ +{ + "resources":[ + { + "type":"netcdf4", + "value":0 + } + ], + "preprocessing":[ + + ], + "attributes":[ + { + "id":0, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"lat" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"data" + } + }, + { + "type":"range", + "start":0, + "end":null, + "step":1 + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":1, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"lon" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"data" + } + }, + { + "type":"range", + "start":0, + "end":null, + "step":1 + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":2, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"@" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"MAP_PROJECTION" + } + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":3, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"@" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"DX" + } + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":4, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"@" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"DY" + } + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":5, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"@" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"SOUTH_WEST_CORNER_LON" + } + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":6, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"@" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"SOUTH_WEST_CORNER_LAT" + } + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":7, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"time" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"data" + } + }, + { + "type":"index", + "val":{ + "t":"idx", + "c":0 + } + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + + ] + }, + { + "id":8, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"Rainf_f_tavg" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"data" + } + }, + { + "type":"index", + "val":{ + "t":"idx", + "c":0 + } + }, + { + "type":"range", + "start":0, + "end":null, + "step":1 + }, + { + "type":"range", + "start":0, + "end":null, + "step":1 + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + { + "t":"I64", + "c":-9999 + } + ] + }, + { + "id":9, + "resource_id":0, + "path":{ + "steps":[ + { + "type":"index", + "val":{ + "t":"str", + "c":"Albedo_inst" + } + }, + { + "type":"index", + "val":{ + "t":"str", + "c":"data" + } + }, + { + "type":"index", + "val":{ + "t":"idx", + "c":0 + } + }, + { + "type":"range", + "start":0, + "end":null, + "step":1 + }, + { + "type":"range", + "start":0, + "end":null, + "step":1 + } + ] + }, + "unique":false, + "sorted":"none", + "vtype":"unspecified", + "missing_values":[ + { + "t":"I64", + "c":-9999 + } + ] + } + ], + "alignments":[ + { + "type":"range", + "source":8, + "target":0, + "aligned_dims":[ + { + "source":3, + "target":2 + } + ] + }, + { + "type":"range", + "source":8, + "target":1, + "aligned_dims":[ + { + "source":4, + "target":2 + } + ] + }, + { + "type":"range", + "source":8, + "target":7, + "aligned_dims":[ + + ] + }, + { + "type":"range", + "source":8, + "target":2, + "aligned_dims":[ + + ] + }, + { + "type":"range", + "source":2, + "target":5, + "aligned_dims":[ + + ] + }, + { + "type":"range", + "source":2, + "target":6, + "aligned_dims":[ + + ] + }, + { + "type":"range", + "source":2, + "target":3, + "aligned_dims":[ + + ] + }, + { + "type":"range", + "source":2, + "target":4, + "aligned_dims":[ + + ] + }, + { + "type":"range", + "source":8, + "target":9, + "aligned_dims":[ + { + "source":3, + "target":3 + }, + { + "source":4, + "target":4 + } + ] + } + ], + "semantic_model":{ + "nodes":[ + { + "type":"class_node", + "node_id":0, + "rel_label":"mint:Variable", + "abs_label":"https://mint.isi.edu/Variable" + }, + { + "type":"class_node", + "node_id":1, + "rel_label":"mint:Variable", + "abs_label":"https://mint.isi.edu/Variable" + }, + { + "type":"class_node", + "node_id":2, + "rel_label":"mint-geo:Raster", + "abs_label":"https://mint.isi.edu/geoRaster" + }, + { + "type":"data_node", + "node_id":3, + "attr_id":8, + "data_type":null + }, + { + "type":"data_node", + "node_id":4, + "attr_id":0, + "data_type":null + }, + { + "type":"data_node", + "node_id":5, + "attr_id":1, + "data_type":null + }, + { + "type":"data_node", + "node_id":6, + "attr_id":7, + "data_type":null + }, + { + "type":"data_node", + "node_id":7, + "attr_id":9, + "data_type":null + }, + { + "type":"data_node", + "node_id":8, + "attr_id":2, + "data_type":null + }, + { + "type":"data_node", + "node_id":9, + "attr_id":5, + "data_type":null + }, + { + "type":"data_node", + "node_id":10, + "attr_id":6, + "data_type":null + }, + { + "type":"data_node", + "node_id":11, + "attr_id":3, + "data_type":null + }, + { + "type":"data_node", + "node_id":12, + "attr_id":4, + "data_type":null + }, + { + "type":"literal_node", + "node_id":13, + "val":{ + "t":"Str", + "c":"atmosphere_water__precipitation_mass_flux" + }, + "data_type":null + }, + { + "type":"literal_node", + "node_id":14, + "val":{ + "t":"Str", + "c":"land_surface__albedo" + }, + "data_type":null + }, + { + "type":"literal_node", + "node_id":15, + "val":{ + "t":"I64", + "c":0 + }, + "data_type":null + }, + { + "type":"literal_node", + "node_id":16, + "val":{ + "t":"I64", + "c":0 + }, + "data_type":null + } + ], + "edges":[ + { + "edge_id":0, + "source":0, + "target":3, + "rel_label":"rdf:value", + "abs_label":"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", + "is_subject":false + }, + { + "edge_id":1, + "source":0, + "target":4, + "rel_label":"mint-geo:lat", + "abs_label":"https://mint.isi.edu/geolat", + "is_subject":false + }, + { + "edge_id":2, + "source":0, + "target":5, + "rel_label":"mint-geo:long", + "abs_label":"https://mint.isi.edu/geolong", + "is_subject":false + }, + { + "edge_id":3, + "source":0, + "target":6, + "rel_label":"mint:time", + "abs_label":"https://mint.isi.edu/time", + "is_subject":false + }, + { + "edge_id":4, + "source":0, + "target":2, + "rel_label":"mint-geo:raster", + "abs_label":"https://mint.isi.edu/georaster", + "is_subject":false + }, + { + "edge_id":5, + "source":0, + "target":13, + "rel_label":"mint:standardName", + "abs_label":"https://mint.isi.edu/standardName", + "is_subject":false + }, + { + "edge_id":6, + "source":1, + "target":7, + "rel_label":"rdf:value", + "abs_label":"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", + "is_subject":false + }, + { + "edge_id":7, + "source":1, + "target":4, + "rel_label":"mint-geo:lat", + "abs_label":"https://mint.isi.edu/geolat", + "is_subject":false + }, + { + "edge_id":8, + "source":1, + "target":5, + "rel_label":"mint-geo:long", + "abs_label":"https://mint.isi.edu/geolong", + "is_subject":false + }, + { + "edge_id":9, + "source":1, + "target":6, + "rel_label":"mint:time", + "abs_label":"https://mint.isi.edu/time", + "is_subject":false + }, + { + "edge_id":10, + "source":1, + "target":2, + "rel_label":"mint-geo:raster", + "abs_label":"https://mint.isi.edu/georaster", + "is_subject":false + }, + { + "edge_id":11, + "source":1, + "target":14, + "rel_label":"mint:standardName", + "abs_label":"https://mint.isi.edu/standardName", + "is_subject":false + }, + { + "edge_id":12, + "source":2, + "target":8, + "rel_label":"mint-geo:epsg", + "abs_label":"https://mint.isi.edu/geoepsg", + "is_subject":false + }, + { + "edge_id":13, + "source":2, + "target":9, + "rel_label":"mint-geo:x_0", + "abs_label":"https://mint.isi.edu/geox_0", + "is_subject":false + }, + { + "edge_id":14, + "source":2, + "target":10, + "rel_label":"mint-geo:y_0", + "abs_label":"https://mint.isi.edu/geoy_0", + "is_subject":false + }, + { + "edge_id":15, + "source":2, + "target":11, + "rel_label":"mint-geo:dx", + "abs_label":"https://mint.isi.edu/geodx", + "is_subject":false + }, + { + "edge_id":16, + "source":2, + "target":12, + "rel_label":"mint-geo:dy", + "abs_label":"https://mint.isi.edu/geody", + "is_subject":false + }, + { + "edge_id":17, + "source":2, + "target":15, + "rel_label":"mint-geo:x_slope", + "abs_label":"https://mint.isi.edu/geox_slope", + "is_subject":false + }, + { + "edge_id":18, + "source":2, + "target":16, + "rel_label":"mint-geo:y_slope", + "abs_label":"https://mint.isi.edu/geoy_slope", + "is_subject":false + } + ], + "prefixes":[ + [ + "mint", + "https://mint.isi.edu/" + ], + [ + "mint-geo", + "https://mint.isi.edu/geo" + ], + [ + "drepr", + "https://purl.org/drepr/1.0/" + ], + [ + "rdf", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + ], + [ + "rdfs", + "http://www.w3.org/2000/01/rdf-schema#" + ], + [ + "owl", + "http://www.w3.org/2002/07/owl#" + ] + ] + } +} \ No newline at end of file diff --git a/drepr/engine/tests/alignments/inference/resources/s01/model.meta b/drepr/engine/tests/alignments/inference/resources/s01/model.meta new file mode 100644 index 0000000..957ec38 --- /dev/null +++ b/drepr/engine/tests/alignments/inference/resources/s01/model.meta @@ -0,0 +1,17 @@ +{ + "attributes":{ + "latitude":0, + "longitude":1, + "projection":2, + "dx":3, + "dy":4, + "x_0":5, + "y_0":6, + "time":7, + "Rainf_f_tavg":8, + "Albedo_inst":9 + }, + "resources":{ + "default":0 + } +} \ No newline at end of file diff --git a/drepr/engine/tests/alignments/inference/resources/s01/model.yml b/drepr/engine/tests/alignments/inference/resources/s01/model.yml new file mode 100644 index 0000000..f2d2111 --- /dev/null +++ b/drepr/engine/tests/alignments/inference/resources/s01/model.yml @@ -0,0 +1,85 @@ +version: "2" +resources: netcdf4 +preprocessing: [] +attributes: + latitude: $.lat.data[:] + longitude: $.lon.data[:] + projection: $.@.MAP_PROJECTION + dx: $.@.DX + dy: $.@.DY + x_0: $.@.SOUTH_WEST_CORNER_LON + y_0: $.@.SOUTH_WEST_CORNER_LAT + time: $.time.data[0] + Rainf_f_tavg: + path: $.Rainf_f_tavg.data[0][:][:] + missing_values: [-9999] + Albedo_inst: + path: $.Albedo_inst.data[0][:][:] + missing_values: [-9999] +alignments: + - type: dimension + value: Rainf_f_tavg:3 <-> latitude:2 + - type: dimension + value: Rainf_f_tavg:4 <-> longitude:2 + - type: dimension + source: Rainf_f_tavg + target: time + aligned_dims: [] + - type: dimension + source: Rainf_f_tavg + target: projection + aligned_dims: [] + - type: dimension + source: projection + target: x_0 + aligned_dims: [] + - type: dimension + source: projection + target: y_0 + aligned_dims: [] + - type: dimension + source: projection + target: dx + aligned_dims: [] + - type: dimension + source: projection + target: dy + aligned_dims: [] + - type: dimension + source: Rainf_f_tavg + target: Albedo_inst + aligned_dims: [{ "source": 3, "target": 3 }, { "source": 4, "target": 4 }] +semantic_model: + mint:Variable:1: + properties: + - [rdf:value, Rainf_f_tavg] + - [mint-geo:lat, latitude] + - [mint-geo:long, longitude] + - [mint:time, time] + static_properties: + - [mint:standardName, atmosphere_water__precipitation_mass_flux] + links: + - [mint-geo:raster, mint-geo:Raster:1] + mint:Variable:2: + properties: + - [rdf:value, Albedo_inst] + - [mint-geo:lat, latitude] + - [mint-geo:long, longitude] + - [mint:time, time] + static_properties: + - [mint:standardName, land_surface__albedo] + links: + - [mint-geo:raster, mint-geo:Raster:1] + mint-geo:Raster:1: + properties: + - [mint-geo:epsg, projection] + - [mint-geo:x_0, x_0] + - [mint-geo:y_0, y_0] + - [mint-geo:dx, dx] + - [mint-geo:dy, dy] + static_properties: + - [mint-geo:x_slope, 0] + - [mint-geo:y_slope, 0] + prefixes: + mint: https://mint.isi.edu/ + mint-geo: https://mint.isi.edu/geo diff --git a/drepr/engine/tests/alignments/inference/test_infer_func.rs b/drepr/engine/tests/alignments/inference/test_infer_func.rs new file mode 100644 index 0000000..dd6ab1f --- /dev/null +++ b/drepr/engine/tests/alignments/inference/test_infer_func.rs @@ -0,0 +1,71 @@ +use engine::lang::{Description, Alignment}; +use engine::alignments::inference::AlignmentInference; +use std::path::Path; +use std::fs; +use std::fs::File; +use serde::{Deserialize}; + +/// Smoke test the inferences +#[test] +fn smoke_test() { + let scenarios = TestScenario::load(); + for scenario in scenarios { + let inference = AlignmentInference::new(&scenario.desc); + + for infer_fn in scenario.assertion.infer_funcs { + let aligns = inference.infer_func(infer_fn.triple.0, infer_fn.triple.1, infer_fn.triple.2); + assert!(aligns.is_some()); + assert_eq!(infer_fn.aligns, aligns.unwrap()); + } + + for align in scenario.assertion.alignments { + let aligns = inference.get_alignments(align.pair.0, align.pair.1); + assert_eq!(align.aligns, aligns); + } + } +} + +#[derive(Debug, Clone, Deserialize)] +struct Assertion { + alignments: Vec, + infer_funcs: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +struct AlignmentAssertion { + pair: (usize, usize), + aligns: Vec +} + +#[derive(Debug, Clone, Deserialize)] +struct InferFnAssertion { + triple: (usize, usize, usize), + aligns: Vec +} + +struct TestScenario { + name: String, + desc: Description, + assertion: Assertion +} + +impl TestScenario { + pub fn load() -> Vec { + let test_resource_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/alignments/inference/resources"); + let mut scenarios = vec![]; + + for e0 in fs::read_dir(&test_resource_dir).unwrap() { + let dataset_dir = e0.unwrap().path(); + if dataset_dir.is_dir() { + // get model + let desc_file = dataset_dir.as_path().join("model.json").to_str().unwrap().to_string(); + let desc: Description = serde_json::from_reader(File::open(desc_file).unwrap()).expect("Invalid description file"); + let assertion: Assertion = serde_json::from_reader(File::open(dataset_dir.as_path().join("assertion.json")).unwrap()).expect("Invalid assertion file"); + scenarios.push(TestScenario { desc, name: dataset_dir.file_name().unwrap().to_str().unwrap().to_string(), assertion }); + } + } + + scenarios.sort_by_key(|s| s.name.clone()); + scenarios + } +} \ No newline at end of file diff --git a/drepr/engine/tests/alignments/mod.rs b/drepr/engine/tests/alignments/mod.rs index b1f56f5..165d288 100644 --- a/drepr/engine/tests/alignments/mod.rs +++ b/drepr/engine/tests/alignments/mod.rs @@ -1 +1,2 @@ -pub mod funcs; \ No newline at end of file +pub mod funcs; +pub mod inference; diff --git a/drepr/engine/tests/resources/setup.py b/drepr/engine/tests/setup.py similarity index 65% rename from drepr/engine/tests/resources/setup.py rename to drepr/engine/tests/setup.py index ee71c2d..47c1793 100644 --- a/drepr/engine/tests/resources/setup.py +++ b/drepr/engine/tests/setup.py @@ -1,5 +1,6 @@ import os from pathlib import Path +import glob from typing import List, Dict, Tuple, Callable, Any, Optional import ujson @@ -8,14 +9,16 @@ if __name__ == '__main__': # generate json models - for dataset_dir in Path(os.path.abspath(__file__)).parent.iterdir(): + test_dir = os.path.dirname(os.path.abspath(__file__)) + for model_file in glob.glob(os.path.join(test_dir, "**/resources/*/model.yml"), recursive=True): + dataset_dir = Path(model_file).parent if not dataset_dir.is_dir() or not (dataset_dir / "model.yml").exists(): continue - with open(dataset_dir / "model.json", "w") as f: + with open(str(dataset_dir / "model.json"), "w") as f: result = DRepr.parse_from_file(str(dataset_dir / "model.yml")).to_engine_format() f.write(ujson.dumps(result.model, indent=4, escape_forward_slashes=False)) - with open(dataset_dir / "model.meta", "w") as f: + with open(str(dataset_dir / "model.meta"), "w") as f: f.write(ujson.dumps({ "attributes": result.attribute_idmap, "resources": result.resource_idmap diff --git a/drepr/engine/tests/test_executors.rs b/drepr/engine/tests/test_executors.rs index 9289f42..7420076 100644 --- a/drepr/engine/tests/test_executors.rs +++ b/drepr/engine/tests/test_executors.rs @@ -26,6 +26,7 @@ pub fn test_executors() { resources: resources.clone(), description: description.clone(), output: PhysicalOutput::File { fpath: exc_output.clone(), format: output.1 }, + edges_optional: vec![true; description.semantic_model.edges.len()] }).exec(); let pred_output = fs::read_to_string(exc_output).unwrap(); diff --git a/drepr/tests/__init__.py b/drepr/old_code/tests/__init__.py similarity index 100% rename from drepr/tests/__init__.py rename to drepr/old_code/tests/__init__.py diff --git a/drepr/tests/alignments/mod.rs b/drepr/old_code/tests/alignments/mod.rs similarity index 100% rename from drepr/tests/alignments/mod.rs rename to drepr/old_code/tests/alignments/mod.rs diff --git a/drepr/tests/alignments/test_dimensional_alignment.rs b/drepr/old_code/tests/alignments/test_dimensional_alignment.rs similarity index 100% rename from drepr/tests/alignments/test_dimensional_alignment.rs rename to drepr/old_code/tests/alignments/test_dimensional_alignment.rs diff --git a/drepr/tests/alignments/test_value_alignment.rs b/drepr/old_code/tests/alignments/test_value_alignment.rs similarity index 100% rename from drepr/tests/alignments/test_value_alignment.rs rename to drepr/old_code/tests/alignments/test_value_alignment.rs diff --git a/drepr/tests/conftest.py b/drepr/old_code/tests/conftest.py similarity index 100% rename from drepr/tests/conftest.py rename to drepr/old_code/tests/conftest.py diff --git a/drepr/tests/helpers.rs b/drepr/old_code/tests/helpers.rs similarity index 100% rename from drepr/tests/helpers.rs rename to drepr/old_code/tests/helpers.rs diff --git a/drepr/tests/iterators/mod.rs b/drepr/old_code/tests/iterators/mod.rs similarity index 100% rename from drepr/tests/iterators/mod.rs rename to drepr/old_code/tests/iterators/mod.rs diff --git a/drepr/tests/iterators/test_insert_iterator.rs b/drepr/old_code/tests/iterators/test_insert_iterator.rs similarity index 100% rename from drepr/tests/iterators/test_insert_iterator.rs rename to drepr/old_code/tests/iterators/test_insert_iterator.rs diff --git a/drepr/tests/iterators/test_known_size_iterator.rs b/drepr/old_code/tests/iterators/test_known_size_iterator.rs similarity index 100% rename from drepr/tests/iterators/test_known_size_iterator.rs rename to drepr/old_code/tests/iterators/test_known_size_iterator.rs diff --git a/drepr/tests/iterators/test_unknown_size_iterator.rs b/drepr/old_code/tests/iterators/test_unknown_size_iterator.rs similarity index 100% rename from drepr/tests/iterators/test_unknown_size_iterator.rs rename to drepr/old_code/tests/iterators/test_unknown_size_iterator.rs diff --git a/drepr/tests/pydrepr/__init__.py b/drepr/old_code/tests/pydrepr/__init__.py similarity index 100% rename from drepr/tests/pydrepr/__init__.py rename to drepr/old_code/tests/pydrepr/__init__.py diff --git a/drepr/tests/pydrepr/test_engine.py b/drepr/old_code/tests/pydrepr/test_engine.py similarity index 100% rename from drepr/tests/pydrepr/test_engine.py rename to drepr/old_code/tests/pydrepr/test_engine.py diff --git a/drepr/tests/pydrepr/test_repr.py b/drepr/old_code/tests/pydrepr/test_repr.py similarity index 100% rename from drepr/tests/pydrepr/test_repr.py rename to drepr/old_code/tests/pydrepr/test_repr.py diff --git a/drepr/tests/pydrepr/test_repr_builder.py b/drepr/old_code/tests/pydrepr/test_repr_builder.py similarity index 100% rename from drepr/tests/pydrepr/test_repr_builder.py rename to drepr/old_code/tests/pydrepr/test_repr_builder.py diff --git a/drepr/tests/readers/mod.rs b/drepr/old_code/tests/readers/mod.rs similarity index 100% rename from drepr/tests/readers/mod.rs rename to drepr/old_code/tests/readers/mod.rs diff --git a/drepr/tests/runner.rs b/drepr/old_code/tests/runner.rs similarity index 100% rename from drepr/tests/runner.rs rename to drepr/old_code/tests/runner.rs diff --git a/pydrepr/drepr/__init__.py b/pydrepr/drepr/__init__.py index c7b2990..f6e9dfe 100644 --- a/pydrepr/drepr/__init__.py +++ b/pydrepr/drepr/__init__.py @@ -1,6 +1,6 @@ from drepr.version import __version__ from drepr.engine import execute, FileOutput, MemoryOutput, OutputFormat -from drepr.graph import Graph +from drepr.graph_deprecated import Graph from drepr.models import DRepr, DReprBuilder from drepr import outputs from drepr import models \ No newline at end of file diff --git a/pydrepr/drepr/engine.py b/pydrepr/drepr/engine.py index b290683..a95d42e 100644 --- a/pydrepr/drepr/engine.py +++ b/pydrepr/drepr/engine.py @@ -1,7 +1,7 @@ import copy -from dataclasses import dataclass, asdict +from dataclasses import dataclass from enum import Enum -from typing import Dict, Union, Tuple, NamedTuple, List +from typing import Dict, Union, Tuple, List import ujson, traceback @@ -23,7 +23,6 @@ def execute(ds_model: DRepr, output: "Output", debug: bool = False): ptr = None - if isinstance(resources, (str, tuple)): resources = {DEFAULT_RESOURCE_ID: resources} @@ -176,7 +175,6 @@ class OutputFormat(Enum): TTL = "ttl" GraphJSON = "graph_json" GraphPy = "graph_py" - NDArray = "ndarray" @dataclass diff --git a/pydrepr/drepr/graph.py b/pydrepr/drepr/graph_deprecated.py similarity index 100% rename from pydrepr/drepr/graph.py rename to pydrepr/drepr/graph_deprecated.py diff --git a/pydrepr/drepr/outputs/array_backend/array_backend.py b/pydrepr/drepr/outputs/array_backend/array_backend.py index 4d0620f..594854b 100644 --- a/pydrepr/drepr/outputs/array_backend/array_backend.py +++ b/pydrepr/drepr/outputs/array_backend/array_backend.py @@ -75,5 +75,5 @@ def c(self, class_uri: str) -> BaseLstOutputClass: def cid(self, class_id: str) -> ArrayClass: return self.classes[class_id] - def _get_sm(self) -> SemanticModel: + def get_sm(self) -> SemanticModel: return self.sm \ No newline at end of file diff --git a/pydrepr/drepr/outputs/base_output_sm.py b/pydrepr/drepr/outputs/base_output_sm.py index 1959cad..1f82a74 100644 --- a/pydrepr/drepr/outputs/base_output_sm.py +++ b/pydrepr/drepr/outputs/base_output_sm.py @@ -38,11 +38,11 @@ def cid(self, class_id: str) -> BaseOutputClass: pass @abstractmethod - def _get_sm(self) -> SemanticModel: + def get_sm(self) -> SemanticModel: pass def ns(self, uri: str) -> Namespace: - sm = self._get_sm() + sm = self.get_sm() if not hasattr(sm, '_inverted_prefixes'): sm._inverted_prefixes = {v: k for k, v in sm.prefixes.items()} diff --git a/pydrepr/drepr/outputs/graph_backend/graph_backend.py b/pydrepr/drepr/outputs/graph_backend/graph_backend.py index 09b00d5..b60d675 100644 --- a/pydrepr/drepr/outputs/graph_backend/graph_backend.py +++ b/pydrepr/drepr/outputs/graph_backend/graph_backend.py @@ -30,8 +30,10 @@ def __init__(self, class2nodes: Dict[str, List[dict]], drepr: DRepr): self.uri2classes[c.label].append(self.classes[c.node_id]) @classmethod - def from_drepr(cls, drepr_file: str, resources: Union[str, Dict[str, str]]) -> "GraphBackend": - ds_model = DRepr.parse_from_file(drepr_file) + def from_drepr(cls, ds_model: Union[DRepr, str], resources: Union[str, Dict[str, str]]) -> "GraphBackend": + if type(ds_model) is str: + ds_model = DRepr.parse_from_file(ds_model) + class2nodes = execute(ds_model, resources, MemoryOutput(OutputFormat.GraphPy)) return cls(class2nodes, ds_model) @@ -47,6 +49,6 @@ def c(self, class_uri: str) -> BaseLstOutputClass: def cid(self, class_id: str) -> GraphClass: return self.classes[class_id] - def _get_sm(self) -> SemanticModel: + def get_sm(self) -> SemanticModel: return self.sm diff --git a/pydrepr/drepr/version.py b/pydrepr/drepr/version.py index 5ed51b5..a221dff 100644 --- a/pydrepr/drepr/version.py +++ b/pydrepr/drepr/version.py @@ -1,3 +1,3 @@ -__version__ = "2.6" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! -__engine_version__ = "1.0.7" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! -__engine_release_tag__ = "2.6" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! +__version__ = "2.7" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! +__engine_version__ = "1.0.8" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! +__engine_release_tag__ = "2.7" # ___PKG_VERSION___: DO NOT MODIFY the version here. Update it via version_manager.py! diff --git a/pydrepr/examples/climate_data/geospatial_nb.ipynb b/pydrepr/examples/climate_data/geospatial_nb.ipynb index 5165eda..6026201 100644 --- a/pydrepr/examples/climate_data/geospatial_nb.ipynb +++ b/pydrepr/examples/climate_data/geospatial_nb.ipynb @@ -17,11 +17,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "jupyter": { - "source_hidden": true - } - }, + "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt, rdflib, pandas as pd, numpy as np, sys, os, random, math, fiona\n", @@ -58,7 +54,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "drepr version: 2.6\n" + "drepr version: 2.7\n" ] } ], @@ -198,14 +194,14 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "weather_dataset = {\n", " \"gldas\": {\"repr\": \"./gldas.yml\", \"data\": \"GLDAS_NOAH025_3H.A20080101.0000.021.nc4\"},\n", " \"gpm\": {\"repr\": \"./gpm.yml\", \"data\": \"3B-MO.MS.MRG.3IMERG.20080101-S000000-E235959.01.V06B.HDF5.nc4\"},\n", - "}['gpm']\n", + "}['gldas']\n", "variable = \"atmosphere_water__precipitation_mass_flux\"" ] }, @@ -218,7 +214,16 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from drepr.engine import complete_description" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ diff --git a/pydrepr/tests/drepr/engine/__init__.py b/pydrepr/tests/drepr/engine/__init__.py new file mode 100644 index 0000000..4fa6397 --- /dev/null +++ b/pydrepr/tests/drepr/engine/__init__.py @@ -0,0 +1 @@ +from typing import List, Dict, Tuple, Callable, Any, Optional \ No newline at end of file diff --git a/pydrepr/tests/drepr/engine/conftest.py b/pydrepr/tests/drepr/engine/conftest.py new file mode 100644 index 0000000..7eed39d --- /dev/null +++ b/pydrepr/tests/drepr/engine/conftest.py @@ -0,0 +1,25 @@ +from pathlib import Path +from typing import List, Dict, Tuple, Callable, Any, Optional + +import pytest + +from drepr import DRepr + + +def get_drepr(dataset_dir: Path): + return DRepr.parse_from_file(str(dataset_dir / "model.yml")) + + +@pytest.fixture() +def d_s04(resource_dir): + o = get_drepr(resource_dir / "s04_shorten_gldas") + o.__name__ = "d_s04" + return o + + +@pytest.fixture() +def d_s05(resource_dir): + o = get_drepr(resource_dir / "s05_full_gldas") + o.__name__ = "d_s05" + return o + diff --git a/pydrepr/tests/drepr/engine/test_complete_description.py b/pydrepr/tests/drepr/engine/test_complete_description.py new file mode 100644 index 0000000..a9767c7 --- /dev/null +++ b/pydrepr/tests/drepr/engine/test_complete_description.py @@ -0,0 +1,27 @@ +from typing import List, Dict, Tuple, Callable, Any, Optional + +from drepr import DRepr, outputs +from drepr.engine import complete_description + + +def test_complete_description(d_s04: DRepr): + model2subjects = { + "d_s04": { + ("mint:Variable:1", "dnode:Rainf_f_tavg"), + ("mint:Variable:2", "dnode:Albedo_inst") + } + } + + for ds_model in [d_s04]: + plan = complete_description(ds_model) + + # check all subjects are inferred correctly (which shows that all inferred alignments are correct) + subjects = model2subjects[ds_model.__name__] + assert len(subjects) > 0 + + for edge in plan.sm.edges.values(): + pair = (edge.source_id, edge.target_id) + if pair in subjects: + assert edge.is_subject is True + subjects.remove(pair) + assert len(subjects) == 0 diff --git a/pydrepr/tests/drepr/outputs/conftest.py b/pydrepr/tests/drepr/outputs/conftest.py index 62d552c..3c97263 100644 --- a/pydrepr/tests/drepr/outputs/conftest.py +++ b/pydrepr/tests/drepr/outputs/conftest.py @@ -17,14 +17,14 @@ def get_backends(dataset_dir: Path): @pytest.fixture() def s01(resource_dir): - return get_backends(resource_dir / "synthesis_s1") + return get_backends(resource_dir / "s01_synthesis") @pytest.fixture() def s02(resource_dir): - return get_backends(resource_dir / "synthesis_s2") + return get_backends(resource_dir / "s02_synthesis") @pytest.fixture() def s03(resource_dir): - return get_backends(resource_dir / "synthesis_s3") + return get_backends(resource_dir / "s03_synthesis") diff --git a/pydrepr/tests/resources/synthesis_s1/model.yml b/pydrepr/tests/resources/s01_synthesis/model.yml similarity index 100% rename from pydrepr/tests/resources/synthesis_s1/model.yml rename to pydrepr/tests/resources/s01_synthesis/model.yml diff --git a/pydrepr/tests/resources/synthesis_s1/resource.json b/pydrepr/tests/resources/s01_synthesis/resource.json similarity index 100% rename from pydrepr/tests/resources/synthesis_s1/resource.json rename to pydrepr/tests/resources/s01_synthesis/resource.json diff --git a/pydrepr/tests/resources/synthesis_s2/model.yml b/pydrepr/tests/resources/s02_synthesis/model.yml similarity index 100% rename from pydrepr/tests/resources/synthesis_s2/model.yml rename to pydrepr/tests/resources/s02_synthesis/model.yml diff --git a/pydrepr/tests/resources/synthesis_s2/resource.json b/pydrepr/tests/resources/s02_synthesis/resource.json similarity index 100% rename from pydrepr/tests/resources/synthesis_s2/resource.json rename to pydrepr/tests/resources/s02_synthesis/resource.json diff --git a/pydrepr/tests/resources/synthesis_s3/model.yml b/pydrepr/tests/resources/s03_synthesis/model.yml similarity index 100% rename from pydrepr/tests/resources/synthesis_s3/model.yml rename to pydrepr/tests/resources/s03_synthesis/model.yml diff --git a/pydrepr/tests/resources/synthesis_s3/resource.json b/pydrepr/tests/resources/s03_synthesis/resource.json similarity index 100% rename from pydrepr/tests/resources/synthesis_s3/resource.json rename to pydrepr/tests/resources/s03_synthesis/resource.json diff --git a/pydrepr/tests/resources/s04_shorten_gldas/model.yml b/pydrepr/tests/resources/s04_shorten_gldas/model.yml new file mode 100644 index 0000000..9a9782f --- /dev/null +++ b/pydrepr/tests/resources/s04_shorten_gldas/model.yml @@ -0,0 +1,106 @@ +version: "2" +resources: netcdf4 +preprocessing: + - type: pmap + path: $.@.MAP_PROJECTION + code: | + if value == "EQUIDISTANT CYLINDRICAL": + # https://epsg.io/32663 this is different to https://epsg.io/4326 + return 32663 + raise NotImplementedError() + - type: pmap + path: $.@.SOUTH_WEST_CORNER_LAT + code: | + dy = context.get_value(["@", "DY"]) + y_0 = value - dy/2 + return y_0 + - type: pmap + path: $.@.SOUTH_WEST_CORNER_LON + code: | + dx = context.get_value(["@", "DX"]) + x_0 = value - dx/2 + return x_0 +attributes: + latitude: $.lat.data[:] + longitude: $.lon.data[:] + projection: $.@.MAP_PROJECTION + dx: $.@.DX + dy: $.@.DY + x_0: $.@.SOUTH_WEST_CORNER_LON + y_0: $.@.SOUTH_WEST_CORNER_LAT + time: $.time.data[0] + # list of variables + Rainf_f_tavg: + path: $.Rainf_f_tavg.data[0][:][:] + missing_values: [-9999] + Albedo_inst: + path: $.Albedo_inst.data[0][:][:] + missing_values: [-9999] +alignments: + - type: dimension + value: Rainf_f_tavg:3 <-> latitude:2 + - type: dimension + value: Rainf_f_tavg:4 <-> longitude:2 + - type: dimension + source: Rainf_f_tavg + target: time + aligned_dims: [] + - type: dimension + source: Rainf_f_tavg + target: projection + aligned_dims: [] + - type: dimension + source: projection + target: x_0 + aligned_dims: [] + - type: dimension + source: projection + target: y_0 + aligned_dims: [] + - type: dimension + source: projection + target: dx + aligned_dims: [] + - type: dimension + source: projection + target: dy + aligned_dims: [] + - type: dimension + source: Rainf_f_tavg + target: Albedo_inst + aligned_dims: [{ "source": 3, "target": 3 }, { "source": 4, "target": 4 }] +semantic_model: + mint:Variable:1: + properties: + - [rdf:value, Rainf_f_tavg] + - [mint-geo:lat, latitude] + - [mint-geo:long, longitude] + - [mint:time, time] + static_properties: + - [mint:standardName, atmosphere_water__precipitation_mass_flux] + links: + - [mint-geo:raster, mint-geo:Raster:1] + mint:Variable:2: + properties: + - [rdf:value, Albedo_inst] + - [mint-geo:lat, latitude] + - [mint-geo:long, longitude] + - [mint:time, time] + static_properties: + - [mint:standardName, land_surface__albedo] + links: + - [mint-geo:raster, mint-geo:Raster:1] + mint-geo:Raster:1: + properties: + - [mint-geo:epsg, projection] + - [mint-geo:x_0, x_0] + - [mint-geo:y_0, y_0] + - [mint-geo:dx, dx] + - [mint-geo:dy, dy] + static_properties: + - [mint-geo:x_slope, 0] + - [mint-geo:y_slope, 0] + prefixes: + mint: https://mint.isi.edu/ + # do we need this? our latitude/longitude don't have CRS + mint-geo: https://mint.isi.edu/geo diff --git a/version_manager.py b/version_manager.py index 1f9f522..12fba74 100644 --- a/version_manager.py +++ b/version_manager.py @@ -4,10 +4,10 @@ # ================================================================================ # Note: update the version by changing variables before the `=...=` line -DREPR_PYLIB_VESRION = "2.6" -DREPR_ENGINE_VERSION = "1.0.7" +DREPR_PYLIB_VESRION = "2.7" +DREPR_ENGINE_VERSION = "1.0.8" # this tag marks the release which contains the pre-built engine in it. -DREPR_ENGINE_RELEASE_TAG = "2.6" +DREPR_ENGINE_RELEASE_TAG = "2.7" # ================================================================================