diff --git a/tests/test_util/mod.rs b/tests/test_util/mod.rs deleted file mode 100644 index 687dc72..0000000 --- a/tests/test_util/mod.rs +++ /dev/null @@ -1,245 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utility functions to make testing DataFusion based crates easier - -use std::any::Any; -use std::{env, error::Error, path::PathBuf, sync::Arc}; - -use futures::Stream; - -/// Compares formatted output of a record batch with an expected -/// vector of strings, with the result of pretty formatting record -/// batches. This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. -/// -/// Expects to be called about like this: -/// -/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules! assert_batches_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let expected_lines: Vec = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) - .unwrap() - .to_string(); - - let actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} - -/// Compares formatted output of a record batch with an expected -/// vector of strings in a way that order does not matter. -/// This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. -/// -/// Expects to be called about like this: -/// -/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules! assert_batches_sorted_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let mut expected_lines: Vec = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - // sort except for header + footer - let num_lines = expected_lines.len(); - if num_lines > 3 { - expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) - .unwrap() - .to_string(); - // fix for windows: \r\n --> - - let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - // sort except for header + footer - let num_lines = actual_lines.len(); - if num_lines > 3 { - actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} - -/// Returns the arrow test data directory, which is by default stored -/// in a git submodule rooted at `testing/data`. -/// -/// The default can be overridden by the optional environment -/// variable `ARROW_TEST_DATA` -/// -/// panics when the directory can not be found. -/// -/// Example: -/// ``` -/// let testdata = datafusion::test_util::arrow_test_data(); -/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata); -/// assert!(std::path::PathBuf::from(csvdata).exists()); -/// ``` -pub fn arrow_test_data() -> String { - match get_data_dir("ARROW_TEST_DATA", "../../testing/data") { - Ok(pb) => pb.display().to_string(), - Err(err) => panic!("failed to get arrow data dir: {err}"), - } -} - -/// Returns the parquet test data directory, which is by default -/// stored in a git submodule rooted at -/// `parquet-testing/data`. -/// -/// The default can be overridden by the optional environment variable -/// `PARQUET_TEST_DATA` -/// -/// panics when the directory can not be found. -/// -/// Example: -/// ``` -/// let testdata = datafusion::test_util::parquet_test_data(); -/// let filename = format!("{}/binary.parquet", testdata); -/// assert!(std::path::PathBuf::from(filename).exists()); -/// ``` -pub fn parquet_test_data() -> String { - match get_data_dir("PARQUET_TEST_DATA", "../../parquet-testing/data") { - Ok(pb) => pb.display().to_string(), - Err(err) => panic!("failed to get parquet data dir: {err}"), - } -} - -/// Returns a directory path for finding test data. -/// -/// udf_env: name of an environment variable -/// -/// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR) -/// -/// Returns either: -/// The path referred to in `udf_env` if that variable is set and refers to a directory -/// The submodule_data directory relative to CARGO_MANIFEST_PATH -pub fn get_data_dir( - udf_env: &str, - submodule_data: &str, -) -> Result> { - // Try user defined env. - if let Ok(dir) = env::var(udf_env) { - let trimmed = dir.trim().to_string(); - if !trimmed.is_empty() { - let pb = PathBuf::from(trimmed); - if pb.is_dir() { - return Ok(pb); - } else { - return Err(format!( - "the data dir `{}` defined by env {} not found", - pb.display(), - udf_env - ) - .into()); - } - } - } - - // The env is undefined or its value is trimmed to empty, let's try default dir. - - // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package", - // set by `cargo run` or `cargo test`, see: - // https://doc.rust-lang.org/cargo/reference/environment-variables.html - let dir = env!("CARGO_MANIFEST_DIR"); - - let pb = PathBuf::from(dir).join(submodule_data); - if pb.is_dir() { - Ok(pb) - } else { - Err(format!( - "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\ - HINT: try running `git submodule update --init`", - udf_env, - pb.display(), - ).into()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::env; - - #[test] - fn test_data_dir() { - let udf_env = "get_data_dir"; - let cwd = env::current_dir().unwrap(); - - let existing_pb = cwd.join(".."); - let existing = existing_pb.display().to_string(); - let existing_str = existing.as_str(); - - let non_existing = cwd.join("non-existing-dir").display().to_string(); - let non_existing_str = non_existing.as_str(); - - env::set_var(udf_env, non_existing_str); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_err()); - - env::set_var(udf_env, ""); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); - - env::set_var(udf_env, " "); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); - - env::set_var(udf_env, existing_str); - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); - - env::remove_var(udf_env); - let res = get_data_dir(udf_env, non_existing_str); - assert!(res.is_err()); - - let res = get_data_dir(udf_env, existing_str); - assert!(res.is_ok()); - assert_eq!(res.unwrap(), existing_pb); - } - - #[test] - fn test_happy() { - let res = arrow_test_data(); - assert!(PathBuf::from(res).is_dir()); - - let res = parquet_test_data(); - assert!(PathBuf::from(res).is_dir()); - } -}