fix: escape column names correctly (#2007)
We used to emit `nested.field`, quoting the whole dotted path as a single identifier, and it is wrong.

This is changed to `nested`.`field`, quoting each path segment separately.
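For illustration, a minimal sketch (ours, not part of the commit) of the before/after behavior, using the `escape_column_name` helper added in this diff:

    // Before: the whole dotted path was wrapped in one pair of backticks,
    // which parses as a single top-level identifier rather than a nested path:
    //     format!("`{}`", "nested.field")   // => "`nested.field`"
    // After: each segment is quoted on its own, so the reference resolves:
    assert_eq!(escape_column_name("nested.field"), "`nested`.`field`");
    assert_eq!(escape_column_name("flat"), "`flat`");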
chebbyChefNEQ authored Feb 28, 2024
1 parent ea621cc commit f628c1e
Showing 1 changed file with 73 additions and 1 deletion.
rust/lance/src/dataset/scanner.rs
@@ -187,6 +187,13 @@ pub struct Scanner {
fragments: Option<Vec<Fragment>>,
}

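/// Escape a (possibly nested) column name for use in an SQL expression:
/// each dot-separated segment gets its own pair of backticks, so
/// "nested.field" becomes "`nested`.`field`" instead of the single quoted
/// identifier "`nested.field`".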
fn escape_column_name(name: &str) -> String {
name.split('.')
.map(|s| format!("`{}`", s))
.collect::<Vec<_>>()
.join(".")
}

impl Scanner {
pub fn new(dataset: Arc<Dataset>) -> Self {
let projection = dataset.schema().clone();
@@ -254,7 +261,7 @@ impl Scanner {
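// Escape each requested column so dotted paths such as "struct.i" are
// quoted per segment ("`struct`.`i`") in the generated projection expression.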
self.project_with_transform(
&columns
.iter()
-            .map(|c| (c.as_ref(), format!("`{}`", c.as_ref())))
+            .map(|c| (c.as_ref(), escape_column_name(c.as_ref())))
.collect::<Vec<_>>(),
)
}
@@ -3400,6 +3407,71 @@ mod test {
Ok(())
}

#[tokio::test]
async fn test_project_nested() -> Result<()> {
let struct_i_field = ArrowField::new("i", DataType::Int32, true);
let struct_o_field = ArrowField::new("o", DataType::Utf8, true);
let schema = Arc::new(ArrowSchema::new(vec![
ArrowField::new(
"struct",
DataType::Struct(vec![struct_i_field.clone(), struct_o_field.clone()].into()),
true,
),
ArrowField::new("s", DataType::Utf8, true),
]));

let input_batches: Vec<RecordBatch> = (0..5)
.map(|i| {
let struct_i_arr: Arc<Int32Array> =
Arc::new(Int32Array::from_iter_values(i * 20..(i + 1) * 20));
let struct_o_arr: Arc<StringArray> = Arc::new(StringArray::from_iter_values(
(i * 20..(i + 1) * 20).map(|v| format!("o-{:02}", v)),
));
RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(StructArray::from(vec![
(Arc::new(struct_i_field.clone()), struct_i_arr as ArrayRef),
(Arc::new(struct_o_field.clone()), struct_o_arr as ArrayRef),
])),
Arc::new(StringArray::from_iter_values(
(i * 20..(i + 1) * 20).map(|v| format!("s-{}", v)),
)),
],
)
.unwrap()
})
.collect();
let batches =
RecordBatchIterator::new(input_batches.clone().into_iter().map(Ok), schema.clone());
let test_dir = tempdir().unwrap();
let test_uri = test_dir.path().to_str().unwrap();
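// 5 batches x 20 rows = 100 rows; with 40 rows per file and 10 per group,
// the scan has to cross file and row-group boundaries.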
let write_params = WriteParams {
max_rows_per_file: 40,
max_rows_per_group: 10,
..Default::default()
};
Dataset::write(batches, test_uri, Some(write_params))
.await
.unwrap();

let dataset = Dataset::open(test_uri).await.unwrap();

let batches = dataset
.scan()
.project(&["struct.i"])
.unwrap()
.try_into_stream()
.await
.unwrap()
.try_collect::<Vec<_>>()
.await
.unwrap();
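// The projected nested field surfaces as a single flattened column
// named "struct.i".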
let batch = concat_batches(&batches[0].schema(), &batches).unwrap();
assert!(batch.column_by_name("struct.i").is_some());
Ok(())
}

#[tokio::test]
async fn test_plans() -> Result<()> {
// Create a vector dataset
