Skip to content

Commit

Permalink
Merge branch 'main' into concat-align-error
Browse files Browse the repository at this point in the history
  • Loading branch information
mcrumiller committed Jan 26, 2024
2 parents 6f92b44 + 93b5755 commit 4b36412
Show file tree
Hide file tree
Showing 220 changed files with 4,894 additions and 2,834 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ hashbrown = { version = "0.14", features = ["rayon", "ahash"] }
hex = "0.4.3"
indexmap = { version = "2", features = ["std"] }
itoa = "1.0.6"
itoap = { version = "1", features = ["simd"] }
atoi_simd = "0.15.5"
fast-float = { version = "0.2" }
memchr = "2.6"
Expand Down
21 changes: 12 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ else
VENV_BIN=$(VENV)/bin
endif

# Shell suffix appended to the `maturin develop` command lines below.
# It pipes the command's output through `grep -v` to drop the pip warning
# lines containing "don't match your environment", then re-tests the exit
# status of the first command in that pipeline so that a failing maturin
# build still fails the recipe (grep's own exit status is discarded).
# `$$` is Make's escape for a literal `$`, so the shell sees ${PIPESTATUS[0]}.
# NOTE: PIPESTATUS is a bash array - recipes using this must run under bash.
FILTER_PIP_WARNINGS=| grep -v "don't match your environment"; test $${PIPESTATUS[0]} -eq 0

.venv: ## Set up Python virtual environment and install requirements
python3 -m venv $(VENV)
$(MAKE) requirements
Expand All @@ -26,55 +29,55 @@ requirements: .venv ## Install/refresh Python project requirements
build: .venv ## Compile and install Python Polars for development
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-debug-opt
build-debug-opt: .venv ## Compile and install Python Polars with minimal optimizations turned on
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml --profile opt-dev \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-debug-opt-subset
build-debug-opt-subset: .venv ## Compile and install Python Polars with minimal optimizations turned on and no default features
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml --no-default-features --profile opt-dev \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-opt
build-opt: .venv ## Compile and install Python Polars with nearly full optimization on and debug assertions turned off, but with debug symbols on
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml --profile debug-release \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-release
build-release: .venv ## Compile and install a faster Python Polars binary with full optimizations
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml --release \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-native
build-native: .venv ## Same as build, except with native CPU optimizations turned on
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml -- -C target-cpu=native \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-debug-opt-native
build-debug-opt-native: .venv ## Same as build-debug-opt, except with native CPU optimizations turned on
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml --profile opt-dev -- -C target-cpu=native \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-opt-native
build-opt-native: .venv ## Same as build-opt, except with native CPU optimizations turned on
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml --profile debug-release -- -C target-cpu=native \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: build-release-native
build-release-native: .venv ## Same as build-release, except with native CPU optimizations turned on
@unset CONDA_PREFIX && source $(VENV_BIN)/activate \
&& maturin develop -m py-polars/Cargo.toml --release -- -C target-cpu=native \
| grep -v "don't match your environment" || true
$(FILTER_PIP_WARNINGS)

.PHONY: clippy
clippy: ## Run clippy with all features
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ ethnum = { workspace = true }
atoi_simd = { workspace = true, optional = true }
fast-float = { workspace = true, optional = true }
itoa = { workspace = true, optional = true }
itoap = { workspace = true, optional = true }
ryu = { workspace = true, optional = true }

regex = { workspace = true, optional = true }
Expand Down Expand Up @@ -161,7 +162,7 @@ simd = []
# polars-arrow
timezones = []
dtype-array = []
dtype-decimal = ["atoi"]
dtype-decimal = ["atoi", "itoap"]
bigidx = []
nightly = []
performant = []
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/array/binview/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::sync::Arc;
use polars_error::PolarsResult;

use super::BinaryViewArrayGeneric;
use crate::array::binview::ViewType;
use crate::array::binview::{View, ViewType};
use crate::array::{FromFfi, ToFfi};
use crate::bitmap::align;
use crate::ffi;
Expand Down Expand Up @@ -60,7 +60,7 @@ impl<T: ViewType + ?Sized, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryViewArray
let data_type = array.data_type().clone();

let validity = unsafe { array.validity() }?;
let views = unsafe { array.buffer::<u128>(1) }?;
let views = unsafe { array.buffer::<View>(1) }?;

// 2 - validity + views
let n_buffers = array.n_buffers();
Expand Down
23 changes: 11 additions & 12 deletions crates/polars-arrow/src/array/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use crate::array::iterator::NonNullValuesIter;
use crate::bitmap::utils::{BitmapIter, ZipValidity};
pub type BinaryViewArray = BinaryViewArrayGeneric<[u8]>;
pub type Utf8ViewArray = BinaryViewArrayGeneric<str>;
pub use view::View;

pub type MutablePlString = MutableBinaryViewArray<str>;
pub type MutablePlBinary = MutableBinaryViewArray<[u8]>;
Expand Down Expand Up @@ -106,7 +107,7 @@ impl ViewType for [u8] {

pub struct BinaryViewArrayGeneric<T: ViewType + ?Sized> {
data_type: ArrowDataType,
views: Buffer<u128>,
views: Buffer<View>,
buffers: Arc<[Buffer<u8>]>,
// Raw buffer access. (pointer, len).
raw_buffers: Arc<[(*const u8, usize)]>,
Expand Down Expand Up @@ -157,7 +158,7 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
/// - The offsets match the buffers.
pub unsafe fn new_unchecked(
data_type: ArrowDataType,
views: Buffer<u128>,
views: Buffer<View>,
buffers: Arc<[Buffer<u8>]>,
validity: Option<Bitmap>,
total_bytes_len: usize,
Expand All @@ -181,7 +182,7 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
/// The caller must ensure the invariants
pub unsafe fn new_unchecked_unknown_md(
data_type: ArrowDataType,
views: Buffer<u128>,
views: Buffer<View>,
buffers: Arc<[Buffer<u8>]>,
validity: Option<Bitmap>,
total_buffer_len: Option<usize>,
Expand All @@ -207,13 +208,13 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
self.buffers.iter().map(|buf| buf.len() as i64).collect()
}

pub fn views(&self) -> &Buffer<u128> {
pub fn views(&self) -> &Buffer<View> {
&self.views
}

pub fn try_new(
data_type: ArrowDataType,
views: Buffer<u128>,
views: Buffer<View>,
buffers: Arc<[Buffer<u8>]>,
validity: Option<Bitmap>,
) -> PolarsResult<Self> {
Expand Down Expand Up @@ -271,7 +272,7 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &T {
let v = *self.views.get_unchecked(i);
let len = v as u32;
let len = v.length;

// view layout:
// length: 4 bytes
Expand All @@ -287,11 +288,9 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
let ptr = self.views.as_ptr() as *const u8;
std::slice::from_raw_parts(ptr.add(i * 16 + 4), len as usize)
} else {
let buffer_idx = (v >> 64) as u32;
let offset = (v >> 96) as u32;
let (data_ptr, data_len) = *self.raw_buffers.get_unchecked(buffer_idx as usize);
let (data_ptr, data_len) = *self.raw_buffers.get_unchecked(v.buffer_idx as usize);
let data = std::slice::from_raw_parts(data_ptr, data_len);
let offset = offset as usize;
let offset = v.offset as usize;
data.get_unchecked(offset..offset + len as usize)
};
T::from_bytes_unchecked(bytes)
Expand All @@ -308,7 +307,7 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
}

pub fn len_iter(&self) -> impl Iterator<Item = u32> + '_ {
self.views.iter().map(|v| *v as u32)
self.views.iter().map(|v| v.length)
}

/// Returns an iterator of the non-null values.
Expand All @@ -317,7 +316,7 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
}

/// Returns an iterator of the non-null values.
pub fn non_null_views_iter(&self) -> NonNullValuesIter<'_, Buffer<u128>> {
pub fn non_null_views_iter(&self) -> NonNullValuesIter<'_, Buffer<View>> {
NonNullValuesIter::new(self.views(), self.validity())
}

Expand Down
34 changes: 17 additions & 17 deletions crates/polars-arrow/src/array/binview/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@ use polars_utils::slice::GetSaferUnchecked;
use crate::array::binview::iterator::MutableBinaryViewValueIter;
use crate::array::binview::view::validate_utf8_only;
use crate::array::binview::{BinaryViewArrayGeneric, ViewType};
use crate::array::{Array, MutableArray};
use crate::array::{Array, MutableArray, View};
use crate::bitmap::MutableBitmap;
use crate::buffer::Buffer;
use crate::datatypes::ArrowDataType;
use crate::legacy::trusted_len::TrustedLenPush;
use crate::trusted_len::TrustedLen;
use crate::types::NativeType;

const DEFAULT_BLOCK_SIZE: usize = 8 * 1024;

pub struct MutableBinaryViewArray<T: ViewType + ?Sized> {
pub(super) views: Vec<u128>,
pub(super) views: Vec<View>,
pub(super) completed_buffers: Vec<Buffer<u8>>,
pub(super) in_progress_buffer: Vec<u8>,
pub(super) validity: Option<MutableBitmap>,
Expand Down Expand Up @@ -89,12 +90,12 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
}

#[inline]
pub fn views_mut(&mut self) -> &mut Vec<u128> {
pub fn views_mut(&mut self) -> &mut Vec<View> {
&mut self.views
}

#[inline]
pub fn views(&self) -> &[u128] {
pub fn views(&self) -> &[View] {
&self.views
}

Expand Down Expand Up @@ -130,19 +131,17 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
/// - caller must allocate enough capacity
/// - caller must ensure the view and buffers match.
#[inline]
pub unsafe fn push_view(&mut self, v: u128, buffers: &[(*const u8, usize)]) {
let len = v as u32;
pub unsafe fn push_view(&mut self, v: View, buffers: &[(*const u8, usize)]) {
let len = v.length;
self.total_bytes_len += len as usize;
if len <= 12 {
debug_assert!(self.views.capacity() > self.views.len());
self.views.push_unchecked(v)
} else {
self.total_buffer_len += len as usize;
let buffer_idx = (v >> 64) as u32;
let offset = (v >> 96) as u32;
let (data_ptr, data_len) = *buffers.get_unchecked_release(buffer_idx as usize);
let (data_ptr, data_len) = *buffers.get_unchecked_release(v.buffer_idx as usize);
let data = std::slice::from_raw_parts(data_ptr, data_len);
let offset = offset as usize;
let offset = v.offset as usize;
let bytes = data.get_unchecked_release(offset..offset + len as usize);
let t = T::from_bytes_unchecked(bytes);
self.push_value_ignore_validity(t)
Expand Down Expand Up @@ -180,7 +179,7 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
payload[8..12].copy_from_slice(&buffer_idx.to_le_bytes());
payload[12..16].copy_from_slice(&offset.to_le_bytes());
}
let value = u128::from_le_bytes(payload);
let value = View::from_le_bytes(payload);
self.views.push(value);
}

Expand All @@ -200,7 +199,7 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
}

pub fn push_null(&mut self) {
self.views.push(0);
self.views.push(View::default());
match &mut self.validity {
Some(validity) => validity.push(false),
None => self.init_validity(true),
Expand All @@ -211,7 +210,8 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
if self.validity.is_none() && additional > 0 {
self.init_validity(false);
}
self.views.extend(std::iter::repeat(0).take(additional));
self.views
.extend(std::iter::repeat(View::default()).take(additional));
if let Some(validity) = &mut self.validity {
validity.extend_constant(additional, false);
}
Expand All @@ -234,7 +234,7 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
self.push_value_ignore_validity(v);
self.views.pop().unwrap()
})
.unwrap_or(0);
.unwrap_or_default();
self.views
.extend(std::iter::repeat(view_value).take(additional));
}
Expand Down Expand Up @@ -326,7 +326,7 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &T {
let v = *self.views.get_unchecked(i);
let len = v as u32;
let len = v.length;

// view layout:
// length: 4 bytes
Expand All @@ -341,8 +341,8 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
let ptr = self.views.as_ptr() as *const u8;
std::slice::from_raw_parts(ptr.add(i * 16 + 4), len as usize)
} else {
let buffer_idx = ((v >> 64) as u32) as usize;
let offset = (v >> 96) as u32;
let buffer_idx = v.buffer_idx as usize;
let offset = v.offset;

let data = if buffer_idx == self.completed_buffers.len() {
self.in_progress_buffer.as_slice()
Expand Down
Loading

0 comments on commit 4b36412

Please sign in to comment.