From ffc28f9157acba746b5b2eac7f8b8f535f6a44e6 Mon Sep 17 00:00:00 2001
From: overlookmotel
Date: Wed, 18 Dec 2024 13:48:38 +0000
Subject: [PATCH] feat(allocator): `Vec::into_string`

---
 Cargo.lock                      |  1 +
 crates/oxc_allocator/Cargo.toml |  1 +
 crates/oxc_allocator/src/vec.rs | 35 ++++++++++++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index 622b708445953..f9707090841d0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1452,6 +1452,7 @@ dependencies = [
  "bumpalo",
  "serde",
  "serde_json",
+ "simdutf8",
 ]
 
 [[package]]
diff --git a/crates/oxc_allocator/Cargo.toml b/crates/oxc_allocator/Cargo.toml
index ff524d718d3fb..734ba63d3ba49 100644
--- a/crates/oxc_allocator/Cargo.toml
+++ b/crates/oxc_allocator/Cargo.toml
@@ -21,6 +21,7 @@ doctest = false
 [dependencies]
 allocator-api2 = { workspace = true }
 bumpalo = { workspace = true, features = ["allocator-api2", "collections"] }
+simdutf8 = { workspace = true }
 
 serde = { workspace = true, optional = true }
 
diff --git a/crates/oxc_allocator/src/vec.rs b/crates/oxc_allocator/src/vec.rs
index 2feea6d088a2c..983b38e281b99 100644
--- a/crates/oxc_allocator/src/vec.rs
+++ b/crates/oxc_allocator/src/vec.rs
@@ -15,8 +15,9 @@ use allocator_api2::vec;
 use bumpalo::Bump;
 #[cfg(any(feature = "serialize", test))]
 use serde::{ser::SerializeSeq, Serialize, Serializer};
+use simdutf8::basic::{from_utf8, Utf8Error};
 
-use crate::{Allocator, Box};
+use crate::{Allocator, Box, String};
 
 /// A `Vec` without [`Drop`], which stores its data in the arena allocator.
 ///
@@ -180,6 +181,38 @@ impl<'alloc, T> Vec<'alloc, T> {
     }
 }
 
+impl<'alloc> Vec<'alloc, u8> {
+    /// Convert `Vec<u8>` into [`String`], checking first that it comprises a valid UTF-8 string.
+    ///
+    /// # Errors
+    /// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
+    pub fn into_string(self) -> Result<String<'alloc>, Utf8Error> {
+        // Check vec comprises a valid UTF-8 string.
+        from_utf8(&self.0)?;
+        // SAFETY: We just checked it's a valid UTF-8 string
+        let s = unsafe { self.into_string_unchecked() };
+        Ok(s)
+    }
+
+    /// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
+    ///
+    /// Does not copy the contents of the `Vec`, converts in place. This is a zero-cost operation.
+    ///
+    /// # Safety
+    /// Caller must ensure this `Vec<u8>` comprises a valid UTF-8 string.
+    #[inline] // `#[inline]` because this is a no-op at runtime
+    pub unsafe fn into_string_unchecked(self) -> String<'alloc> {
+        // Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
+        // and our inner `Vec` type is `allocator_api2::vec::Vec`.
+        let inner = ManuallyDrop::into_inner(self.0);
+        let (ptr, len, cap, bump) = inner.into_raw_parts_with_alloc();
+        // SAFETY: `ptr`, `len`, `cap` and `bump` all come from this `Vec`'s own raw parts, and both
+        // types store data in arena in same way. Caller guarantees the bytes are valid UTF-8.
+        // Lifetime of returned `String` is same as lifetime of original `Vec`.
+        unsafe { String::from_raw_parts_in(ptr, len, cap, bump) }
+    }
+}
+
 impl<'alloc, T> ops::Deref for Vec<'alloc, T> {
     type Target = vec::Vec<T, &'alloc Bump>;
 