From 50377fc87b88de5c22368c692320d3ea6bcbed2a Mon Sep 17 00:00:00 2001 From: Felix Chern Date: Mon, 23 Dec 2024 15:50:47 -0800 Subject: [PATCH] Fix OSS builds with the following changes: 1. Use CTAD to allow TriStatePtr to be used with non-unique_ptr data. 2. Migrate from old WORKSPACE to MODULE.bazel 3. Use pip python rule to manage pip dependencies required by array_record_datasource. PiperOrigin-RevId: 709169059 --- BUILD | 7 + MODULE.bazel | 65 +++++++++ WORKSPACE | 168 ------------------------ cpp/BUILD | 3 + cpp/array_record_reader.cc | 8 +- cpp/array_record_reader.h | 16 ++- cpp/array_record_writer.cc | 4 +- cpp/array_record_writer.h | 14 +- cpp/tri_state_ptr.h | 146 +++++++++++--------- cpp/tri_state_ptr_test.cc | 34 ++++- oss/runner_common.sh | 6 +- python/BUILD | 3 +- python/array_record_data_source.py | 3 +- python/array_record_data_source_test.py | 5 +- python/array_record_module_test.py | 4 +- requirements.in | 20 +++ requirements_lock.txt | 26 ++++ 17 files changed, 269 insertions(+), 263 deletions(-) create mode 100644 MODULE.bazel delete mode 100644 WORKSPACE create mode 100644 requirements.in create mode 100644 requirements_lock.txt diff --git a/BUILD b/BUILD index f20b80d..6980907 100644 --- a/BUILD +++ b/BUILD @@ -1,6 +1,13 @@ +load("@rules_python//python:pip.bzl", "compile_pip_requirements") py_library( name = "setup", srcs = ["setup.py"], ) + +compile_pip_requirements( + name = "requirements", + src = "requirements.in", + requirements_txt = "requirements_lock.txt", +) diff --git a/MODULE.bazel b/MODULE.bazel new file mode 100644 index 0000000..39fe279 --- /dev/null +++ b/MODULE.bazel @@ -0,0 +1,65 @@ +############################################################################### +# Bazel now uses Bzlmod by default to manage external dependencies. +# Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel. +# +# For more details, please check https://github.com/bazelbuild/bazel/issues/18958 +############################################################################### + +# TODO(fchern): automate version string alignment with setup.py +VERSION = "0.6.0" + +module( + name = "array_record", + version = VERSION, + repo_name = "com_google_array_record", +) + +bazel_dep(name = "abseil-cpp", version = "20240722.0", repo_name = "com_google_absl") +bazel_dep(name = "abseil-py", version = "2.1.0", repo_name = "com_google_absl_py") +bazel_dep(name = "pybind11_bazel", version = "2.12.0") +bazel_dep(name = "rules_proto", version = "7.0.2") +bazel_dep(name = "googletest", version = "1.15.2", repo_name = "com_google_googletest") +bazel_dep(name = "riegeli", version = "0.0.0-20240927-cdfb25a", repo_name = "com_google_riegeli") +bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "protobuf", version = "29.0", repo_name = "com_google_protobuf") +bazel_dep(name = "rules_python", version = "0.40.0") + +http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +# V3.4.0, 20210818 +http_archive( + name = "eigen3", + build_file_content = + """ +cc_library( + name = 'eigen3', + srcs = [], + includes = ['.'], + hdrs = glob(['Eigen/**', 'unsupported/Eigen/**']), + visibility = ['//visibility:public'], +) +""", + sha256 = "b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626", + strip_prefix = "eigen-3.4.0", + urls = [ + "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2", + ], +) + +PYTHON_VERSION = "3.10" + +python = use_extension("@rules_python//python/extensions:python.bzl", "python") +python.toolchain( + python_version = PYTHON_VERSION, +) + +pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") + +# requirements_lock.txt is generated by +# bazel run //:requirements.update +pip.parse( + hub_name = "pypi", + python_version = PYTHON_VERSION, + requirements_lock = "//:requirements_lock.txt", +) +use_repo(pip, "pypi") diff --git a/WORKSPACE b/WORKSPACE deleted file mode 100644 index 2faaa49..0000000 --- a/WORKSPACE +++ /dev/null @@ -1,168 +0,0 @@ -workspace(name = "array_record") - -load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -# Abseil LTS 20230125.0 -http_archive( - name = "com_google_absl", - sha256 = "987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed", - strip_prefix = "abseil-cpp-20230802.1", - urls = [ - "https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.tar.gz", - ], -) - -# Version: pypi-v0.11.0, 2020/10/27 -git_repository( - name = "com_google_absl_py", - commit = "127c98870edf5f03395ce9cf886266fa5f24455e", - remote = "https://github.com/abseil/abseil-py", - shallow_since = "1673401277 -0800", -) - -# Needed by com_google_riegeli -http_archive( - name = "org_brotli", - sha256 = "84a9a68ada813a59db94d83ea10c54155f1d34399baf377842ff3ab9b3b3256e", - strip_prefix = "brotli-3914999fcc1fda92e750ef9190aa6db9bf7bdb07", - urls = ["https://github.com/google/brotli/archive/3914999fcc1fda92e750ef9190aa6db9bf7bdb07.zip"], # 2022-11-17 -) - -# GoogleTest/GoogleMock framework. Used by most unit-tests. -http_archive( - name = "com_google_googletest", - sha256 = "24e06e79a78ca5794ec6ad2bf0a1f05515cd1d05a9e10d9a6dc853078b2f3914", - strip_prefix = "googletest-main", - urls = ["https://github.com/google/googletest/archive/main.zip"], -) - -# V3.4.0, 20210818 -http_archive( - name = "eigen3", - build_file_content = - """ -cc_library( - name = 'eigen3', - srcs = [], - includes = ['.'], - hdrs = glob(['Eigen/**', 'unsupported/Eigen/**']), - visibility = ['//visibility:public'], -) -""", - sha256 = "b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626", - strip_prefix = "eigen-3.4.0", - urls = [ - "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2", - ], -) - -# `pybind11_bazel` (https://github.com/pybind/pybind11_bazel): 20230130 -http_archive( - name = "pybind11_bazel", - sha256 = "b35f3abc3d52ee5c753fdeeb2b5129b99e796558754ca5d245e28e51c1072a21", - strip_prefix = "pybind11_bazel-5f458fa53870223a0de7eeb60480dd278b442698", - urls = ["https://github.com/pybind/pybind11_bazel/archive/5f458fa53870223a0de7eeb60480dd278b442698.tar.gz"], -) - -# V2.10.3, 20230130 -http_archive( - name = "pybind11", - build_file = "@pybind11_bazel//:pybind11.BUILD", - sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb", - strip_prefix = "pybind11-2.10.3", - urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"], -) - -load("@pybind11_bazel//:python_configure.bzl", "python_configure") - -python_configure(name = "local_config_python") - -# proto_library, cc_proto_library, and java_proto_library rules implicitly -# depend on @com_google_protobuf for protoc and proto runtimes. -# This statement defines the @com_google_protobuf repo. -http_archive( - name = "com_google_protobuf", - sha256 = "dc167b7d23ec0d6e4a3d4eae1798de6c8d162e69fa136d39753aaeb7a6e1289d", - strip_prefix = "protobuf-23.1", - urls = ["https://github.com/protocolbuffers/protobuf/archive/v23.1.tar.gz"], -) - -load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") - -protobuf_deps() - -http_archive( - name = "com_google_riegeli", - sha256 = "5615438b3809fdd62266030e2c6f19c457a15bfb6ef3aa8132503e8584305f8a", - strip_prefix = "riegeli-254e6d74ee0d325676739fe5075e5a1a895624cf", - urls = [ - "https://github.com/google/riegeli/archive/254e6d74ee0d325676739fe5075e5a1a895624cf.tar.gz", - ], -) - -# Riegeli's dependencies -http_archive( - name = "net_zstd", - build_file = "@com_google_riegeli//third_party:net_zstd.BUILD", - sha256 = "b6c537b53356a3af3ca3e621457751fa9a6ba96daf3aebb3526ae0f610863532", - strip_prefix = "zstd-1.4.5/lib", - urls = ["https://github.com/facebook/zstd/archive/v1.4.5.zip"], # 2020-05-22 -) - -http_archive( - name = "lz4", - build_file = "@com_google_riegeli//third_party:lz4.BUILD", - sha256 = "4ec935d99aa4950eadfefbd49c9fad863185ac24c32001162c44a683ef61b580", - strip_prefix = "lz4-1.9.3/lib", - urls = ["https://github.com/lz4/lz4/archive/refs/tags/v1.9.3.zip"], # 2020-11-16 -) - -http_archive( - name = "snappy", - build_file = "@com_google_riegeli//third_party:snappy.BUILD", - sha256 = "7ee7540b23ae04df961af24309a55484e7016106e979f83323536a1322cedf1b", - strip_prefix = "snappy-1.2.0", - urls = ["https://github.com/google/snappy/archive/1.2.0.zip"], # 2024-04-04 -) - -http_archive( - name = "crc32c", - build_file = "@com_google_riegeli//third_party:crc32.BUILD", - sha256 = "338f1d9d95753dc3cdd882dfb6e176bbb4b18353c29c411ebcb7b890f361722e", - strip_prefix = "crc32c-1.1.0", - urls = ["https://github.com/google/crc32c/archive/1.1.0.zip"], # 2019-05-24 -) - -http_archive( - name = "zlib", - build_file = "@com_google_riegeli//third_party:zlib.BUILD", - sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", - strip_prefix = "zlib-1.2.11", - urls = ["http://zlib.net/fossils/zlib-1.2.11.tar.gz"], # 2017-01-15 -) - -http_archive( - name = "highwayhash", - build_file = "@com_google_riegeli//third_party:highwayhash.BUILD", - sha256 = "5380cb7cf19e7c9591f31792b7794d48084f6a3ab7c03d637cd6a32cf2ee8686", - strip_prefix = "highwayhash-a7f68e2f95fac08b24327d74747521cf634d5aff", - urls = ["https://github.com/google/highwayhash/archive/a7f68e2f95fac08b24327d74747521cf634d5aff.zip"], # 2023-08-09 -) - -# Tensorflow, 20230705 -http_archive( - name = "org_tensorflow", - sha256 = "63025cb60d00d9aa7a88807651305a38abb9bb144464e2419c03f13a089d19a6", - strip_prefix = "tensorflow-2.12.1", - urls = ["https://github.com/tensorflow/tensorflow/archive/v2.12.1.zip"], -) - -load("@org_tensorflow//tensorflow/tools/toolchains:cpus/aarch64/aarch64_compiler_configure.bzl", "aarch64_compiler_configure") # buildifier: disable=load-on-top - -# This import (along with the org_tensorflow archive) is necessary to provide the devtoolset-9 toolchain -load("@org_tensorflow//tensorflow/tools/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") # buildifier: disable=load-on-top - -initialize_rbe_configs() - -aarch64_compiler_configure() diff --git a/cpp/BUILD b/cpp/BUILD index 2a500d3..7d3f7d6 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -77,7 +77,10 @@ cc_library( deps = [ ":common", "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/synchronization", + "@com_google_riegeli//riegeli/base:dependency", + "@com_google_riegeli//riegeli/base:initializer", ], ) diff --git a/cpp/array_record_reader.cc b/cpp/array_record_reader.cc index c57aa57..cd7a39a 100644 --- a/cpp/array_record_reader.cc +++ b/cpp/array_record_reader.cc @@ -342,8 +342,8 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecords( uint64_t chunk_idx_start = buf_idx * state_->chunk_group_size; // inclusive index, not the conventional exclusive index. uint64_t last_chunk_idx = - std::min((buf_idx + 1) * state_->chunk_group_size - 1, - state_->chunk_offsets.size() - 1); + std::min((buf_idx + 1) * state_->chunk_group_size - 1, + state_->chunk_offsets.size() - 1); uint64_t buf_len = state_->ChunkEndOffset(last_chunk_idx) - state_->chunk_offsets[chunk_idx_start]; AR_ENDO_JOB( @@ -708,8 +708,8 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) { chunk_offsets.reserve(state_->chunk_group_size); uint64_t chunk_start = buffer_to_add * state_->chunk_group_size; uint64_t chunk_end = - std::min(state_->chunk_offsets.size(), - (buffer_to_add + 1) * state_->chunk_group_size); + std::min(state_->chunk_offsets.size(), + (buffer_to_add + 1) * state_->chunk_group_size); for (uint64_t chunk_idx = chunk_start; chunk_idx < chunk_end; ++chunk_idx) { chunk_offsets.push_back(state_->chunk_offsets[chunk_idx]); } diff --git a/cpp/array_record_reader.h b/cpp/array_record_reader.h index 962a67e..bbe3a0e 100644 --- a/cpp/array_record_reader.h +++ b/cpp/array_record_reader.h @@ -32,6 +32,7 @@ limitations under the License. #ifndef ARRAY_RECORD_CPP_ARRAY_RECORD_READER_H_ #define ARRAY_RECORD_CPP_ARRAY_RECORD_READER_H_ +#include #include #include #include @@ -46,8 +47,8 @@ limitations under the License. #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "cpp/common.h" -#include "cpp/tri_state_ptr.h" #include "cpp/thread_pool.h" +#include "cpp/tri_state_ptr.h" #include "google/protobuf/message_lite.h" #include "riegeli/base/initializer.h" #include "riegeli/base/object.h" @@ -293,7 +294,7 @@ class ArrayRecordReaderBase : public riegeli::Object { void Initialize(); - virtual TriStatePtr::SharedRef get_backing_reader() + virtual TriStatePtrBase::SharedRef get_backing_reader() const = 0; private: @@ -345,24 +346,27 @@ class ArrayRecordReader : public ArrayRecordReaderBase { Options options = Options(), ARThreadPool* pool = nullptr) : ArrayRecordReaderBase(std::move(options), pool), - main_reader_(std::make_unique>( + main_reader_(std::make_unique>( std::move(src))) { Initialize(); } protected: - TriStatePtr::SharedRef get_backing_reader() const override { + TriStatePtrBase::SharedRef get_backing_reader() + const override { return main_reader_->MakeShared(); } void Done() override { - if (main_reader_ == nullptr) return; + if (main_reader_ == nullptr) { + return; + } auto unique = main_reader_->WaitAndMakeUnique(); if (!unique->Close()) Fail(unique->status()); } private: - std::unique_ptr> main_reader_; + std::unique_ptr> main_reader_; }; template diff --git a/cpp/array_record_writer.cc b/cpp/array_record_writer.cc index 48691ba..f74fe64 100644 --- a/cpp/array_record_writer.cc +++ b/cpp/array_record_writer.cc @@ -265,7 +265,7 @@ class ArrayRecordWriterBase::SubmitChunkCallback // Aggregate the offsets information and write it to the file. void WriteFooterAndPostscript( - TriStatePtr::SharedRef writer); + TriStatePtrBase::SharedRef writer); private: const Options options_; @@ -489,7 +489,7 @@ void ArrayRecordWriterBase::SubmitChunkCallback::operator()( } void ArrayRecordWriterBase::SubmitChunkCallback::WriteFooterAndPostscript( - TriStatePtr::SharedRef writer) { + TriStatePtrBase::SharedRef writer) { // Flushes prior chunks writer->SubmitFutureChunks(true); // Footer and postscript must pad to block boundary diff --git a/cpp/array_record_writer.h b/cpp/array_record_writer.h index 1f374e9..19c7f4f 100644 --- a/cpp/array_record_writer.h +++ b/cpp/array_record_writer.h @@ -317,7 +317,7 @@ class ArrayRecordWriterBase : public riegeli::Object { ArrayRecordWriterBase(ArrayRecordWriterBase&& other) noexcept; ArrayRecordWriterBase& operator=(ArrayRecordWriterBase&& other) noexcept; - virtual TriStatePtr::SharedRef get_writer() = 0; + virtual TriStatePtrBase::SharedRef get_writer() = 0; // Initializes and validates the underlying writer states. void Initialize(); @@ -380,8 +380,9 @@ class ArrayRecordWriter : public ArrayRecordWriterBase { Options options = Options(), ARThreadPool* pool = nullptr) : ArrayRecordWriterBase(std::move(options), pool), - main_writer_(std::make_unique>( - std::make_unique>(std::move(dest)))) { + main_writer_(std::make_unique>>( + riegeli::Maker>(std::move(dest)))) { auto writer = get_writer(); if (!writer->ok()) { Fail(writer->status()); @@ -391,11 +392,12 @@ class ArrayRecordWriter : public ArrayRecordWriterBase { } protected: - TriStatePtr::SharedRef get_writer() final { + TriStatePtrBase::SharedRef get_writer() final { return main_writer_->MakeShared(); } void Done() override { + // TODO add an empty checking method? if (main_writer_ == nullptr) return; ArrayRecordWriterBase::Done(); if (!ok()) { @@ -407,7 +409,9 @@ class ArrayRecordWriter : public ArrayRecordWriterBase { } private: - std::unique_ptr> main_writer_; + std::unique_ptr< + TriStatePtr>> + main_writer_; }; template diff --git a/cpp/tri_state_ptr.h b/cpp/tri_state_ptr.h index 197e129..c7efebb 100644 --- a/cpp/tri_state_ptr.h +++ b/cpp/tri_state_ptr.h @@ -20,76 +20,35 @@ limitations under the License. #include #include -#include #include +#include "absl/base/nullability.h" #include "absl/base/thread_annotations.h" #include "absl/synchronization/mutex.h" #include "cpp/common.h" +#include "riegeli/base/dependency.h" +#include "riegeli/base/initializer.h" namespace array_record { -/** TriStatePtr is a wrapper around a pointer that allows for a unique and - * shared reference. - * - * There are three states: - * - * - NoRef: The object does not have shared or unique references. - * - Sharing: The object is shared. - * - Unique: The object is referenced by a unique pointer wrapper. - * - * The state transition from NoRef to Shared when MakeShared is called. - * An internal refernce count is incremented when a SharedRef is created. - * - * SharedRef ref = MakeShared(); -- - * NoRef ----------------------------> Sharing / | MakeShared() - * All SharedRef deallocated <-- - * <---------------------------- - * - * The state can also transition to Unique when WaitAndMakeUnique is called. - * We can only hold one unique reference at a time. - * - * UniqueRef ref = WaitAndMakeUnique(); - * NoRef ----------------------------> Unique - * The UniqueRef is deallocated - * <---------------------------- - * - * Other than the state transition above, state transitions methods would block - * until the specified state is possible. On deallocation, the destructor blocks - * until the state is NoRef. - * - * Example usage: - * - * TriStatePtr main(riegeli::Maker(...)); - * // Create a shared reference to work on other threads. - * pool->Schedule([refobj = foo_ptr.MakeShared()] { - * refobj->FooMethod(); - * }); - * - * // Blocks until refobj is out of scope. - * auto unique_ref = main.WaitAndMakeUnique(); - * unique_ref->CleanupFoo(); - * - */ template -class TriStatePtr { +class TriStatePtrBase { public: - DECLARE_IMMOBILE_CLASS(TriStatePtr); - TriStatePtr() = default; + // TriStatePtrBase(BaseT* ptr) : ptr_(ptr) { + // if (ptr == nullptr) + // LOG(FATAL) << "ptr is null"; + // } - ~TriStatePtr() { + ~TriStatePtrBase() { absl::MutexLock l(&mu_); mu_.Await(absl::Condition( +[](State* sharing_state) { return *sharing_state == State::kNoRef; }, &state_)); } - // explicit TriStatePtr(std::unique_ptr ptr) : ptr_(std::move(ptr)) {} - explicit TriStatePtr(std::unique_ptr ptr) : ptr_(std::move(ptr)) {} - class SharedRef { public: - SharedRef(TriStatePtr* parent) : parent_(parent) {} + SharedRef(TriStatePtrBase* parent) : parent_(parent) {} SharedRef(const SharedRef& other) : parent_(other.parent_) { parent_->ref_count_++; @@ -121,32 +80,32 @@ class TriStatePtr { } } - const BaseT& operator*() const { return *parent_->ptr_.get(); } - const BaseT* operator->() const { return parent_->ptr_.get(); } - BaseT& operator*() { return *parent_->ptr_.get(); } - BaseT* operator->() { return parent_->ptr_.get(); } + const BaseT& operator*() const { return *parent_->ptr_; } + const BaseT* operator->() const { return parent_->ptr_; } + BaseT& operator*() { return *parent_->ptr_; } + BaseT* operator->() { return parent_->ptr_; } private: - TriStatePtr* parent_ = nullptr; + TriStatePtrBase* parent_ = nullptr; }; class UniqueRef { public: DECLARE_MOVE_ONLY_CLASS(UniqueRef); - UniqueRef(TriStatePtr* parent) : parent_(parent) {} + UniqueRef(TriStatePtrBase* parent) : parent_(parent) {} ~UniqueRef() { absl::MutexLock l(&parent_->mu_); parent_->state_ = State::kNoRef; } - const BaseT& operator*() const { return *parent_->ptr_.get(); } - const BaseT* operator->() const { return parent_->ptr_.get(); } - BaseT& operator*() { return *parent_->ptr_.get(); } - BaseT* operator->() { return parent_->ptr_.get(); } + const BaseT& operator*() const { return *parent_->ptr_; } + const BaseT* operator->() const { return parent_->ptr_; } + BaseT& operator*() { return *parent_->ptr_; } + BaseT* operator->() { return parent_->ptr_; } private: - TriStatePtr* parent_; + TriStatePtrBase* parent_; }; SharedRef MakeShared() { @@ -179,11 +138,72 @@ class TriStatePtr { return state_; } + protected: + BaseT* ptr_; + private: mutable absl::Mutex mu_; std::atomic_int32_t ref_count_ = 0; State state_ ABSL_GUARDED_BY(mu_) = State::kNoRef; - std::unique_ptr ptr_; +}; + +/** TriStatePtr is a wrapper around a pointer that allows for a unique and + * shared reference. + * + * There are three states: + * + * - NoRef: The object does not have shared or unique references. + * - Sharing: The object is shared. + * - Unique: The object is referenced by a unique pointer wrapper. + * + * The state transition from NoRef to Shared when MakeShared is called. + * An internal refernce count is incremented when a SharedRef is created. + * + * SharedRef ref = MakeShared(); -- + * NoRef ----------------------------> Sharing / | MakeShared() + * All SharedRef deallocated <-- + * <---------------------------- + * + * The state can also transition to Unique when WaitAndMakeUnique is called. + * We can only hold one unique reference at a time. + * + * UniqueRef ref = WaitAndMakeUnique(); + * NoRef ----------------------------> Unique + * The UniqueRef is deallocated + * <---------------------------- + * + * Other than the state transition above, state transitions methods would block + * until the specified state is possible. On deallocation, the destructor blocks + * until the state is NoRef. + * + * Example usage 1 (using rieglie::Maker): + * + * TriStatePtr main(riegeli::Maker(...)); + * // Create a shared reference to work on other threads. + * pool->Schedule([refobj = foo_ptr.MakeShared()] { + * refobj->FooMethod(); + * }); + * + * // Blocks until refobj is out of scope. + * auto unique_ref = main.WaitAndMakeUnique(); + * unique_ref->CleanupFoo(); + * + * Example usage 2 (using unique_ptr): + * + * TriStatePtr> main( + * std::make_unique(...)); + */ +template +class TriStatePtr : public TriStatePtrBase { + public: + explicit TriStatePtr(riegeli::Initializer base) + : dependency_(std::move(base)) { + absl::Nonnull ptr = dependency_.get(); + TriStatePtrBase::ptr_ = ptr; + } + + private: + riegeli::Dependency dependency_; }; } // namespace array_record diff --git a/cpp/tri_state_ptr_test.cc b/cpp/tri_state_ptr_test.cc index b596150..6aedc1f 100644 --- a/cpp/tri_state_ptr_test.cc +++ b/cpp/tri_state_ptr_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "cpp/tri_state_ptr.h" +#include #include "gtest/gtest.h" #include "absl/synchronization/notification.h" @@ -53,9 +54,9 @@ class TriStatePtrTest : public testing::Test { ARThreadPool* pool_; }; -TEST_F(TriStatePtrTest, SanityTest) { - TriStatePtr foo_main(riegeli::Maker(1).UniquePtr()); - EXPECT_EQ(foo_main.state(), TriStatePtr::State::kNoRef); +TEST_F(TriStatePtrTest, SanityTestMaker) { + TriStatePtr foo_main(riegeli::Maker(1)); + EXPECT_EQ(foo_main.state(), TriStatePtrBase::State::kNoRef); absl::Notification notification; { pool_->Schedule( @@ -67,12 +68,35 @@ TEST_F(TriStatePtrTest, SanityTest) { EXPECT_EQ(second_foo_shared->value(), 2); }); } - EXPECT_EQ(foo_main.state(), TriStatePtr::State::kSharing); + EXPECT_EQ(foo_main.state(), TriStatePtrBase::State::kSharing); notification.Notify(); auto foo_unique = foo_main.WaitAndMakeUnique(); foo_unique->mul_value(3); EXPECT_EQ(foo_unique->value(), 6); - EXPECT_EQ(foo_main.state(), TriStatePtr::State::kUnique); + EXPECT_EQ(foo_main.state(), TriStatePtrBase::State::kUnique); +} + +TEST_F(TriStatePtrTest, SanityTestUniquePtr) { + TriStatePtr> foo_main( + std::make_unique(1)); + EXPECT_EQ(foo_main.state(), TriStatePtrBase::State::kNoRef); + absl::Notification notification; + { + pool_->Schedule( + [foo_shared = foo_main.MakeShared(), ¬ification]() mutable { + notification.WaitForNotification(); + EXPECT_EQ(foo_shared->value(), 1); + const auto second_foo_shared = foo_shared; + foo_shared->add_value(1); + EXPECT_EQ(second_foo_shared->value(), 2); + }); + } + EXPECT_EQ(foo_main.state(), TriStatePtrBase::State::kSharing); + notification.Notify(); + auto foo_unique = foo_main.WaitAndMakeUnique(); + foo_unique->mul_value(3); + EXPECT_EQ(foo_unique->value(), 6); + EXPECT_EQ(foo_main.state(), TriStatePtrBase::State::kUnique); } } // namespace diff --git a/oss/runner_common.sh b/oss/runner_common.sh index a35f5c0..1ebc289 100644 --- a/oss/runner_common.sh +++ b/oss/runner_common.sh @@ -11,7 +11,7 @@ function build_and_test_array_record_linux() { # Using a previous version of Blaze to avoid: # https://github.com/bazelbuild/bazel/issues/8622 - export BAZEL_VERSION="5.4.0" + export BAZEL_VERSION="8.0.0" # Build wheels for multiple Python minor versions. PYTHON_MAJOR_VERSION=3 @@ -86,7 +86,7 @@ function build_and_test_array_record_macos() { # Set up Bazel. # Using a previous version of Bazel to avoid: # https://github.com/bazelbuild/bazel/issues/8622 - export BAZEL_VERSION="5.4.0" + export BAZEL_VERSION="8.0.0" update_bazel_macos ${BAZEL_VERSION} bazel --version @@ -99,4 +99,4 @@ function build_and_test_array_record_macos() { bash ${SOURCE_DIR}/oss/build_whl.sh ls ${SOURCE_DIR}/all_dist/*.whl -} \ No newline at end of file +} diff --git a/python/BUILD b/python/BUILD index 58ed6ce..cfcad43 100644 --- a/python/BUILD +++ b/python/BUILD @@ -1,6 +1,7 @@ # Python binding for ArrayRecord load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") +load("@pypi//:requirements.bzl", "requirement") package(default_visibility = ["//visibility:public"]) @@ -35,7 +36,7 @@ py_library( srcs = ["array_record_data_source.py"], data = [":array_record_module.so"], deps = [ - # Implicit etils (/epath) dependency. + requirement("etils"), ], ) diff --git a/python/array_record_data_source.py b/python/array_record_data_source.py index 9ffe588..ac88149 100644 --- a/python/array_record_data_source.py +++ b/python/array_record_data_source.py @@ -46,7 +46,8 @@ def __getitem__(self, record_keys: Sequence[int]) -> Sequence[T]: from absl import logging from etils import epath -from . import array_record_module +from python import array_record_module + # TODO(jolesiak): Decide what to do with these flags, e.g., remove them (could # be appropriate if we decide to use asyncio) or move them somewhere else and diff --git a/python/array_record_data_source_test.py b/python/array_record_data_source_test.py index 5196d99..a8c2115 100644 --- a/python/array_record_data_source_test.py +++ b/python/array_record_data_source_test.py @@ -24,9 +24,8 @@ from absl.testing import flagsaver from absl.testing import parameterized -from array_record.python import array_record_data_source -from array_record.python import array_record_module - +from python import array_record_data_source +from python import array_record_module FLAGS = flags.FLAGS diff --git a/python/array_record_module_test.py b/python/array_record_module_test.py index 3c3eaa6..452e517 100644 --- a/python/array_record_module_test.py +++ b/python/array_record_module_test.py @@ -18,8 +18,8 @@ from absl.testing import absltest -from array_record.python.array_record_module import ArrayRecordReader -from array_record.python.array_record_module import ArrayRecordWriter +from python.array_record_module import ArrayRecordReader +from python.array_record_module import ArrayRecordWriter class ArrayRecordModuleTest(absltest.TestCase): diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..81d772a --- /dev/null +++ b/requirements.in @@ -0,0 +1,20 @@ +# Copyright 2024 The ArrayRecord Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is the list of our Python third_party dependencies that Bazel should +# pull from PyPi. +# Note that requirements.txt must be re-generated using +# bazel run //:requirements.update in the OSS version. + +etils[epath] \ No newline at end of file diff --git a/requirements_lock.txt b/requirements_lock.txt new file mode 100644 index 0000000..c10476c --- /dev/null +++ b/requirements_lock.txt @@ -0,0 +1,26 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# bazel run //:requirements.update +# +etils[epath,epy]==1.11.0 \ + --hash=sha256:a394cf3476bcec51c221426a70c39cd1006e889456ba41e4d7f12fd6814be7a5 \ + --hash=sha256:aff3278a3be7fddf302dfd80335e9f924244666c71239cd91e836f3d055f1c4a + # via -r requirements.in +fsspec==2024.12.0 \ + --hash=sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f \ + --hash=sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2 + # via etils +importlib-resources==6.4.5 \ + --hash=sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065 \ + --hash=sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717 + # via etils +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 + # via etils +zipp==3.21.0 \ + --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \ + --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931 + # via etils