Skip to content

Commit

Permalink
Fix OSS builds with the following changes:
Browse files Browse the repository at this point in the history
1. Use CTAD to allow TriStatePtr to be used with non-unique_ptr data.
2. Migrate from old WORKSPACE to MODULE.bazel
3. Use pip python rule to manage pip dependencies required by array_record_datasource.

PiperOrigin-RevId: 709206948
  • Loading branch information
ArrayRecord Team authored and copybara-github committed Dec 24, 2024
1 parent 50377fc commit 89f82b8
Show file tree
Hide file tree
Showing 17 changed files with 263 additions and 269 deletions.
7 changes: 0 additions & 7 deletions BUILD
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
load("@rules_python//python:pip.bzl", "compile_pip_requirements")


py_library(
name = "setup",
srcs = ["setup.py"],
)

compile_pip_requirements(
name = "requirements",
src = "requirements.in",
requirements_txt = "requirements_lock.txt",
)
65 changes: 0 additions & 65 deletions MODULE.bazel

This file was deleted.

168 changes: 168 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
workspace(name = "array_record")

load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

# Abseil LTS 20230802.1
http_archive(
name = "com_google_absl",
sha256 = "987ce98f02eefbaf930d6e38ab16aa05737234d7afbab2d5c4ea7adbe50c28ed",
strip_prefix = "abseil-cpp-20230802.1",
urls = [
"https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.tar.gz",
],
)

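# abseil-py, pinned by commit.
# NOTE(review): the label previously here read "Version: pypi-v0.11.0, 2020/10/27", but the
# shallow_since timestamp below falls in January 2023 — confirm which release this commit is.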
git_repository(
name = "com_google_absl_py",
commit = "127c98870edf5f03395ce9cf886266fa5f24455e",
remote = "https://github.com/abseil/abseil-py",
shallow_since = "1673401277 -0800",
)

# Needed by com_google_riegeli
http_archive(
name = "org_brotli",
sha256 = "84a9a68ada813a59db94d83ea10c54155f1d34399baf377842ff3ab9b3b3256e",
strip_prefix = "brotli-3914999fcc1fda92e750ef9190aa6db9bf7bdb07",
urls = ["https://github.com/google/brotli/archive/3914999fcc1fda92e750ef9190aa6db9bf7bdb07.zip"], # 2022-11-17
)

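# GoogleTest/GoogleMock framework. Used by most unit-tests.
# NOTE(review): this archive tracks the moving `main` branch while sha256 is fixed, so the
# checksum will stop matching once `main` advances; consider pinning a release tag or commit.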
http_archive(
name = "com_google_googletest",
sha256 = "24e06e79a78ca5794ec6ad2bf0a1f05515cd1d05a9e10d9a6dc853078b2f3914",
strip_prefix = "googletest-main",
urls = ["https://github.com/google/googletest/archive/main.zip"],
)

# V3.4.0, 20210818
http_archive(
name = "eigen3",
build_file_content =
"""
cc_library(
name = 'eigen3',
srcs = [],
includes = ['.'],
hdrs = glob(['Eigen/**', 'unsupported/Eigen/**']),
visibility = ['//visibility:public'],
)
""",
sha256 = "b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626",
strip_prefix = "eigen-3.4.0",
urls = [
"https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.bz2",
],
)

# `pybind11_bazel` (https://github.com/pybind/pybind11_bazel): 20230130
http_archive(
name = "pybind11_bazel",
sha256 = "b35f3abc3d52ee5c753fdeeb2b5129b99e796558754ca5d245e28e51c1072a21",
strip_prefix = "pybind11_bazel-5f458fa53870223a0de7eeb60480dd278b442698",
urls = ["https://github.com/pybind/pybind11_bazel/archive/5f458fa53870223a0de7eeb60480dd278b442698.tar.gz"],
)

# V2.10.3, 20230130
http_archive(
name = "pybind11",
build_file = "@pybind11_bazel//:pybind11.BUILD",
sha256 = "201966a61dc826f1b1879a24a3317a1ec9214a918c8eb035be2f30c3e9cfbdcb",
strip_prefix = "pybind11-2.10.3",
urls = ["https://github.com/pybind/pybind11/archive/refs/tags/v2.10.3.zip"],
)

load("@pybind11_bazel//:python_configure.bzl", "python_configure")

python_configure(name = "local_config_python")

# proto_library, cc_proto_library, and java_proto_library rules implicitly
# depend on @com_google_protobuf for protoc and proto runtimes.
# This statement defines the @com_google_protobuf repo.
http_archive(
name = "com_google_protobuf",
sha256 = "dc167b7d23ec0d6e4a3d4eae1798de6c8d162e69fa136d39753aaeb7a6e1289d",
strip_prefix = "protobuf-23.1",
urls = ["https://github.com/protocolbuffers/protobuf/archive/v23.1.tar.gz"],
)

load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")

protobuf_deps()

http_archive(
name = "com_google_riegeli",
sha256 = "5615438b3809fdd62266030e2c6f19c457a15bfb6ef3aa8132503e8584305f8a",
strip_prefix = "riegeli-254e6d74ee0d325676739fe5075e5a1a895624cf",
urls = [
"https://github.com/google/riegeli/archive/254e6d74ee0d325676739fe5075e5a1a895624cf.tar.gz",
],
)

# Riegeli's dependencies
http_archive(
name = "net_zstd",
build_file = "@com_google_riegeli//third_party:net_zstd.BUILD",
sha256 = "b6c537b53356a3af3ca3e621457751fa9a6ba96daf3aebb3526ae0f610863532",
strip_prefix = "zstd-1.4.5/lib",
urls = ["https://github.com/facebook/zstd/archive/v1.4.5.zip"], # 2020-05-22
)

http_archive(
name = "lz4",
build_file = "@com_google_riegeli//third_party:lz4.BUILD",
sha256 = "4ec935d99aa4950eadfefbd49c9fad863185ac24c32001162c44a683ef61b580",
strip_prefix = "lz4-1.9.3/lib",
urls = ["https://github.com/lz4/lz4/archive/refs/tags/v1.9.3.zip"], # 2020-11-16
)

http_archive(
name = "snappy",
build_file = "@com_google_riegeli//third_party:snappy.BUILD",
sha256 = "7ee7540b23ae04df961af24309a55484e7016106e979f83323536a1322cedf1b",
strip_prefix = "snappy-1.2.0",
urls = ["https://github.com/google/snappy/archive/1.2.0.zip"], # 2024-04-04
)

http_archive(
name = "crc32c",
build_file = "@com_google_riegeli//third_party:crc32.BUILD",
sha256 = "338f1d9d95753dc3cdd882dfb6e176bbb4b18353c29c411ebcb7b890f361722e",
strip_prefix = "crc32c-1.1.0",
urls = ["https://github.com/google/crc32c/archive/1.1.0.zip"], # 2019-05-24
)

# zlib, one of Riegeli's dependencies; built with the BUILD file shipped in
# Riegeli's third_party directory.
# Fetched over HTTPS (not plain HTTP) so the download is not exposed to
# on-path tampering; sha256 still pins the exact archive contents.
http_archive(
    name = "zlib",
    build_file = "@com_google_riegeli//third_party:zlib.BUILD",
    sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1",
    strip_prefix = "zlib-1.2.11",
    urls = ["https://zlib.net/fossils/zlib-1.2.11.tar.gz"],  # 2017-01-15
)

http_archive(
name = "highwayhash",
build_file = "@com_google_riegeli//third_party:highwayhash.BUILD",
sha256 = "5380cb7cf19e7c9591f31792b7794d48084f6a3ab7c03d637cd6a32cf2ee8686",
strip_prefix = "highwayhash-a7f68e2f95fac08b24327d74747521cf634d5aff",
urls = ["https://github.com/google/highwayhash/archive/a7f68e2f95fac08b24327d74747521cf634d5aff.zip"], # 2023-08-09
)

# Tensorflow, 20230705
http_archive(
name = "org_tensorflow",
sha256 = "63025cb60d00d9aa7a88807651305a38abb9bb144464e2419c03f13a089d19a6",
strip_prefix = "tensorflow-2.12.1",
urls = ["https://github.com/tensorflow/tensorflow/archive/v2.12.1.zip"],
)

load("@org_tensorflow//tensorflow/tools/toolchains:cpus/aarch64/aarch64_compiler_configure.bzl", "aarch64_compiler_configure") # buildifier: disable=load-on-top

# This import (along with the org_tensorflow archive) is necessary to provide the devtoolset-9 toolchain
load("@org_tensorflow//tensorflow/tools/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") # buildifier: disable=load-on-top

initialize_rbe_configs()

aarch64_compiler_configure()
3 changes: 0 additions & 3 deletions cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,7 @@ cc_library(
deps = [
":common",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/base:nullability",
"@com_google_absl//absl/synchronization",
"@com_google_riegeli//riegeli/base:dependency",
"@com_google_riegeli//riegeli/base:initializer",
],
)

Expand Down
8 changes: 4 additions & 4 deletions cpp/array_record_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ absl::Status ArrayRecordReaderBase::ParallelReadRecords(
uint64_t chunk_idx_start = buf_idx * state_->chunk_group_size;
// inclusive index, not the conventional exclusive index.
uint64_t last_chunk_idx =
std::min<uint64_t>((buf_idx + 1) * state_->chunk_group_size - 1,
state_->chunk_offsets.size() - 1);
std::min((buf_idx + 1) * state_->chunk_group_size - 1,
state_->chunk_offsets.size() - 1);
uint64_t buf_len = state_->ChunkEndOffset(last_chunk_idx) -
state_->chunk_offsets[chunk_idx_start];
AR_ENDO_JOB(
Expand Down Expand Up @@ -708,8 +708,8 @@ bool ArrayRecordReaderBase::ReadAheadFromBuffer(uint64_t buffer_idx) {
chunk_offsets.reserve(state_->chunk_group_size);
uint64_t chunk_start = buffer_to_add * state_->chunk_group_size;
uint64_t chunk_end =
std::min<uint64_t>(state_->chunk_offsets.size(),
(buffer_to_add + 1) * state_->chunk_group_size);
std::min(state_->chunk_offsets.size(),
(buffer_to_add + 1) * state_->chunk_group_size);
for (uint64_t chunk_idx = chunk_start; chunk_idx < chunk_end; ++chunk_idx) {
chunk_offsets.push_back(state_->chunk_offsets[chunk_idx]);
}
Expand Down
16 changes: 6 additions & 10 deletions cpp/array_record_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ limitations under the License.
#ifndef ARRAY_RECORD_CPP_ARRAY_RECORD_READER_H_
#define ARRAY_RECORD_CPP_ARRAY_RECORD_READER_H_

#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
Expand All @@ -47,8 +46,8 @@ limitations under the License.
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "cpp/common.h"
#include "cpp/thread_pool.h"
#include "cpp/tri_state_ptr.h"
#include "cpp/thread_pool.h"
#include "google/protobuf/message_lite.h"
#include "riegeli/base/initializer.h"
#include "riegeli/base/object.h"
Expand Down Expand Up @@ -294,7 +293,7 @@ class ArrayRecordReaderBase : public riegeli::Object {

void Initialize();

virtual TriStatePtrBase<riegeli::Reader>::SharedRef get_backing_reader()
virtual TriStatePtr<riegeli::Reader>::SharedRef get_backing_reader()
const = 0;

private:
Expand Down Expand Up @@ -346,27 +345,24 @@ class ArrayRecordReader : public ArrayRecordReaderBase {
Options options = Options(),
ARThreadPool* pool = nullptr)
: ArrayRecordReaderBase(std::move(options), pool),
main_reader_(std::make_unique<TriStatePtr<riegeli::Reader, Src>>(
main_reader_(std::make_unique<TriStatePtr<riegeli::Reader>>(
std::move(src))) {
Initialize();
}

protected:
TriStatePtrBase<riegeli::Reader>::SharedRef get_backing_reader()
const override {
TriStatePtr<riegeli::Reader>::SharedRef get_backing_reader() const override {
return main_reader_->MakeShared();
}

void Done() override {
if (main_reader_ == nullptr) {
return;
}
if (main_reader_ == nullptr) return;
auto unique = main_reader_->WaitAndMakeUnique();
if (!unique->Close()) Fail(unique->status());
}

private:
std::unique_ptr<TriStatePtr<riegeli::Reader, Src>> main_reader_;
std::unique_ptr<TriStatePtr<riegeli::Reader>> main_reader_;
};

template <typename Src>
Expand Down
4 changes: 2 additions & 2 deletions cpp/array_record_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ class ArrayRecordWriterBase::SubmitChunkCallback

// Aggregate the offsets information and write it to the file.
void WriteFooterAndPostscript(
TriStatePtrBase<SequencedChunkWriterBase>::SharedRef writer);
TriStatePtr<SequencedChunkWriterBase>::SharedRef writer);

private:
const Options options_;
Expand Down Expand Up @@ -489,7 +489,7 @@ void ArrayRecordWriterBase::SubmitChunkCallback::operator()(
}

void ArrayRecordWriterBase::SubmitChunkCallback::WriteFooterAndPostscript(
TriStatePtrBase<SequencedChunkWriterBase>::SharedRef writer) {
TriStatePtr<SequencedChunkWriterBase>::SharedRef writer) {
// Flushes prior chunks
writer->SubmitFutureChunks(true);
// Footer and postscript must pad to block boundary
Expand Down
Loading

0 comments on commit 89f82b8

Please sign in to comment.