Move down_cast from the tensorflow to the tsl namespace
No functionality changes are intended

PiperOrigin-RevId: 568886635
majnemer authored and Google-ML-Automation committed Nov 15, 2024
1 parent 5b847d8 commit 432b67e
Showing 38 changed files with 166 additions and 176 deletions.
6 changes: 4 additions & 2 deletions third_party/tsl/tsl/platform/default/casts.h
@@ -13,14 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// IWYU pragma: private, include "tsl/platform/casts.h"

#ifndef TENSORFLOW_TSL_PLATFORM_DEFAULT_CASTS_H_
#define TENSORFLOW_TSL_PLATFORM_DEFAULT_CASTS_H_

#include <assert.h> // for use with down_cast<>

#include <type_traits>

namespace tensorflow {
namespace tsl {

// An "upcast", i.e. a conversion from a pointer to an object to a pointer to a
// base subobject, always succeeds if the base is unambiguous and accessible,
@@ -87,6 +89,6 @@ inline To down_cast(From& f) {
return static_cast<To>(f);
}

} // namespace tensorflow
} // namespace tsl

#endif // TENSORFLOW_TSL_PLATFORM_DEFAULT_CASTS_H_
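
For orientation, a minimal usage sketch with the new spelling (Base, Derived, and UseDownCast are illustrative names, not part of this commit; the pointer overload of down_cast is assumed to keep its existing debug-only sanity check):

    #include "tsl/platform/casts.h"  // IWYU-preferred include for down_cast

    struct Base { virtual ~Base() = default; };
    struct Derived : Base { int value = 42; };

    int UseDownCast() {
      Derived d;
      Base* base_ptr = &d;  // static type Base*, dynamic type Derived
      Base& base_ref = d;
      // Call sites change only the namespace qualifier:
      //   tensorflow::down_cast<T>(...)  ->  tsl::down_cast<T>(...)
      Derived* dp = tsl::down_cast<Derived*>(base_ptr);  // pointer overload
      Derived& dr = tsl::down_cast<Derived&>(base_ref);  // reference overload shown above
      return dp->value + dr.value;
    }

The call-site edits in the files below follow exactly this pattern; the cast semantics are unchanged.
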
1 change: 1 addition & 0 deletions xla/pjrt/BUILD
@@ -911,6 +911,7 @@ xla_cc_test(
"//xla/hlo/parser:hlo_parser",
"//xla/pjrt/cpu:cpu_client",
"@com_google_googletest//:gtest_main",
"@tsl//tsl/platform:casts",
"@tsl//tsl/platform:env",
"@tsl//tsl/platform:test",
],
2 changes: 1 addition & 1 deletion xla/pjrt/c/pjrt_c_api_wrapper_impl.cc
@@ -1676,7 +1676,7 @@ PJRT_Error* PJRT_Buffer_GetMemoryLayout(
std::unique_ptr<xla::PjRtLayout> pjrt_layout =
args->buffer->buffer->layout();
xla::PjRtXlaLayout* pjrt_xla_layout =
tensorflow::down_cast<xla::PjRtXlaLayout*>(pjrt_layout.get());
tsl::down_cast<xla::PjRtXlaLayout*>(pjrt_layout.get());
CHECK(pjrt_xla_layout != nullptr) << "Got unexpected layout type";
const xla::Layout& xla_layout = pjrt_xla_layout->xla_layout();

40 changes: 19 additions & 21 deletions xla/pjrt/cpu/cpu_client.cc
@@ -145,9 +145,9 @@ absl::StatusOr<std::unique_ptr<TfrtCpuBuffer>> AllocateDestinationBufferAndAvs(
// buffer.
absl::InlinedVector<tsl::AsyncValueRef<CpuEvent>, 4> definition_events;
AbstractTfrtCpuBuffer::AllocateAvsAndEvents(shape, avs, &definition_events);
return AllocateDestinationBuffer(
shape, std::move(definition_events),
tensorflow::down_cast<TfrtCpuDevice*>(device), client);
return AllocateDestinationBuffer(shape, std::move(definition_events),
tsl::down_cast<TfrtCpuDevice*>(device),
client);
}

const char kCpuPlatformName[] = "cpu";
@@ -469,7 +469,7 @@ TfrtCpuClient::TfrtCpuClient(
// do not promise that memory space ids and device ids are the same.
const int id = device->id();
auto memory_space = std::make_unique<UnpinnedHostMemorySpace>(id, device);
tensorflow::down_cast<TfrtCpuDevice*>(device)->AttachMemorySpace(
tsl::down_cast<TfrtCpuDevice*>(device)->AttachMemorySpace(
memory_space.get());
memory_spaces_.push_back(memory_space.get());
owned_memory_spaces_.push_back(std::move(memory_space));
@@ -642,7 +642,7 @@ TfrtCpuClient::DeserializeExecutable(absl::string_view serialized,
&num_replicas, &num_partitions, &device_assignment));

auto cpu_executable_ptr =
tensorflow::down_cast<cpu::CpuExecutable*>(executable.get());
tsl::down_cast<cpu::CpuExecutable*>(executable.get());

// `buffer_table[result_slice.index()]` points to result buffer:
// If output is a tuple, it points to the buffer index table.
@@ -845,7 +845,7 @@ absl::StatusOr<std::unique_ptr<PjRtLoadedExecutable>> TfrtCpuClient::Compile(
eigen_intraop_device()->getPool()->NumThreads(),
customize_hlo_module_config_));
auto cpu_executable_ptr =
tensorflow::down_cast<cpu::CpuExecutable*>(cpu_executable.get());
tsl::down_cast<cpu::CpuExecutable*>(cpu_executable.get());

// `buffer_table[result_slice.index()]` points to result buffer:
// If output is a tuple, it points to the buffer index table.
@@ -921,8 +921,7 @@ TfrtCpuClient::CreateViewOfDeviceBuffer(
std::move(on_delete_callback));
return std::unique_ptr<PjRtBuffer>(std::make_unique<TfrtCpuBuffer>(
shape, std::move(tracked_device_buffer), this,
tensorflow::down_cast<TfrtCpuDevice*>(device),
*device->default_memory_space()));
tsl::down_cast<TfrtCpuDevice*>(device), *device->default_memory_space()));
}

absl::StatusOr<std::unique_ptr<PjRtBuffer>> TfrtCpuClient::CreateErrorBuffer(
@@ -938,7 +937,7 @@ absl::StatusOr<std::unique_ptr<PjRtBuffer>> TfrtCpuClient::CreateErrorBuffer(
absl::InlinedVector<tsl::AsyncValueRef<CpuEvent>, 4>{
tsl::AsyncValueRef<CpuEvent>(
tsl::MakeErrorAsyncValueRef(std::move(error)))}),
this, tensorflow::down_cast<TfrtCpuDevice*>(device),
this, tsl::down_cast<TfrtCpuDevice*>(device),
*device->default_memory_space());
}

@@ -953,15 +952,15 @@ TfrtCpuClient::CreateUninitializedBuffer(const Shape& shape,
tsl::profiler::TraceMe traceme("TfrtCpuClient::CreateUninitializedBuffer");
VLOG(1) << "TfrtCpuClient::CreateUninitializedBuffer: shape: "
<< shape.DebugString() << " device: " << device->DebugString();
return AllocateDestinationBuffer(
shape, /*definition_events=*/{},
tensorflow::down_cast<TfrtCpuDevice*>(device), this);
return AllocateDestinationBuffer(shape, /*definition_events=*/{},
tsl::down_cast<TfrtCpuDevice*>(device),
this);
}

absl::StatusOr<std::unique_ptr<PjRtClient::AsyncHostToDeviceTransferManager>>
TfrtCpuClient::CreateBuffersForAsyncHostToDevice(absl::Span<const Shape> shapes,
PjRtDevice* device) {
auto* tfrt_device = tensorflow::down_cast<TfrtCpuDevice*>(device);
auto* tfrt_device = tsl::down_cast<TfrtCpuDevice*>(device);
return TfrtCpuAsyncHostToDeviceTransferManager::Create(shapes, tfrt_device,
this);
}
@@ -997,8 +996,7 @@ absl::StatusOr<std::unique_ptr<PjRtBuffer>> TfrtCpuClient::BufferFromHostBuffer(

return std::unique_ptr<PjRtBuffer>(std::make_unique<TfrtCpuBuffer>(
shape, std::move(tracked_device_buffer), this,
tensorflow::down_cast<TfrtCpuDevice*>(device),
*device->default_memory_space()));
tsl::down_cast<TfrtCpuDevice*>(device), *device->default_memory_space()));
}

absl::StatusOr<std::unique_ptr<PjRtBuffer>> TfrtCpuClient::BufferFromHostBuffer(
@@ -1044,7 +1042,7 @@ TfrtCpuClient::BufferFromHostLiteral(const LiteralSlice& literal,
TF_ASSIGN_OR_RETURN(
std::unique_ptr<TfrtCpuBuffer> output_buffer,
AllocateDestinationBufferAndAvs(
shape, &avs, tensorflow::down_cast<TfrtCpuDevice*>(device), this));
shape, &avs, tsl::down_cast<TfrtCpuDevice*>(device), this));

output_buffer->CopyFromLiteral(literal, shape, &avs, async_work_runner());

@@ -1119,7 +1117,7 @@ absl::StatusOr<std::unique_ptr<PjRtBuffer>> TfrtCpuBuffer::CopyToDevice(

return std::unique_ptr<PjRtBuffer>(std::make_unique<TfrtCpuBuffer>(
on_device_shape_, std::move(tracked_device_buffer), client(),
tensorflow::down_cast<TfrtCpuDevice*>(dst_device),
tsl::down_cast<TfrtCpuDevice*>(dst_device),
*dst_device->default_memory_space()));
}

@@ -1382,7 +1380,7 @@ absl::StatusOr<PjRtLoadedExecutable::Result> TfrtCpuExecutable::ExecuteHelper(
PjRtGlobalDeviceId global_device_id(device_id);
TF_ASSIGN_OR_RETURN(PjRtDevice * pjrt_device,
client_->LookupDevice(global_device_id));
device = tensorflow::down_cast<TfrtCpuDevice*>(pjrt_device);
device = tsl::down_cast<TfrtCpuDevice*>(pjrt_device);
device_assignment = device_assignment_;
} else {
CHECK(device_assignment_ == nullptr);
@@ -1435,7 +1433,7 @@ absl::StatusOr<PjRtLoadedExecutable::Result> TfrtCpuExecutable::ExecuteHelper(
donation_clashes.reserve(argument_handles.size());
for (int i = 0; i < argument_handles.size(); ++i) {
PjRtBuffer* handle = argument_handles[i];
auto* tfrt_buffer = tensorflow::down_cast<TfrtCpuBuffer*>(handle);
auto* tfrt_buffer = tsl::down_cast<TfrtCpuBuffer*>(handle);
if (tfrt_buffer->device() != device) {
return InvalidArgument(
"Buffer passed to Execute() as argument %d to replica %d is on "
@@ -1522,7 +1520,7 @@ absl::StatusOr<PjRtLoadedExecutable::Result> TfrtCpuExecutable::ExecuteHelper(
}

auto* cpu_executable =
tensorflow::down_cast<cpu::CpuExecutable*>(cpu_executable_.get());
tsl::down_cast<cpu::CpuExecutable*>(cpu_executable_.get());
// `buffer_alloc` and `buffer_alloc_and_copy` are used to do real memory
// allocation and copy work.
BufferAlloc buffer_alloc;
@@ -2090,7 +2088,7 @@ TfrtCpuExecutable::ExecutePortable(
/*replica=*/0,
/*partition=*/0, RunId(), options,
/*last_collective_launch_event=*/tsl::AsyncValueRef<CpuEvent>(),
fill_future, tensorflow::down_cast<TfrtCpuDevice*>(device)));
fill_future, tsl::down_cast<TfrtCpuDevice*>(device)));
returned_future = std::move(result.future);
return std::move(result.buffers);
}
24 changes: 11 additions & 13 deletions xla/pjrt/gpu/se_gpu_pjrt_client.cc
@@ -174,8 +174,7 @@ class AsyncHostToDeviceTransferManager
// buffers, because the invariants of this class ensure that the buffer
// definition event will not fire until after all of this class' uses of
// the TrackedDeviceBuffer have completed.
auto* se_buffer =
tensorflow::down_cast<PjRtStreamExecutorBuffer*>(buffer.get());
auto* se_buffer = tsl::down_cast<PjRtStreamExecutorBuffer*>(buffer.get());
DCHECK(se_buffer);
auto hold = se_buffer->GetBufferWithUsageHold();
buffer_ptrs.push_back(hold.buffer());
@@ -243,7 +242,7 @@ class AsyncHostToDeviceTransferManager
"AsyncHostToDeviceTransferManager::TransferLiteralToBuffer");
auto* stream = device_->local_device_state()->host_to_device_stream();
auto* se_client =
tensorflow::down_cast<PjRtStreamExecutorClient*>(device_->client());
tsl::down_cast<PjRtStreamExecutorClient*>(device_->client());
DCHECK(se_client);

TransferManager* transfer_manager =
@@ -330,8 +329,7 @@ class AsyncHostToDeviceTransferManager
bool is_last_transfer, absl::AnyInvocable<void() &&> on_done) override {
auto* stream = device_->local_device_state()->host_to_device_stream();

auto* client =
tensorflow::down_cast<PjRtStreamExecutorClient*>(device_->client());
auto* client = tsl::down_cast<PjRtStreamExecutorClient*>(device_->client());
bool should_stage_host_to_device_transfers =
client->should_stage_host_to_device_transfers();
std::shared_ptr<void> staging_buffer;
@@ -516,12 +514,12 @@ StreamExecutorGpuClient::StreamExecutorGpuClient(
const int id = device->id();
auto memory_space =
std::make_unique<StreamExecutorGpuHbmMemorySpace>(id, device);
tensorflow::down_cast<PjRtStreamExecutorDevice*>(device)->AttachMemorySpace(
tsl::down_cast<PjRtStreamExecutorDevice*>(device)->AttachMemorySpace(
memory_space.get());
owned_memory_spaces_.push_back(std::move(memory_space));
auto pinned =
std::make_unique<PinnedHostMemorySpace>(basePinnedId + id, device);
tensorflow::down_cast<PjRtStreamExecutorDevice*>(device)->AttachMemorySpace(
tsl::down_cast<PjRtStreamExecutorDevice*>(device)->AttachMemorySpace(
pinned.get());
owned_memory_spaces_.push_back(std::move(pinned));
}
@@ -558,7 +556,7 @@ StreamExecutorGpuClient::CreateBuffersForAsyncHostToDevice(
std::optional<absl::Span<const std::optional<Layout>>> device_layouts,
PjRtDevice* device) {
auto* stream_executor_device =
tensorflow::down_cast<PjRtStreamExecutorDevice*>(device);
tsl::down_cast<PjRtStreamExecutorDevice*>(device);
return xla::AsyncHostToDeviceTransferManager::Create(
shape_specs, std::move(device_layouts), stream_executor_device, this,
/*memory_space=*/nullptr);
@@ -586,7 +584,7 @@ StreamExecutorGpuClient::CreateBuffersForAsyncHostToDevice(
CHECK_EQ(memory_space->devices().size(), 1);
PjRtDevice* device = memory_space->devices()[0];
auto* stream_executor_device =
tensorflow::down_cast<PjRtStreamExecutorDevice*>(device);
tsl::down_cast<PjRtStreamExecutorDevice*>(device);
return xla::AsyncHostToDeviceTransferManager::Create(
shape_specs, std::move(device_layouts), stream_executor_device, this,
memory_space);
@@ -624,7 +622,7 @@ StreamExecutorGpuClient::GetDefaultDeviceAssignment(int num_replicas,
PjRtFuture<> StreamExecutorGpuClient::CopyRawSubBufferToHost(
PjRtBuffer* pjrt_buffer, PjRtFuture<void*> dst, int64_t offset,
int64_t transfer_size) {
auto* buffer = tensorflow::down_cast<PjRtStreamExecutorBuffer*>(pjrt_buffer);
auto* buffer = tsl::down_cast<PjRtStreamExecutorBuffer*>(pjrt_buffer);
DCHECK(buffer);
PjRtStreamExecutorDevice* device = buffer->device();
LocalDeviceState* local_device = device->local_device_state();
@@ -785,7 +783,7 @@ StreamExecutorGpuClient::Compile(const XlaComputation& computation,
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
for (const PjRtDevice* device : addressable_devices()) {
LocalDeviceState* local_device_state =
tensorflow::down_cast<const PjRtStreamExecutorDevice*>(device)
tsl::down_cast<const PjRtStreamExecutorDevice*>(device)
->local_device_state();
int64_t free_memory, total_memory;
if (local_device_state != nullptr) {
@@ -813,7 +811,7 @@ StreamExecutorGpuClient::LoadSerialized(absl::string_view serialized,
absl::StatusOr<std::unique_ptr<PjRtLoadedExecutable>>
StreamExecutorGpuClient::Load(std::unique_ptr<PjRtExecutable> executable) {
auto se_executable = absl::WrapUnique(
tensorflow::down_cast<StreamExecutorExecutable*>(executable.release()));
tsl::down_cast<StreamExecutorExecutable*>(executable.release()));

CompileOptions compile_options = se_executable->compile_options();
CompileOptions input_options = compile_options;
@@ -1214,7 +1212,7 @@ absl::StatusOr<tsl::AllocatorStats> StreamExecutorGpuDevice::GetAllocatorStats()
}

auto* allocator_adapter = dynamic_cast<se::MultiDeviceAdapter*>(
tensorflow::down_cast<PjRtStreamExecutorClient*>(client())->allocator());
tsl::down_cast<PjRtStreamExecutorClient*>(client())->allocator());
if (!allocator_adapter) {
return Unimplemented(
"GetAllocatorStats() is only implemented with MultiDeviceAdapter "
2 changes: 1 addition & 1 deletion xla/pjrt/gpu/se_gpu_pjrt_client.h
@@ -245,7 +245,7 @@ class StreamExecutorGpuClient : public xla::PjRtStreamExecutorClient {
std::unique_ptr<PjRtExecutable> executable,
const LoadOptions& load_options) override {
return absl::WrapUnique<PjRtLoadedExecutable>(
tensorflow::down_cast<PjRtLoadedExecutable*>(executable.release()));
tsl::down_cast<PjRtLoadedExecutable*>(executable.release()));
}

// TODO(b/296466237): Unify `Load` method after (de)serialization and tests on
2 changes: 1 addition & 1 deletion xla/pjrt/gpu/se_gpu_pjrt_client_test.cc
@@ -1072,7 +1072,7 @@ TEST(StreamExecutorGpuClientTest, MockNcclClientWithGpuTopologyTest) {
TF_ASSERT_OK_AND_ASSIGN(const xla::PjRtTopologyDescription* topology,
client->GetTopologyDescription());
const StreamExecutorGpuTopologyDescription& gpu_topology =
tensorflow::down_cast<const xla::StreamExecutorGpuTopologyDescription&>(
tsl::down_cast<const xla::StreamExecutorGpuTopologyDescription&>(
*topology);

EXPECT_EQ(gpu_topology.gpu_topology().num_slices(), 2);
6 changes: 2 additions & 4 deletions xla/pjrt/gpu/se_gpu_pjrt_compiler.cc
@@ -61,11 +61,9 @@ bool IsGpuClient(const PjRtClient& client) {
bool IsSameTopology(const PjRtTopologyDescription& topology1,
const PjRtTopologyDescription& topology2) {
const StreamExecutorGpuTopologyDescription& gpu_topology1 =
tensorflow::down_cast<const StreamExecutorGpuTopologyDescription&>(
topology1);
tsl::down_cast<const StreamExecutorGpuTopologyDescription&>(topology1);
const StreamExecutorGpuTopologyDescription& gpu_topology2 =
tensorflow::down_cast<const StreamExecutorGpuTopologyDescription&>(
topology2);
tsl::down_cast<const StreamExecutorGpuTopologyDescription&>(topology2);
return gpu_topology1 == gpu_topology2;
}

8 changes: 4 additions & 4 deletions xla/pjrt/gpu/se_gpu_pjrt_compiler_aot_test.cc
@@ -90,7 +90,7 @@ TEST(StreamExecutorGpuCompilerTest, SuccessAotCompileMlirAndLoad) {
TF_ASSERT_OK_AND_ASSIGN(auto client,
GetStreamExecutorGpuClient(GpuClientOptions()));
auto se_client = absl::WrapUnique(
tensorflow::down_cast<StreamExecutorGpuClient*>(client.release()));
tsl::down_cast<StreamExecutorGpuClient*>(client.release()));
Compiler::TargetConfig gpu_target_config = xla::Compiler::TargetConfig(
se_client->client()->backend().default_stream_executor());
StreamExecutorGpuCompiler compiler;
@@ -119,7 +119,7 @@ TEST(StreamExecutorGpuCompilerTest, SuccessAotCompileXlaAndLoad) {
TF_ASSERT_OK_AND_ASSIGN(auto client,
GetStreamExecutorGpuClient(GpuClientOptions()));
auto se_client = absl::WrapUnique(
tensorflow::down_cast<StreamExecutorGpuClient*>(client.release()));
tsl::down_cast<StreamExecutorGpuClient*>(client.release()));
#if GOOGLE_CUDA
auto gpu_compiler = gpu::NVPTXCompiler();
#elif TENSORFLOW_USE_ROCM
@@ -152,7 +152,7 @@ TEST(StreamExecutorGpuCompilerTest, SuccessLoadFromSerializedExecutable) {
TF_ASSERT_OK_AND_ASSIGN(auto client,
GetStreamExecutorGpuClient(GpuClientOptions()));
auto se_client = absl::WrapUnique(
tensorflow::down_cast<StreamExecutorGpuClient*>(client.release()));
tsl::down_cast<StreamExecutorGpuClient*>(client.release()));
StreamExecutorGpuCompiler compiler;
xla::CompileOptions opts;
opts.target_config = Compiler::TargetConfig(
@@ -189,7 +189,7 @@ TEST(StreamExecutorGpuCompilerTest, SuccessSerializeDeserialize) {
TF_ASSERT_OK_AND_ASSIGN(auto client,
GetStreamExecutorGpuClient(GpuClientOptions()));
auto se_client = absl::WrapUnique(
tensorflow::down_cast<StreamExecutorGpuClient*>(client.release()));
tsl::down_cast<StreamExecutorGpuClient*>(client.release()));
StreamExecutorGpuCompiler compiler;
xla::CompileOptions opts;
opts.target_config = Compiler::TargetConfig(
(Remaining changed files not shown in this view.)
