
Commit

Remove unused arg hlo_execution_profile from Executable::Execute*OnStream.

PiperOrigin-RevId: 695944312
IllogicalMoose authored and Google-ML-Automation committed Nov 13, 2024
1 parent 0532590 commit 4f0e89b
Showing 24 changed files with 51 additions and 569 deletions.
1 change: 0 additions & 1 deletion xla/backends/interpreter/BUILD
@@ -92,7 +92,6 @@ cc_library(
"//xla/hlo/ir:hlo",
"//xla/service:dynamic_dimension_inference",
"//xla/service:executable",
"//xla/service:hlo_execution_profile",
"//xla/service:maybe_owning_device_memory",
"//xla/service:shaped_buffer",
"//xla/service:transfer_manager",
4 changes: 1 addition & 3 deletions xla/backends/interpreter/executable_base.cc
@@ -27,7 +27,6 @@ limitations under the License.
#include "xla/layout_util.h"
#include "xla/literal.h"
#include "xla/service/executable.h"
#include "xla/service/hlo_execution_profile.h"
#include "xla/service/maybe_owning_device_memory.h"
#include "xla/service/service_executable_run_options.h"
#include "xla/service/shaped_buffer.h"
@@ -57,8 +56,7 @@ InterpreterExecutableBase::InterpreterExecutableBase(

absl::StatusOr<ExecutionOutput> InterpreterExecutableBase::ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) {
std::vector<ExecutionInput> arguments) {
se::Stream* stream = run_options->stream();
se::StreamExecutor* executor = stream->parent();
const se::Platform* platform = executor->GetPlatform();
4 changes: 1 addition & 3 deletions xla/backends/interpreter/executable_base.h
@@ -27,7 +27,6 @@ limitations under the License.
#include "xla/literal.h"
#include "xla/service/dynamic_dimension_inference.h"
#include "xla/service/executable.h"
#include "xla/service/hlo_execution_profile.h"
#include "xla/service/service_executable_run_options.h"
#include "xla/shape.h"
#include "xla/stream_executor/device_memory_allocator.h"
@@ -44,8 +43,7 @@ class InterpreterExecutableBase : public Executable {

absl::StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) override;
std::vector<ExecutionInput> arguments) override;

protected:
virtual absl::StatusOr<Literal> Evaluate(
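For reference, here is a minimal sketch (not part of this commit) of what a backend override looks like after the change. The class name is hypothetical and the inherited-constructor line is an assumption; only the two-argument ExecuteAsyncOnStream remains, with the trailing HloExecutionProfile* parameter gone.

#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "xla/service/executable.h"
#include "xla/service/service_executable_run_options.h"

namespace xla {

// Hypothetical backend executable; the name and constructor handling are
// illustrative only.
class MyBackendExecutable : public Executable {
 public:
  using Executable::Executable;  // assumption: inherited constructors suffice

  // The override matches the trimmed base-class signature: no trailing
  // HloExecutionProfile* parameter.
  absl::StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
      const ServiceExecutableRunOptions* run_options,
      std::vector<ExecutionInput> arguments) override {
    // Backend-specific execution would go here.
    return absl::UnimplementedError("illustration only");
  }
};

}  // namespace xla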
1 change: 0 additions & 1 deletion xla/service/cpu/BUILD
@@ -609,7 +609,6 @@ cc_library(
"//xla/service:custom_call_status",
"//xla/service:custom_call_status_internal",
"//xla/service:executable",
"//xla/service:hlo_execution_profile",
"//xla/service:hlo_profile_printer_data_cc",
"//xla/service:hlo_value",
"//xla/service:maybe_owning_device_memory",
50 changes: 11 additions & 39 deletions xla/service/cpu/cpu_executable.cc
@@ -58,7 +58,6 @@ limitations under the License.
#include "xla/service/custom_call_status.h"
#include "xla/service/custom_call_status_internal.h"
#include "xla/service/executable.h"
#include "xla/service/hlo_execution_profile.h"
#include "xla/service/hlo_value.h"
#include "xla/service/maybe_owning_device_memory.h"
#include "xla/service/service_executable_run_options.h"
@@ -290,17 +289,11 @@ CpuExecutable::CreateBufferTable(se::DeviceMemoryAllocator* memory_allocator,

absl::Status CpuExecutable::ExecuteComputeFunction(
const ExecutableRunOptions* run_options,
absl::Span<MaybeOwningDeviceMemory const> buffers,
HloExecutionProfile* hlo_execution_profile) {
absl::Span<MaybeOwningDeviceMemory const> buffers) {
uint64_t start_micros = tsl::Env::Default()->NowMicros();

size_t profile_counters_size =
hlo_execution_profile ? hlo_execution_profile->profile_counters().size()
: 0;
int64_t* profile_counters =
hlo_execution_profile
? hlo_execution_profile->mutable_profile_counters()->data()
: nullptr;
size_t profile_counters_size = 0;
int64_t* profile_counters = nullptr;

// Call the computation function following the calling convention. See the
// definition of 'ComputeFunctionType' for the details of the calling
@@ -329,12 +322,6 @@ absl::Status CpuExecutable::ExecuteComputeFunction(
const double nanoseconds = (end_micros - start_micros) * 1000.0;
run_options->execution_profile()->set_compute_time_ns(
std::max(nanoseconds, 1.0));
// If hlo profiling was disabled then the cycle count is left empty.
if (hlo_execution_profile) {
run_options->execution_profile()->set_compute_cycle_count(
hlo_execution_profile->total_cycles_executed(
*module().entry_computation()));
}
}
};

@@ -356,17 +343,11 @@ absl::Status CpuExecutable::ExecuteComputeFunction(

absl::Status CpuExecutable::ExecuteThunks(
const ExecutableRunOptions* run_options,
absl::Span<MaybeOwningDeviceMemory const> buffers,
HloExecutionProfile* hlo_execution_profile) {
absl::Span<MaybeOwningDeviceMemory const> buffers) {
uint64_t start_ns = tsl::Env::Default()->NowNanos();

size_t profile_counters_size =
hlo_execution_profile ? hlo_execution_profile->profile_counters().size()
: 0;
int64_t* profile_counters =
hlo_execution_profile
? hlo_execution_profile->mutable_profile_counters()->data()
: nullptr;
size_t profile_counters_size = 0;
int64_t* profile_counters = nullptr;

BufferAllocations allocations(buffers);

@@ -412,12 +393,6 @@ absl::Status CpuExecutable::ExecuteThunks(
uint64_t end_ns = tsl::Env::Default()->NowNanos();
run_options->execution_profile()->set_compute_time_ns(
std::max<int64_t>(end_ns - start_ns, 1));
// If hlo profiling was disabled then the cycle count is left empty.
if (hlo_execution_profile) {
run_options->execution_profile()->set_compute_cycle_count(
hlo_execution_profile->total_cycles_executed(
*module().entry_computation()));
}
}

return ABSL_PREDICT_FALSE(executed_event.IsError())
@@ -527,8 +502,7 @@ absl::StatusOr<ExecutionOutput> CpuExecutable::CreateResultShapedBuffer(

absl::StatusOr<ExecutionOutput> CpuExecutable::ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) {
std::vector<ExecutionInput> arguments) {
if (GetRootValueSet().IsAmbiguous()) {
return Unimplemented("Points-to set of root instruction is ambiguous");
}
@@ -576,15 +550,14 @@ absl::StatusOr<ExecutionOutput> CpuExecutable::ExecuteAsyncOnStream(
CpuExecutable* executable;
ServiceExecutableRunOptions run_options;
std::shared_ptr<std::vector<MaybeOwningDeviceMemory>> task_buffers;
HloExecutionProfile* hlo_execution_profile;

absl::Status operator()() {
if (executable->has_compute_function()) {
return executable->ExecuteComputeFunction(
&run_options.run_options(), *task_buffers, hlo_execution_profile);
return executable->ExecuteComputeFunction(&run_options.run_options(),
*task_buffers);
} else if (executable->has_thunks()) {
return executable->ExecuteThunks(&run_options.run_options(),
*task_buffers, hlo_execution_profile);
*task_buffers);
} else {
return Internal("No compute function or thunks found.");
}
@@ -593,8 +566,7 @@ absl::StatusOr<ExecutionOutput> CpuExecutable::ExecuteAsyncOnStream(
host_stream->EnqueueTaskWithStatus(
AsyncRunTask{this, *run_options,
std::make_shared<std::vector<MaybeOwningDeviceMemory>>(
std::move(buffers)),
hlo_execution_profile});
std::move(buffers))});

MarkToBeReleasedArguments(absl::MakeSpan(arguments), result);
return std::move(result);
10 changes: 3 additions & 7 deletions xla/service/cpu/cpu_executable.h
@@ -39,7 +39,6 @@ limitations under the License.
#include "xla/service/custom_call_status.h"
#include "xla/service/custom_call_status_internal.h"
#include "xla/service/executable.h"
#include "xla/service/hlo_execution_profile.h"
#include "xla/service/hlo_profile_printer_data.pb.h"
#include "xla/service/hlo_value.h"
#include "xla/service/maybe_owning_device_memory.h"
@@ -89,21 +88,18 @@ class CpuExecutable : public Executable {

absl::StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) override;
std::vector<ExecutionInput> arguments) override;

// Calls the generated function performing the computation with the given
// arguments using the supplied buffers.
absl::Status ExecuteComputeFunction(
const ExecutableRunOptions* run_options,
absl::Span<MaybeOwningDeviceMemory const> buffers,
HloExecutionProfile* hlo_execution_profile);
absl::Span<MaybeOwningDeviceMemory const> buffers);

// Calls emitted thunk sequence with the given arguments using the supplied
// buffers.
absl::Status ExecuteThunks(const ExecutableRunOptions* run_options,
absl::Span<MaybeOwningDeviceMemory const> buffers,
HloExecutionProfile* hlo_execution_profile);
absl::Span<MaybeOwningDeviceMemory const> buffers);

absl::Span<const std::string> obj_files() const { return obj_files_; }

33 changes: 15 additions & 18 deletions xla/service/executable.cc
@@ -16,13 +16,16 @@ limitations under the License.
#include "xla/service/executable.h"

#include <memory>
#include <utility>

#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "xla/debug_options_flags.h"
#include "xla/service/dump.h"
#include "xla/service/hlo_graph_dumper.h"
#include "xla/service/maybe_owning_device_memory.h"
#include "xla/service/shaped_buffer.h"
#include "xla/status_macros.h"
#include "xla/stream_executor/device_description.h"
#include "tsl/platform/env.h"
@@ -59,10 +62,9 @@ void ExecutionInput::SetUnownedBuffer(const ShapeIndex& index,

absl::StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStream(
const ServiceExecutableRunOptions* run_options,
absl::Span<const ShapedBuffer* const> arguments,
HloExecutionProfile* hlo_execution_profile) {
absl::Span<const ShapedBuffer* const> arguments) {
absl::StatusOr<ScopedShapedBuffer> result =
ExecuteAsyncOnStream(run_options, arguments, hlo_execution_profile);
ExecuteAsyncOnStream(run_options, arguments);
absl::Status blocking_status = run_options->stream()->BlockHostUntilDone();
TF_RETURN_IF_ERROR(result.status());
TF_RETURN_IF_ERROR(blocking_status);
@@ -81,25 +83,22 @@ static ExecutionInput MakeMaybeOwningDeviceMemoryTree(

absl::StatusOr<ScopedShapedBuffer> Executable::ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
absl::Span<const ShapedBuffer* const> arguments,
HloExecutionProfile* hlo_execution_profile) {
absl::Span<const ShapedBuffer* const> arguments) {
std::vector<ExecutionInput> args;
args.reserve(arguments.size());
for (const ShapedBuffer* arg : arguments) {
args.emplace_back(MakeMaybeOwningDeviceMemoryTree(*arg));
}
TF_ASSIGN_OR_RETURN(ExecutionOutput out,
ExecuteAsyncOnStream(run_options, std::move(args),
hlo_execution_profile));
ExecuteAsyncOnStream(run_options, std::move(args)));
return out.ConsumeResult();
}

absl::StatusOr<ExecutionOutput> Executable::ExecuteOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) {
absl::StatusOr<ExecutionOutput> result = ExecuteAsyncOnStream(
run_options, std::move(arguments), hlo_execution_profile);
std::vector<ExecutionInput> arguments) {
absl::StatusOr<ExecutionOutput> result =
ExecuteAsyncOnStream(run_options, std::move(arguments));
absl::Status blocking_status = run_options->stream()->BlockHostUntilDone();
TF_RETURN_IF_ERROR(result.status());
TF_RETURN_IF_ERROR(blocking_status);
@@ -116,8 +115,7 @@ absl::StatusOr<std::vector<ScopedShapedBuffer>> Executable::ExecuteOnStreams(

if (run_options.size() == 1) {
TF_ASSIGN_OR_RETURN(auto rv,
ExecuteOnStream(&run_options[0], arguments[0],
/*hlo_execution_profile=*/nullptr));
ExecuteOnStream(&run_options[0], arguments[0]));
return_values.push_back(std::move(rv));
return std::move(return_values);
}
@@ -126,9 +124,8 @@ absl::StatusOr<std::vector<ScopedShapedBuffer>> Executable::ExecuteOnStreams(
// We cannot BlockHostUntilDone() on the already-launched executions in case
// of error, since if the executions communicate, the initially launched
// executions may never complete if not all executions are running.
TF_ASSIGN_OR_RETURN(
auto rv, ExecuteAsyncOnStream(&run_options[i], arguments[i],
/*hlo_execution_profile=*/nullptr));
TF_ASSIGN_OR_RETURN(auto rv,
ExecuteAsyncOnStream(&run_options[i], arguments[i]));
return_values.push_back(std::move(rv));
}
for (const auto& options : run_options) {
@@ -218,7 +215,7 @@ absl::StatusOr<ScopedShapedBuffer> Executable::ExecuteAsyncOnStreamWrapper(
absl::Span<const ShapedBuffer* const> arguments) {
auto state = ExecuteWrapperBeforeExecution(*this, run_options);
absl::StatusOr<ScopedShapedBuffer> return_value =
ExecuteAsyncOnStream(run_options, arguments, nullptr);
ExecuteAsyncOnStream(run_options, arguments);
TF_RETURN_IF_ERROR(ExecuteWrapperAfterExecution(
this, state, return_value.status(), run_options->stream()));
return return_value;
@@ -229,7 +226,7 @@ absl::StatusOr<ExecutionOutput> Executable::ExecuteAsyncOnStreamWrapper(
std::vector<ExecutionInput> arguments) {
auto state = ExecuteWrapperBeforeExecution(*this, run_options);
absl::StatusOr<ExecutionOutput> return_value =
ExecuteAsyncOnStream(run_options, std::move(arguments), nullptr);
ExecuteAsyncOnStream(run_options, std::move(arguments));
TF_RETURN_IF_ERROR(ExecuteWrapperAfterExecution(
this, state, return_value.status(), run_options->stream()));
return return_value;
19 changes: 4 additions & 15 deletions xla/service/executable.h
@@ -260,14 +260,10 @@ class Executable {
// Enqueues the compilation result on the provided stream, passing the given
// arguments. This call is blocking and returns after the execution is done.
//
// If the hlo_execution_profile is provided as non-nullptr, profiling will be
// enabled.
//
// Returns a shaped buffer containing the result of the computation.
absl::StatusOr<ScopedShapedBuffer> ExecuteOnStream(
const ServiceExecutableRunOptions* run_options,
absl::Span<const ShapedBuffer* const> arguments,
HloExecutionProfile* hlo_execution_profile);
absl::Span<const ShapedBuffer* const> arguments);

// Starts the given program executing on the given stream/executor.
//
@@ -283,26 +279,19 @@
// operations are enqueued for launch on the stream. Note that some
// implementations may in fact block or may block in some circumstances (e.g.,
// when profiling); i.e., asynchronous is a "may" not a "must".
//
// If the hlo_execution_profile is provided as non-nullptr, profiling will be
// enabled. Note that profiling is tricky to use correctly, as the profiling
// objects (when they exist) must out-live the task.
virtual absl::StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
absl::Span<const ShapedBuffer* const> arguments,
HloExecutionProfile* hlo_execution_profile);
absl::Span<const ShapedBuffer* const> arguments);

// Same as ExecuteAsyncOnStream(), but blocks waiting for the computation to
// complete.
absl::StatusOr<ExecutionOutput> ExecuteOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile);
std::vector<ExecutionInput> arguments);

virtual absl::StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) = 0;
std::vector<ExecutionInput> arguments) = 0;

// Same as ExecuteOnStream(), but runs this executable on multiple
// streams. arguments[i] contains the arguments to the execution on
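For callers, the blocking ExecuteOnStream overload now takes only the run options and the argument buffers. A hypothetical call site, assuming the executable, run options, and arguments are set up elsewhere:

#include "absl/status/statusor.h"
#include "absl/types/span.h"
#include "xla/service/executable.h"
#include "xla/service/service_executable_run_options.h"
#include "xla/service/shaped_buffer.h"

namespace xla {

// Hypothetical helper: runs an executable synchronously with the trimmed API.
// Before this commit the call carried a trailing HloExecutionProfile*
// (typically nullptr); now that argument is simply dropped.
absl::StatusOr<ScopedShapedBuffer> RunBlocking(
    Executable& executable, const ServiceExecutableRunOptions* run_options,
    absl::Span<const ShapedBuffer* const> arguments) {
  return executable.ExecuteOnStream(run_options, arguments);
}

}  // namespace xla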
3 changes: 1 addition & 2 deletions xla/service/executable_test.cc
@@ -43,8 +43,7 @@ class TestExecutable : public Executable {

absl::StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) override {
std::vector<ExecutionInput> arguments) override {
return absl::UnimplementedError("Not needed for this test.");
}
};
1 change: 0 additions & 1 deletion xla/service/gpu/BUILD
@@ -576,7 +576,6 @@ cc_library(
"//xla/hlo/ir:hlo",
"//xla/service:buffer_assignment",
"//xla/service:executable",
"//xla/service:hlo_execution_profile",
"//xla/service:hlo_value",
"//xla/service:maybe_owning_device_memory",
"//xla/service:rendezvous",
7 changes: 2 additions & 5 deletions xla/service/gpu/gpu_executable.cc
@@ -55,7 +55,6 @@ limitations under the License.
#include "xla/service/gpu/runtime/sequential_thunk.h"
#include "xla/service/gpu/runtime/thunk.h"
#include "xla/service/gpu/stream_executor_util.h"
#include "xla/service/hlo_execution_profile.h"
#include "xla/service/hlo_value.h"
#include "xla/service/maybe_owning_device_memory.h"
#include "xla/service/rendezvous.h"
@@ -782,15 +781,13 @@ absl::StatusOr<BufferAllocations> GpuExecutable::GenerateBufferAllocations(

absl::StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
std::vector<ExecutionInput> arguments,
HloExecutionProfile* hlo_execution_profile) {
std::vector<ExecutionInput> arguments) {
return ExecuteAsyncOnStreamImpl(run_options, absl::MakeSpan(arguments));
}

absl::StatusOr<ScopedShapedBuffer> GpuExecutable::ExecuteAsyncOnStream(
const ServiceExecutableRunOptions* run_options,
absl::Span<const ShapedBuffer* const> arguments,
HloExecutionProfile* hlo_execution_profile) {
absl::Span<const ShapedBuffer* const> arguments) {
TF_ASSIGN_OR_RETURN(ExecutionOutput out,
ExecuteAsyncOnStreamImpl(run_options, arguments));
return out.ConsumeResult();