Skip to content

Commit

Permalink
Merge branch 'develop' into pointwise-multi-output
Browse files Browse the repository at this point in the history
  • Loading branch information
pfultz2 authored Apr 19, 2024
2 parents 371ac33 + f6e22cb commit cef1409
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 25 deletions.
2 changes: 2 additions & 0 deletions src/include/migraphx/program.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ struct MIGRAPHX_EXPORT program
std::vector<argument> eval(parameter_map params,
execution_environment exec_env = execution_environment{}) const;

std::vector<argument> eval_with_context(std::vector<context>& ctx, parameter_map params) const;

void finish() const;

std::size_t size() const;
Expand Down
7 changes: 7 additions & 0 deletions src/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,13 @@ std::vector<argument> generic_eval(const program& p,
return generic_eval(mm, ctx, params, {}, trace);
}

std::vector<argument> program::eval_with_context(std::vector<context>& ctx,
parameter_map params) const
{
const module* mm = this->get_main_module();
return generic_eval(mm, ctx, std::move(params), {}, [](auto&&, auto f) { return f(); });
}

std::vector<argument> program::eval(parameter_map params, execution_environment exec_env) const
{
auto& contexts = this->impl->contexts;
Expand Down
35 changes: 24 additions & 11 deletions src/targets/gpu/compile_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/program.hpp>
#include <migraphx/module.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/instruction.hpp>
Expand Down Expand Up @@ -185,17 +186,29 @@ struct compile_plan
std::cout << "No binary" << std::endl;
return std::numeric_limits<double>::max();
}
// Time all the code objects for a given perf config and calculate total
// time e.g. in case of split-K GEMM, it may or may not support fusion.
// In that case MLIR compile would return code objects for individual
// GEMM and pre/post fusion code objects.
auto cobjs = cr->replace.code_objects;
double t = transform_accumulate(
cobjs.begin(),
cobjs.end(),
double{0},
std::plus<>{},
[&](const operation& op) { return time_op(*ctx, op, 20); });
/*
Create a small program containing the instruction being compiled and call
"replace" on it, which inserts all the compiled code objects, prefills, etc.
necessary to run the candidate code object.
*/
program bench_prog;
auto* bench_mm = bench_prog.get_main_module();
std::vector<instruction_ref> bench_ins_inputs;

std::transform(cr->ins->inputs().begin(),
cr->ins->inputs().end(),
std::back_inserter(bench_ins_inputs),
[&](const auto& arg) {
return bench_mm->add_parameter(
std::to_string(bench_ins_inputs.size()),
arg->get_shape());
});
auto bench_ins = bench_mm->add_instruction(
cr->ins->get_operator(), bench_ins_inputs, cr->ins->module_inputs());
cr->replace.replace(*bench_mm, bench_ins);
// do dead code elimination by directly removing instruction
bench_mm->remove_instruction(bench_ins);
auto t = time_program(*ctx, bench_prog, 20);
if(trace_level > 1)
std::cout << t << "ms" << std::endl;
return t;
Expand Down
4 changes: 3 additions & 1 deletion src/targets/gpu/gemm_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,10 @@ void blas_shape(const shape& s)
{
if(s.lens().size() < 2)
return;
if(std::none_of(s.strides().end() - 2, s.strides().end(), [&](auto i) { return i == 1; }))
if(std::none_of(s.strides().end() - 2, s.strides().end(), [](auto i) { return i == 1; }))
MIGRAPHX_THROW("GPU_GEMM: needs to have one matrix stride as 1");
if(std::any_of(s.strides().end() - 2, s.strides().end(), [](auto i) { return i == 0; }))
MIGRAPHX_THROW("GPU_GEMM: matrix dimensions can't be broadcasted");
if(s.lens().size() < 3)
return;
shape batch_shape{s.type(),
Expand Down
7 changes: 5 additions & 2 deletions src/targets/gpu/include/migraphx/gpu/time_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP
#define MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP

#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/operation.hpp>
Expand All @@ -33,10 +34,12 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

MIGRAPHX_GPU_EXPORT double
time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
time_op(const context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);

MIGRAPHX_GPU_EXPORT double time_program(const context& ictx, program p, int n = 100);

/* benchmark gpu::code_object with expected input shapes over n iterations */
MIGRAPHX_GPU_EXPORT double time_op(context& ictx, operation op, int n = 100);
MIGRAPHX_GPU_EXPORT double time_op(const context& ictx, operation op, int n = 100);

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
Expand Down
46 changes: 35 additions & 11 deletions src/targets/gpu/time_op.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/program.hpp>
#include <migraphx/gpu/time_op.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/context.hpp>
Expand All @@ -41,35 +42,58 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
return args;
}

double time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
template <class F>
double time_loop(migraphx::gpu::context& gctx, int n, F f)
{
// TODO: Use std::ref
migraphx::context ctx = ictx;
auto& gctx = any_cast<migraphx::gpu::context>(ctx);
auto output = op.compute_shape(inputs);
op.finalize(ctx, output, inputs);
auto args = generate_arguments(inputs);
auto start = context::create_event_for_timing();
auto stop = context::create_event_for_timing();
auto run = [&] { op.compute(ctx, output, args); };
run();
f();
gctx.get_stream().record(start.get());
for(auto i : range(n))
{
(void)i;
run();
f();
}
gctx.get_stream().record(stop.get());
gctx.finish();
return context::get_elapsed_ms(start.get(), stop.get()) / n;
}

double time_op(context& ictx, operation op, int n)
double time_op(const context& ictx, operation op, const std::vector<shape>& inputs, int n)
{
// TODO: Use std::ref
migraphx::context ctx = ictx;
auto& gctx = any_cast<migraphx::gpu::context>(ctx);
auto output = op.compute_shape(inputs);
op.finalize(ctx, output, inputs);
auto args = generate_arguments(inputs);
auto run = [&] { op.compute(ctx, output, args); };
return time_loop(gctx, n, run);
}

// Convenience overload: casts `op` to a gpu::code_object_op, takes its
// `expected_inputs` as the input shapes, and forwards to the overload that
// accepts explicit input shapes.
double time_op(const context& ictx, operation op, int n)
{
    const auto& code_obj = any_cast<migraphx::gpu::code_object_op>(op);
    // Copy the shapes out before `op` is passed on by value.
    auto expected_shapes = code_obj.expected_inputs;
    return time_op(ictx, op, expected_shapes, n);
}

double time_program(const context& ictx, program p, int n)
{
std::vector<migraphx::context> ctx_vec = {ictx};
auto& gctx = any_cast<migraphx::gpu::context>(ctx_vec.front());
auto* mm = p.get_main_module();
mm->finalize(ctx_vec);
auto in_shapes = p.get_parameter_shapes();
std::unordered_map<std::string, migraphx::argument> param_map;
unsigned long seed = 0;
for(const auto& [name, shape] : in_shapes)
{
param_map[name] = to_gpu(generate_argument(shape, seed++));
}
auto run = [&] { p.eval_with_context(ctx_vec, param_map); };
return time_loop(gctx, n, run);
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
49 changes: 49 additions & 0 deletions test/verify/test_gemm_multibroadcast.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>

// Verification test: a `dot` whose right-hand operand is produced by a
// `multibroadcast`. Parameter "a" has lens {2, 2, 1025}; parameter "b" has
// lens {2, 1, 2} and is broadcast to {2, 1025, 2} before the dot.
template <migraphx::shape::type_t DType>
struct test_gemm_multibroadcast : verify_program<test_gemm_multibroadcast<DType>>
{
    // Builds the program under test for element type DType.
    migraphx::program create_program() const
    {
        migraphx::program prog;
        auto* main_mod = prog.get_main_module();

        auto lhs = main_mod->add_parameter("a", migraphx::shape{DType, {2, 2, 1025}});
        auto rhs = main_mod->add_parameter("b", migraphx::shape{DType, {2, 1, 2}});

        auto broadcast_op  = migraphx::make_op("multibroadcast", {{"out_lens", {2, 1025, 2}}});
        auto rhs_broadcast = main_mod->add_instruction(broadcast_op, rhs);
        main_mod->add_instruction(migraphx::make_op("dot"), lhs, rhs_broadcast);
        return prog;
    }

    // Section name reported for this test.
    std::string section() const { return "gemm"; }
};

// Explicitly instantiate the test for the float, half and fp8e4m3fnuz element types.
template struct test_gemm_multibroadcast<migraphx::shape::float_type>;
template struct test_gemm_multibroadcast<migraphx::shape::half_type>;
template struct test_gemm_multibroadcast<migraphx::shape::fp8e4m3fnuz_type>;

0 comments on commit cef1409

Please sign in to comment.