Skip to content

Commit

Permalink
Merge branch 'develop' into onnxruntime-sync-2023-12-22
Browse files Browse the repository at this point in the history
  • Loading branch information
TedThemistokleous authored Dec 29, 2023
2 parents 6cefecb + fff12a2 commit 3770c8e
Show file tree
Hide file tree
Showing 9 changed files with 432 additions and 44 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,5 +323,5 @@ rocm_create_package(
MAINTAINER "AMDMIGraphX Maintainer <[email protected]>"
LDCONFIG
PTH
DEPENDS miopen-hip rocblas hip-rocclr hip-base half
DEPENDS miopen-hip rocblas hip-rocclr hip-base half libtbb2
)
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ add_library(migraphx
analyze_streams.cpp
apply_alpha_beta.cpp
argument.cpp
autocast_fp8.cpp
auto_contiguous.cpp
common.cpp
common_dims.cpp
Expand Down
81 changes: 81 additions & 0 deletions src/autocast_fp8.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/autocast_fp8.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/program.hpp>
#include <migraphx/ranges.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

void autocast_fp8_pass::apply(module& m) const
{
std::vector<instruction_ref> remove_parameters;
for(auto ins : iterator_for(m))
{
const auto& ins_name = ins->name();
if(ins_name == "@param" and contains(fp8_types, ins->get_shape().type()))
{
shape::type_t fp8_type = ins->get_shape().type();
migraphx::shape new_shape = ins->get_shape().with_type(target_type);
std::string param_name = ins->get_operator().to_value()["parameter"].to<std::string>();
m.rename_parameter(ins, param_name + "_old");
auto new_param = m.add_parameter(param_name, new_shape);
auto new_ins = m.insert_instruction(
ins,
migraphx::make_op("convert", {{"target_type", migraphx::to_value(fp8_type)}}),
new_param);
m.replace_instruction(ins, new_ins);
remove_parameters.push_back(ins);
}

if(ins_name == "@return")
{
std::vector<instruction_ref> inputs = ins->inputs();
std::vector<instruction_ref> new_inputs;
std::transform(
inputs.begin(), inputs.end(), std::back_inserter(new_inputs), [&](auto i) {
if(contains(fp8_types, i->get_shape().type()))
{
return m.insert_instruction(
ins,
migraphx::make_op("convert",
{{"target_type", migraphx::to_value(target_type)}}),
i);
}
else
return i;
});
m.replace_return({new_inputs});
}
}
// Remove unused parameters with fp8 type
for(const auto& i : remove_parameters)
m.remove_instruction(i);
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
49 changes: 49 additions & 0 deletions src/include/migraphx/autocast_fp8.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_AUTOCAST_FP8_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_AUTOCAST_FP8_HPP

#include <migraphx/shape.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct program;
struct module;

/**
This pass will convert model with fp8 input parameter to model with fp32
input parameter and internally add casts to fp8 for those converted params.*/
struct MIGRAPHX_EXPORT autocast_fp8_pass
{
std::set<shape::type_t> fp8_types = {migraphx::shape::fp8e4m3fnuz_type};
shape::type_t target_type = migraphx::shape::float_type;
std::string name() const { return "autocast_fp8_pass"; }
void apply(module& m) const;
};

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
2 changes: 2 additions & 0 deletions src/include/migraphx/module.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ struct MIGRAPHX_EXPORT module

instruction_ref get_parameter(std::string name) const;

void rename_parameter(instruction_ref ins, const std::string& name);

std::unordered_map<std::string, shape> get_parameter_shapes() const;

bool has_instruction(instruction_ref ins) const;
Expand Down
150 changes: 109 additions & 41 deletions src/include/migraphx/op/nonmaxsuppression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,59 @@
#include <migraphx/check_shapes.hpp>
#include <migraphx/output_iterator.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/par.hpp>

/*
https://github.com/onnx/onnx/blob/main/docs/Operators.md#NonMaxSuppression
Filter out boxes that have high intersection-over-union (IOU) overlap with previously selected
boxes. Bounding boxes with score less than score_threshold are removed. Bounding box format is
indicated by attribute center_point_box. Note that this algorithm is agnostic to where the origin is
in the coordinate system and more generally is invariant to orthogonal transformations and
translations of the coordinate system; thus translating or reflections of the coordinate system
result in the same boxes being selected by the algorithm. The selected_indices output is a set of
integers indexing into the input collection of bounding boxes representing the selected boxes. The
bounding box coordinates corresponding to the selected indices can then be obtained using the Gather
or GatherND operation.
Version
This version of the operator has been available since version 11 of the default ONNX operator set.
Other versions of this operator: 10
Attributes
center_point_box : int (default is 0)
Integer indicate the format of the box data. The default is 0. 0 - the box data is supplied as [y1,
x1, y2, x2] where (y1, x1) and (y2, x2) are the coordinates of any diagonal pair of box corners and
the coordinates can be provided as normalized (i.e., lying in the interval [0, 1]) or absolute.
Mostly used for TF models. 1 - the box data is supplied as [x_center, y_center, width, height].
Mostly used for Pytorch models.
Inputs (2 - 5)
---------------------------------------------------------------------------------------------------------------------
boxes : tensor(float)
An input tensor with shape [num_batches, spatial_dimension, 4].
The single box data format is indicated by center_point_box.
scores : tensor(float)
An input tensor with shape [num_batches, num_classes, spatial_dimension]
max_output_boxes_per_class (optional) : tensor(int64)
Integer representing the maximum number of boxes to be selected per batch per class.
It is a scalar. Default to 0, which means no output.
iou_threshold (optional) : tensor(float)
Float representing the threshold for deciding whether boxes overlap too much with respect to IOU.
It is scalar. Value range [0, 1]. Default to 0.
score_threshold (optional) : tensor(flo187Gat)
Float representing the threshold for deciding when to remove boxes based on score. It is a scalar.
----------------------------------------------------------------------------------------------------------------------
Outputs
selected_indices : tensor(int64)
selected indices from the boxes tensor. [num_selected_indices, 3],
the selected index format is [batch_index, class_index, box_index].
----------------------------------------------------------------------------------------------------------------------
*/
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
Expand Down Expand Up @@ -184,13 +236,20 @@ struct nonmaxsuppression
result.y = {static_cast<double>(start[0]), static_cast<double>(start[2])};
}

result.sort();

return result;
}

inline bool suppress_by_iou(box b1, box b2, double iou_threshold) const
{
b1.sort();
b2.sort();
const double area1 = b1.area();
const double area2 = b2.area();

if(area1 <= .0f or area2 <= .0f)
{
return false;
}

box intersection{};
for(auto i : range(2))
Expand All @@ -203,12 +262,10 @@ struct nonmaxsuppression
}
}

const double area1 = b1.area();
const double area2 = b2.area();
const double intersection_area = intersection.area();
const double union_area = area1 + area2 - intersection_area;

if(area1 <= .0f or area2 <= .0f or union_area <= .0f)
if(union_area <= .0f)
{
return false;
}
Expand All @@ -220,30 +277,42 @@ struct nonmaxsuppression

// filter boxes below score_threshold
template <class T>
std::priority_queue<std::pair<double, int64_t>>
std::vector<std::pair<double, int64_t>>
filter_boxes_by_score(T scores_start, std::size_t num_boxes, double score_threshold) const
{
std::priority_queue<std::pair<double, int64_t>> boxes_heap;
auto insert_to_boxes_heap =
make_function_output_iterator([&](const auto& x) { boxes_heap.push(x); });
std::vector<std::pair<double, int64_t>> boxes_heap;
int64_t box_idx = 0;
transform_if(
scores_start,
scores_start + num_boxes,
insert_to_boxes_heap,
[&](auto sc) {
box_idx++;
return sc >= score_threshold;
},
[&](auto sc) { return std::make_pair(sc, box_idx - 1); });

if(score_threshold > 0.0)
{
transform_if(
scores_start,
scores_start + num_boxes,
std::back_inserter(boxes_heap),
[&](auto sc) {
box_idx++;
return sc >= score_threshold;
},
[&](auto sc) { return std::make_pair(sc, box_idx - 1); });
}
else
{ // score is irrelevant, just push into boxes_heap and make a score-index pair
std::transform(scores_start,
scores_start + num_boxes,
std::back_inserter(boxes_heap),
[&](auto sc) {
box_idx++;
return std::make_pair(sc, box_idx - 1);
});
}
par_sort(boxes_heap.begin(), boxes_heap.end(), std::greater<std::pair<double, int64_t>>{});
return boxes_heap;
}

template <class Output, class Boxes, class Scores>
std::size_t compute_nms(Output output,
Boxes boxes,
Scores scores,
const shape& max_output_shape,
std::size_t max_output_boxes_per_class,
double iou_threshold,
double score_threshold) const
Expand All @@ -254,9 +323,7 @@ struct nonmaxsuppression
const auto num_classes = lens[1];
const auto num_boxes = lens[2];
// boxes of a class with NMS applied [score, index]
std::vector<std::pair<double, int64_t>> selected_boxes_inside_class;
std::vector<int64_t> selected_indices;
selected_boxes_inside_class.reserve(max_output_shape.elements());
// iterate over batches and classes
shape comp_s{shape::double_type, {num_batches, num_classes}};
shape_for_each(comp_s, [&](const auto& idx) {
Expand All @@ -267,31 +334,33 @@ struct nonmaxsuppression
// iterator to first value of this batch
auto batch_boxes_start = boxes.begin() + batch_idx * num_boxes * 4;
auto boxes_heap = filter_boxes_by_score(scores_start, num_boxes, score_threshold);
selected_boxes_inside_class.clear();
int64_t selected_boxes_inside_class = 0;
while(not boxes_heap.empty() &&
selected_boxes_inside_class.size() < max_output_boxes_per_class)
selected_boxes_inside_class < max_output_boxes_per_class)
{
// select next top scorer box and remove any boxes from boxes_heap that exceeds IOU
// threshold with the selected box
const auto next_top_score = boxes_heap.top();
boxes_heap.pop();
selected_boxes_inside_class.push_back(next_top_score);
const auto next_top_score = boxes_heap.front();
auto next_box = batch_box(batch_boxes_start, next_top_score.second);
auto next_box_idx = next_top_score.second;

selected_boxes_inside_class++;
selected_indices.push_back(batch_idx);
selected_indices.push_back(class_idx);
selected_indices.push_back(next_top_score.second);
std::priority_queue<std::pair<double, int64_t>> remainder_boxes;
while(not boxes_heap.empty())
{
auto iou_candidate_box = boxes_heap.top();
if(not this->suppress_by_iou(
batch_box(batch_boxes_start, iou_candidate_box.second),
batch_box(batch_boxes_start, next_top_score.second),
iou_threshold))
{
remainder_boxes.push(iou_candidate_box);
}
boxes_heap.pop();
}
selected_indices.push_back(next_box_idx);

std::vector<std::pair<double, int64_t>> remainder_boxes(boxes_heap.size());

auto it = par_copy_if(
boxes_heap.begin() + 1,
boxes_heap.end(),
remainder_boxes.begin(),
[&](auto iou_candidate_box) {
auto iou_box = batch_box(batch_boxes_start, iou_candidate_box.second);
return not this->suppress_by_iou(iou_box, next_box, iou_threshold);
});

remainder_boxes.resize(it - remainder_boxes.begin());
boxes_heap = remainder_boxes;
}
});
Expand Down Expand Up @@ -320,7 +389,6 @@ struct nonmaxsuppression
num_selected = compute_nms(output,
boxes,
scores,
max_output_shape,
max_output_boxes_per_class,
iou_threshold,
score_threshold);
Expand Down
5 changes: 3 additions & 2 deletions src/include/migraphx/op/scatternd_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,15 @@ struct scatternd_op : op_name<Derived>
auto updates_idx = updates_std.multi(i);
std::vector<std::size_t> indices_idx(q, 0);
std::copy(
updates_idx.begin(), updates_idx.begin() + q - 1, indices_idx.begin());
updates_idx.begin(), updates_idx.begin() + (q - 1), indices_idx.begin());
auto index_start = indices.begin() +
indices_shape.index(indices_idx.begin(), indices_idx.end());
auto index_end = index_start + k;

std::vector<std::size_t> out_idx(r, 0);
std::copy(index_start, index_end, out_idx.begin());
std::copy(updates_idx.begin() + q - 1, updates_idx.end(), out_idx.begin() + k);
std::copy(
updates_idx.begin() + (q - 1), updates_idx.end(), out_idx.begin() + k);

self.reduction()(output[dyn_out.computed_shape.index(out_idx)], updates[i]);
}
Expand Down
Loading

0 comments on commit 3770c8e

Please sign in to comment.