Please consider the following formatting changes
alibuild committed Oct 21, 2024
1 parent 534da50 commit bb2cb6e
Showing 7 changed files with 385 additions and 372 deletions.
76 changes: 37 additions & 39 deletions Common/ML/include/ML/ort_interface.h
@@ -35,60 +35,58 @@ namespace ml
class OrtModel
{

public:
// Constructor
OrtModel() = default;
OrtModel(std::unordered_map<std::string, std::string> optionsMap){ reset(optionsMap); }
void init(std::unordered_map<std::string, std::string> optionsMap){ reset(optionsMap); }
void reset(std::unordered_map<std::string, std::string>);
public:
// Constructor
OrtModel() = default;
OrtModel(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void init(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void reset(std::unordered_map<std::string, std::string>);

virtual ~OrtModel() = default;
virtual ~OrtModel() = default;

// Conversion
template<class I, class O>
std::vector<O> v2v(std::vector<I>&, bool = true);
// Conversion
template <class I, class O>
std::vector<O> v2v(std::vector<I>&, bool = true);

// Inferencing
template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
std::vector<O> inference(std::vector<I>&);
// Inferencing
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h
std::vector<O> inference(std::vector<I>&);

template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> inference(std::vector<std::vector<I>>&);
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> inference(std::vector<std::vector<I>>&);

// template<class I, class T, class O> // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type
// std::vector<O> inference(std::vector<I>&);
// template<class I, class T, class O> // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type
// std::vector<O> inference(std::vector<I>&);

// Reset session
void resetSession();
// Reset session
void resetSession();

std::vector<std::vector<int64_t>> getNumInputNodes() const { return mInputShapes; }
std::vector<std::vector<int64_t>> getNumOutputNodes() const { return mOutputShapes; }
std::vector<std::string> getInputNames() const { return mInputNames; }
std::vector<std::string> getOutputNames() const { return mOutputNames; }
std::vector<std::vector<int64_t>> getNumInputNodes() const { return mInputShapes; }
std::vector<std::vector<int64_t>> getNumOutputNodes() const { return mOutputShapes; }
std::vector<std::string> getInputNames() const { return mInputNames; }
std::vector<std::string> getOutputNames() const { return mOutputNames; }

void setActiveThreads(int threads) { intraOpNumThreads = threads; }
void setActiveThreads(int threads) { intraOpNumThreads = threads; }

private:
private:
// ORT variables -> need to be hidden as Pimpl
struct OrtVariables;
OrtVariables* pImplOrt;

// ORT variables -> need to be hidden as Pimpl
struct OrtVariables;
OrtVariables* pImplOrt;
// Input & Output specifications of the loaded network
std::vector<const char*> inputNamesChar, outputNamesChar;
std::vector<std::string> mInputNames, mOutputNames;
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes;

// Input & Output specifications of the loaded network
std::vector<const char*> inputNamesChar, outputNamesChar;
std::vector<std::string> mInputNames, mOutputNames;
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes;

// Environment settings
std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;

std::string printShape(const std::vector<int64_t>&);
// Environment settings
std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;

std::string printShape(const std::vector<int64_t>&);
};

} // namespace ml

} // namespace ml
} // namespace o2

#endif // O2_ML_ORT_INTERFACE_H
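
For reference (not part of this commit), a minimal usage sketch of the OrtModel interface declared above. The option keys mirror the ones parsed by OrtModel::reset() in ort_interface.cxx below; the include path, model path, thread count, and input size are hypothetical placeholders.

#include <string>
#include <unordered_map>
#include <vector>

#include "ML/ort_interface.h" // assumed include path for the header shown above

int main()
{
  // Option keys follow those read in OrtModel::reset(); all values are placeholders.
  std::unordered_map<std::string, std::string> options{
    {"model-path", "/path/to/model.onnx"}, // hypothetical model file
    {"device", "CPU"},                     // CPU, ROCM, MIGRAPHX or CUDA
    {"dtype", "float"},
    {"intra-op-num-threads", "4"},
    {"logging-level", "2"}};

  o2::ml::OrtModel model(options);

  // The flat input should hold a multiple of the per-sample feature count,
  // since inference() derives the batch size from input.size().
  const size_t nFeatures = model.getNumInputNodes()[0][1];
  std::vector<float> input(2 * nFeatures, 0.f); // two dummy samples
  std::vector<float> output = model.inference<float, float>(input);
  return output.empty() ? 1 : 0;
}
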
88 changes: 52 additions & 36 deletions Common/ML/src/ort_interface.cxx
@@ -25,7 +25,7 @@ namespace o2
namespace ml
{

struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file
struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file
// ORT runtime objects
Ort::RunOptions runOptions;
std::shared_ptr<Ort::Env> env = nullptr;
@@ -35,55 +35,56 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault);
};

void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
{

pImplOrt = new OrtVariables();

// Load from options map
if(!optionsMap.contains("model-path")){
if (!optionsMap.contains("model-path")) {
LOG(fatal) << "(ORT) Model path cannot be empty!";
}
modelPath = optionsMap["model-path"];
device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float");
deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0);
enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);

std::string dev_mem_str = "Hip";
#ifdef ORT_ROCM_BUILD
if(device == "ROCM") {
if (device == "ROCM") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId));
LOG(info) << "(ORT) ROCM execution provider set";
}
#endif
#ifdef ORT_MIGRAPHX_BUILD
if(device == "MIGRAPHX") {
if (device == "MIGRAPHX") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId));
LOG(info) << "(ORT) MIGraphX execution provider set";
}
#endif
#ifdef ORT_CUDA_BUILD
if(device == "CUDA") {
if (device == "CUDA") {
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId));
LOG(info) << "(ORT) CUDA execution provider set";
dev_mem_str = "Cuda";
}
#endif

if(allocateDeviceMemory){
if (allocateDeviceMemory) {
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
LOG(info) << "(ORT) Memory info set to on-device memory";
}

if(device == "CPU") {
if (device == "CPU") {
(pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
if(intraOpNumThreads > 1){
if (intraOpNumThreads > 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
} else if(intraOpNumThreads == 1){
} else if (intraOpNumThreads == 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
}
LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
@@ -92,8 +93,8 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
(pImplOrt->sessionOptions).DisableMemPattern();
(pImplOrt->sessionOptions).DisableCpuMemArena();

if(enableProfiling){
if(optionsMap.contains("profiling-output-path")){
if (enableProfiling) {
if (optionsMap.contains("profiling-output-path")) {
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str());
} else {
LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now.";
@@ -109,27 +110,27 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
(pImplOrt->session).reset(new Ort::Session{*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions});

for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}

inputNamesChar.resize(mInputNames.size(), nullptr);
std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
[&](const std::string& str) { return str.c_str(); });
[&](const std::string& str) { return str.c_str(); });
outputNamesChar.resize(mOutputNames.size(), nullptr);
std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
[&](const std::string& str) { return str.c_str(); });
[&](const std::string& str) { return str.c_str(); });

// Print names
if(loggingLevel > 1) {
if (loggingLevel > 1) {
LOG(info) << "Input Nodes:";
for (size_t i = 0; i < mInputNames.size(); i++) {
LOG(info) << "\t" << mInputNames[i] << " : " << printShape(mInputShapes[i]);
@@ -142,24 +143,28 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap){
}
}

void OrtModel::resetSession() {
void OrtModel::resetSession()
{
(pImplOrt->session).reset(new Ort::Session{*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions});
}

template<class I, class O>
std::vector<O> OrtModel::v2v(std::vector<I>& input, bool clearInput) {
if constexpr (std::is_same_v<I,O>){
template <class I, class O>
std::vector<O> OrtModel::v2v(std::vector<I>& input, bool clearInput)
{
if constexpr (std::is_same_v<I, O>) {
return input;
} else {
std::vector<O> output(input.size());
std::transform(std::begin(input), std::end(input), std::begin(output), [](I f) { return O(f); });
if(clearInput) input.clear();
if (clearInput)
input.clear();
return output;
}
}

template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<I>& input){
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<I>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, (reinterpret_cast<O*>(input)).data(), input.size(), inputShape.data(), inputShape.size()));
@@ -171,10 +176,11 @@ std::vector<O> OrtModel::inference(std::vector<I>& input){
return outputValuesVec;
}

template<class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input){
template <class I, class O> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
std::vector<O> OrtModel::inference(std::vector<std::vector<I>>& input)
{
std::vector<Ort::Value> inputTensor;
for(auto i : input){
for (auto i : input) {
std::vector<int64_t> inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
inputTensor.emplace_back(Ort::Value::CreateTensor<O>(pImplOrt->memoryInfo, (reinterpret_cast<O*>(i)).data(), i.size(), inputShape.data(), inputShape.size()));
}
@@ -195,7 +201,9 @@ std::string OrtModel::printShape(const std::vector<int64_t>& v)
return ss.str();
}

template <> std::vector<float> OrtModel::inference<float, float>(std::vector<float>& input) {
template <>
std::vector<float> OrtModel::inference<float, float>(std::vector<float>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<float>(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size()));
@@ -207,7 +215,9 @@ template <> std::vector<float> OrtModel::inference<float, float>(std::vector<float>& input) {
return outputValuesVec;
}

template <> std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>& input) {
template <>
std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input.data()), input.size(), inputShape.data(), inputShape.size()));
@@ -219,7 +229,9 @@ template <> std::vector<float> OrtModel::inference<OrtDataType::Float16_t, float>(std::vector<OrtDataType::Float16_t>& input) {
return outputValuesVec;
}

template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>& input) {
template <>
std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input.data()), input.size(), inputShape.data(), inputShape.size()));
@@ -231,7 +243,9 @@ template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<OrtDataType::Float16_t>& input) {
return outputValuesVec;
}

template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<float, OrtDataType::Float16_t>(std::vector<float>& input) {
template <>
std::vector<OrtDataType::Float16_t> OrtModel::inference<float, OrtDataType::Float16_t>(std::vector<float>& input)
{
std::vector<int64_t> inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
std::vector<Ort::Value> inputTensor;
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input.data()), input.size(), inputShape.data(), inputShape.size()));
Expand All @@ -243,9 +257,11 @@ template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<float, OrtDa
return outputValuesVec;
}

template <> std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<std::vector<OrtDataType::Float16_t>>& input) {
template <>
std::vector<OrtDataType::Float16_t> OrtModel::inference<OrtDataType::Float16_t, OrtDataType::Float16_t>(std::vector<std::vector<OrtDataType::Float16_t>>& input)
{
std::vector<Ort::Value> inputTensor;
for(auto i : input){
for (auto i : input) {
std::vector<int64_t> inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
inputTensor.emplace_back(Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(i.data()), i.size(), inputShape.data(), inputShape.size()));
}

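All of the inference() overloads in ort_interface.cxx derive the ONNX tensor shape the same way: the per-sample width comes from the model (mInputShapes[0][1]) and the batch size is inferred from the length of the flat input vector. A standalone sketch of that computation, with purely illustrative numbers:

#include <cstdint>
#include <vector>

int main()
{
  const int64_t nFeatures = 7;                  // stand-in for mInputShapes[0][1]
  std::vector<float> input(3 * nFeatures, 0.f); // three samples, flattened
  std::vector<int64_t> inputShape{static_cast<int64_t>(input.size() / nFeatures), nFeatures};
  // inputShape is {3, 7}: the batch size is inferred, the feature count is fixed by the model.
  return inputShape[0] == 3 ? 0 : 1;
}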