Skip to content

Commit

Permalink
merge kernels in existing XCLBIN
Browse files Browse the repository at this point in the history
  • Loading branch information
nirvedhmeshram committed Jun 14, 2024
1 parent 7df3386 commit aaec27e
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 23 deletions.
75 changes: 60 additions & 15 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ LogicalResult AIETargetBackend::serializeExecutable(
SmallVector<uint32_t> xclbinIndices(ordinalCount);
SmallVector<uint32_t> asmInstrIndices(ordinalCount);

SmallVector<SmallString<128>> xclbinPaths;

for (size_t i = 0; i < entryPointNames.size(); i++) {
uint64_t ordinal = entryPointOrdinals.at(entryPointNames[i]);

Expand Down Expand Up @@ -300,18 +302,34 @@ LogicalResult AIETargetBackend::serializeExecutable(
llvm::sys::path::append(npuInstPath,
entryPointNamesFb[ordinal] + ".npu.txt");

SmallVector<StringRef> cmdArgs{aie2xclbin,
inputMlirPath,
"--peano",
options.peanoInstallDir,
"--xclbin-name",
xclbinPath,
"--npu-insts-name",
npuInstPath,
"--xclbin-kernel-name",
entryPointNamesFb[ordinal],
"--tmpdir",
entryPointWorkDir};
// Convert the ordinal (offset by 10) to a hexadecimal string for the xclbin kernel id.
std::stringstream ss;
ss << "0x" << std::hex << ordinal + 10;
std::string ordinalHex = ss.str();

SmallVector<StringRef> cmdArgs;
SmallVector<StringRef> cmdArgsBase{aie2xclbin,
inputMlirPath,
"--peano",
options.peanoInstallDir,
"--xclbin-name",
xclbinPath,
"--npu-insts-name",
npuInstPath,
"--xclbin-kernel-name",
entryPointNamesFb[ordinal],
"--tmpdir",
entryPointWorkDir,
"--xclbin-kernel-id",
ordinalHex};
cmdArgs = cmdArgsBase;
bool AttemptingMerge = false;
if (i > 0) {
cmdArgs.push_back("--input-xclbin-name");
cmdArgs.push_back(xclbinPaths.back());
AttemptingMerge = true;
}
xclbinPaths.push_back(xclbinPath);

auto addOpt = [&](StringRef arg, bool value) {
if (value) cmdArgs.push_back(arg);
Expand Down Expand Up @@ -350,11 +368,24 @@ LogicalResult AIETargetBackend::serializeExecutable(
{
SmallVector<StringRef> cmdEnvRefs{cmdEnv.begin(), cmdEnv.end()};
int result = llvm::sys::ExecuteAndWait(cmdArgs[0], cmdArgs, cmdEnvRefs);
if (result != 0)
if (result != 0 && AttemptingMerge) {
// We failed to create the xclbin, but maybe we failed because we were trying
// to merge the kernel into the existing xclbin; try again to see if perhaps
// we succeed if we don't try to merge.
AttemptingMerge = false;
result =
llvm::sys::ExecuteAndWait(cmdArgsBase[0], cmdArgsBase, cmdEnvRefs);
xclbinPaths.push_back(xclbinPath);
}
if (result != 0) {
return moduleOp.emitOpError(
"Failed to produce an XCLBin with external tool.");
}
// delete the previous xclbin if we were able to merge as the new one now
// will have all the kernels from the previous one.
if (AttemptingMerge) xclbinPaths.erase(xclbinPaths.end() - 2);
xclbinIndices[ordinal] = xclbinPaths.size() - 1;
}

std::ifstream instrFile(static_cast<std::string>(npuInstPath));
std::string line;
while (std::getline(instrFile, line)) {
Expand All @@ -369,7 +400,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
asmInstrIndices[ordinal] = asmInstrRefs.size();
asmInstrRefs.push_back(
iree_amd_aie_hal_xrt_AsmInstDef_create(builder, npuInstrsVec));

/*
xclbinIn = openInputFile(xclbinPath, &errorMessage);
if (!xclbinIn) {
moduleOp.emitOpError() << "Failed to open xclbin file: " << errorMessage;
Expand All @@ -378,7 +409,21 @@ LogicalResult AIETargetBackend::serializeExecutable(
xclbinIndices[ordinal] = xclbinRefs.size();
xclbinRefs.push_back(
iree_amd_aie_hal_xrt_XclbinDef_create(builder, xclbinStringRef));
*/
}
// write out the final xclbins to flatbuffer
for (auto xclbinPath : xclbinPaths) {
llvm::outs() << "writing xclbin from path: " << xclbinPath << "\n";
std::string errorMessage;
xclbinIn = openInputFile(xclbinPath, &errorMessage);
if (!xclbinIn) {
moduleOp.emitOpError() << "Failed to open xclbin file: " << errorMessage;
}
auto xclbinStringRef = builder.createString(xclbinIn->getBuffer());
xclbinRefs.push_back(
iree_amd_aie_hal_xrt_XclbinDef_create(builder, xclbinStringRef));
}

// Serialize the executable to flatbuffer format
auto entryPointsRef = builder.createStringVec(entryPointNamesFb);

Expand Down
26 changes: 18 additions & 8 deletions runtime/src/iree-amd-aie/driver/xrt/native_executable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ iree_status_t iree_hal_xrt_native_executable_create(
iree_amd_aie_hal_xrt_XclbinDef_vec_t xclbins_vec =
iree_amd_aie_hal_xrt_ExecutableDef_xclbins_get(executable_def);

iree_host_size_t number_xclbin =
iree_amd_aie_hal_xrt_XclbinDef_vec_len(xclbins_vec);

iree_amd_aie_hal_xrt_AsmInstDef_vec_t asm_instrs_vec =
iree_amd_aie_hal_xrt_ExecutableDef_asm_instrs_get(executable_def);

Expand Down Expand Up @@ -163,17 +166,15 @@ iree_status_t iree_hal_xrt_native_executable_create(
&executable->resource);
executable->host_allocator = host_allocator;
executable->entry_point_count = entry_point_count;
for (iree_host_size_t entry_ordinal = 0; entry_ordinal < entry_point_count;
entry_ordinal++) {
const char* entry_name =
flatbuffers_string_vec_at(entry_points_vec, entry_ordinal);
uint32_t xclbin_index =
flatbuffers_uint32_vec_at(xclbin_indices_vec, entry_ordinal);
// Collect all the hardware contexts first, as multiple entry points can map to
// the same context; this way we don't need to keep reloading them.
std::vector<xrt::hw_context> contexts;
for (iree_host_size_t xclbin_index = 0; xclbin_index < number_xclbin;
xclbin_index++) {
iree_amd_aie_hal_xrt_XclbinDef_table_t xclbin_def =
iree_amd_aie_hal_xrt_XclbinDef_vec_at(xclbins_vec, xclbin_index);
flatbuffers_string_t xclbin_fb =
iree_amd_aie_hal_xrt_XclbinDef_xclbin_get(xclbin_def);

// XRT API needs this vector and can't actually read a void*.
std::vector<char> xclbinVector(
xclbin_fb, xclbin_fb + flatbuffers_string_len(xclbin_fb));
Expand All @@ -186,6 +187,14 @@ iree_status_t iree_hal_xrt_native_executable_create(
}
device.register_xclbin(xclbin);
xrt::hw_context context(device, xclbin.get_uuid());
contexts.push_back(context);
}
for (iree_host_size_t entry_ordinal = 0; entry_ordinal < entry_point_count;
entry_ordinal++) {
const char* entry_name =
flatbuffers_string_vec_at(entry_points_vec, entry_ordinal);
uint32_t xclbin_index =
flatbuffers_uint32_vec_at(xclbin_indices_vec, entry_ordinal);
uint32_t asm_instr_index =
flatbuffers_uint32_vec_at(asm_instr_indices_vec, entry_ordinal);
iree_amd_aie_hal_xrt_AsmInstDef_table_t asminst_def =
Expand All @@ -196,7 +205,8 @@ iree_status_t iree_hal_xrt_native_executable_create(
std::unique_ptr<xrt::kernel> kernel;
std::unique_ptr<xrt::bo> instr;
try {
kernel = std::make_unique<xrt::kernel>(context, entry_name);
kernel =
std::make_unique<xrt::kernel>(contexts[xclbin_index], entry_name);
// XCL_BO_FLAGS_CACHEABLE is used to indicate that this is an instruction
// buffer that resides in instr_memory. This buffer is always passed as
// the first argument to the kernel and we can use the
Expand Down

0 comments on commit aaec27e

Please sign in to comment.