diff --git a/fetch-repos.sh b/fetch-repos.sh index a4fc124fa4..64b073e6a1 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -32,7 +32,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" +HLSLIB_COMMIT="35a04fcfc58044cbbbdd6ef07a38a247aa76efb6" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" @@ -45,7 +45,7 @@ FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" BREVITAS_URL="https://github.com/Xilinx/brevitas.git" PYVERILATOR_URL="https://github.com/maltanar/pyverilator.git" CNPY_URL="https://github.com/rogersce/cnpy.git" -HLSLIB_URL="https://github.com/Xilinx/finn-hlslib.git" +HLSLIB_URL="https://github.com/lstasytis/finn-hlslib.git" OMX_URL="https://github.com/maltanar/oh-my-xilinx.git" AVNET_BDF_URL="https://github.com/Avnet/bdf.git" XIL_BDF_URL="https://github.com/Xilinx/XilinxBoardStore.git" diff --git a/src/finn/custom_op/fpgadataflow/hls/iodma_hls.py b/src/finn/custom_op/fpgadataflow/hls/iodma_hls.py index 0ba7ba974f..be27423742 100644 --- a/src/finn/custom_op/fpgadataflow/hls/iodma_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/iodma_hls.py @@ -224,7 +224,7 @@ def get_ap_int_max_w(self): def docompute(self): direction = self.get_nodeattr("direction") mode = self.get_nodeattr("burstMode") - dwc_func = "StreamingDataWidthConverter_Batch" + dwc_func = "StreamingDataWidthConverterGeneralized_Batch" if direction == "in": if mode == "wrap": func = "Mem2Stream_Batch_external_wmem" @@ -236,7 +236,7 @@ def docompute(self): raise ValueError("Invalid IODMA direction, please set to in or out") # define templates for instantiation dma_inst_template = func + "(%s, %s, numReps);" - dwc_inst_template = dwc_func + "<%d, %d, %d, %d, %d>(%s, %s, numReps);" + dwc_inst_template = dwc_func + "<%d, %d, %d, %d>(%s, %s, numReps);" # do stream infrastructure and instantiations intfw = self.get_nodeattr("intfWidth") strmw = self.get_nodeattr("streamWidth") @@ -252,10 +252,10 @@ def docompute(self): numInWords = total_bits // inWidth numOutWords = total_bits // outWidth - totalIters = max(numInWords, numOutWords) + # totalIters = max(numInWords, numOutWords) - if outWidth > inWidth: - totalIters += int(np.floor(outWidth / inWidth) + 1) - 1 + # if outWidth > inWidth: + # totalIters += int(np.floor(outWidth / inWidth) + 1) - 1 # AXI MM -> IODMA -> (DWCs) -> out # DWCs depend on AXI MM and out interface width @@ -277,7 +277,6 @@ def docompute(self): outWidth, numInWords, numOutWords, - totalIters, "dma2dwc", "out_" + self.hls_sname(), ), @@ -289,10 +288,10 @@ def docompute(self): numInWords = total_bits // inWidth numOutWords = total_bits // outWidth - totalIters = max(numInWords, numOutWords) + # totalIters = max(numInWords, numOutWords) - if outWidth > inWidth: - totalIters += int(np.floor(outWidth / inWidth) + 1) - 1 + # if outWidth > inWidth: + # totalIters += int(np.floor(outWidth / inWidth) + 1) - 1 # in0 -> (DWCs) -> IODMA -> AXI MM # DWCs depend on AXI MM and out interface width @@ -313,7 +312,6 @@ def docompute(self): outWidth, numInWords, numOutWords, - totalIters, "in0_" + self.hls_sname(), "dwc2dma", ), diff --git a/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py b/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py index 81f43c3315..9e0a72d5ed 100644 --- a/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py @@ -26,10 +26,11 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import math import numpy as np import os from qonnx.core.datatype import DataType -import math + from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.custom_op.fpgadataflow.streamingdatawidthconverter import ( StreamingDataWidthConverter, @@ -60,30 +61,19 @@ def defines(self, var): # so we use numReps to represent the first 2 dimensions # + batching if shape[0] != 1 numReps = int(np.prod(self.get_folded_input_shape()[:-2])) - # numReps = 1 # assuming folded shapes are at least 2 dim-long numInWords = int(np.prod(self.get_folded_input_shape()[-2:-1])) numOutWords = int(np.prod(self.get_folded_output_shape()[-2:-1])) - # numInWords = int(np.prod(self.get_folded_input_shape()[-2:])) - # numOutWords = int(np.prod(self.get_folded_output_shape()[-2:])) - inWidth = self.get_nodeattr("inWidth") outWidth = self.get_nodeattr("outWidth") - totalIters = max(numInWords, numOutWords) - - # if we are building up a word, the overall loop count is longer - if outWidth > inWidth: - totalIters += int(np.floor(outWidth / inWidth) + 1) - 1 - self.code_gen_dict["$DEFINES$"] = [ "#define InWidth %d " % inWidth, "#define OutWidth %d " % outWidth, "#define NumInWords %d " % numInWords, "#define NumOutWords %d " % numOutWords, - "#define totalIters %d " % totalIters, "#define numReps %d" % numReps, ] @@ -106,8 +96,8 @@ def docompute(self): op = "StreamingDataWidthConverterGeneralized_Batch" self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s(in0_%s, out_%s, numReps);" % (self.hls_sname(), self.hls_sname()) + "%s(in0_%s, out_%s, numReps);" % (self.hls_sname(), self.hls_sname()) ] def blackboxfunction(self): @@ -243,7 +233,6 @@ def execute_node(self, context, graph): ), """Output shape doesn't match expected shape, should be same as input shape""" - def lut_estimation(self): """Calculates resource estimations for LUTs""" @@ -270,6 +259,6 @@ def lut_estimation(self): cset_luts += intw + outw # generalized DWC cost penalty, this value is temporary - cnt_luts *=8 + cnt_luts *= 8 - return int(cnt_luts + cset_luts) \ No newline at end of file + return int(cnt_luts + cset_luts) diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index 0fb4aeffe4..1f2071d122 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -30,20 +30,13 @@ import pytest import numpy as np -import os -import xml.etree.ElementTree as ET from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper -from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model -import finn.builder.build_dataflow as build -import finn.builder.build_dataflow_config as build_cfg import finn.core.onnx_exec as oxe -from finn.analysis.fpgadataflow.post_synth_res import post_synth_res -from finn.core.throughput_test import throughput_test_rtlsim from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim @@ -51,85 +44,6 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers -from finn.util.basic import make_build_dir -from finn.util.fpgadataflow import is_hls_node, is_rtl_node - - -def post_synth_res_dwc(model, override_synth_report_filename=None): - """Extracts the FPGA resource results from the Vivado synthesis. - This function extras only a DWC from a DWC-only stitched model - - Returns {node name : resources_dict}.""" - - res_dict = {} - if override_synth_report_filename is not None: - synth_report_filename = override_synth_report_filename - else: - synth_report_filename = model.get_metadata_prop("vivado_synth_rpt") - if os.path.isfile(synth_report_filename): - tree = ET.parse(synth_report_filename) - root = tree.getroot() - all_cells = root.findall(".//tablecell") - # strip all whitespace from table cell contents - for cell in all_cells: - cell.attrib["contents"] = cell.attrib["contents"].strip() - else: - raise Exception("Please run synthesis first") - - # TODO build these indices based on table headers instead of harcoding - restype_to_ind_default = { - "LUT": 2, - "SRL": 5, - "FF": 6, - "BRAM_36K": 7, - "BRAM_18K": 8, - "DSP48": 9, - } - restype_to_ind_vitis = { - "LUT": 4, - "SRL": 7, - "FF": 8, - "BRAM_36K": 9, - "BRAM_18K": 10, - "URAM": 11, - "DSP48": 12, - } - - if model.get_metadata_prop("platform") == "alveo": - restype_to_ind = restype_to_ind_vitis - else: - restype_to_ind = restype_to_ind_default - - def get_instance_stats(inst_name): - row = root.findall(".//*[@contents='%s']/.." % inst_name) - if row != []: - node_dict = {} - row = list(row[0]) - for restype, ind in restype_to_ind.items(): - node_dict[restype] = int(row[ind].attrib["contents"]) - return node_dict - else: - return None - - # global (top-level) stats, including shell etc. - top_dict = get_instance_stats("(top)") - if top_dict is not None: - res_dict["(top)"] = top_dict - - for node in model.graph.node: - if node.op_type == "StreamingDataflowPartition": - sdp_model = ModelWrapper(getCustomOp(node).get_nodeattr("model")) - sdp_res_dict = post_synth_res(sdp_model, synth_report_filename) - res_dict.update(sdp_res_dict) - elif is_hls_node(node) or is_rtl_node(node): - node_dict = get_instance_stats( - "top_StreamingDataflowPartition_1_0_StreamingDataflowPartition_1" - + "_StreamingDataflowPartition_1_StreamingDataWidthConverter_hls_0_0" - ) - if node_dict is not None: - res_dict[node.name] = node_dict - - return res_dict def make_single_dwc_modelwrapper(in_shape, out_shape, inWidth, outWidth, finn_dtype): @@ -171,33 +85,21 @@ def prepare_inputs(input_tensor, dt): @pytest.mark.parametrize( "config", [ - ([1, 2, 2, 1680], [1, 2, 2, 1680], 70, 240, DataType["BIPOLAR"]), # extra word of padding - ([1, 2, 2, 1680], [1, 2, 2, 1680], 240, 70, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 70, 240, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 240, 70, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 35, 280, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 280, 35, DataType["BIPOLAR"]), # extra word of padding - # requires LCM for old version - ([1, 42], [1, 42], 6, 14, DataType["BIPOLAR"]), # extra word of padding - ([1, 1239], [1, 1239], 21, 59, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 70, 240, DataType["BIPOLAR"]), # extra word of padding - ([1, 42], [1, 42], 14, 6, DataType["BIPOLAR"]), # extra word of padding - ([1, 1239], [1, 1239], 59, 21, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 240, 70, DataType["BIPOLAR"]), # extra word of padding - # conversion without needing LCMs - ([1, 180], [1, 180], 2, 18, DataType["BIPOLAR"]), # extra word of padding - ([1, 720], [1, 720], 8, 72, DataType["BIPOLAR"]), # extra word of padding - ([1, 2880], [1, 2880], 32, 288, DataType["BIPOLAR"]), # extra word of padding - ([1, 180], [1, 180], 18, 2, DataType["BIPOLAR"]), # extra word of padding - ([1, 720], [1, 720], 72, 8, DataType["BIPOLAR"]), # extra word of padding - ([1, 2880], [1, 2880], 288, 32, DataType["BIPOLAR"]), # extra word of padding - # passthrough - ([1, 100], [1, 100], 10, 10, DataType["BIPOLAR"]), # extra word of padding - ([1, 400], [1, 400], 40, 40, DataType["BIPOLAR"]), # extra word of padding - ([1, 1600], [1, 1600], 160, 160, DataType["BIPOLAR"]), # extra word of padding + # Standard DWC functionality: + ([1, 1, 24], [1, 1, 24], 6, 4, DataType["INT2"]), + ([1, 1, 24], [1, 1, 24], 4, 6, DataType["INT2"]), + ([1, 1, 4], [1, 1, 4], 2, 4, DataType["BIPOLAR"]), + ([1, 1, 4], [1, 1, 4], 4, 2, DataType["INT2"]), + ([1, 2, 8], [1, 2, 8], 4, 4, DataType["INT2"]), + ([1, 2, 8], [1, 2, 8], 8, 16, DataType["INT2"]), + # padding-specific tests: + ([1, 2, 2, 6 * 4], [1, 2, 2, 2 * 13], 4, 13, DataType["BIPOLAR"]), + ([1, 2, 2, 2 * 4], [1, 2, 2, 4 * 4], 4, 4, DataType["BIPOLAR"]), + ([1, 2, 2, 1 * 10], [1, 2, 2, 2 * 6], 10, 6, DataType["BIPOLAR"]), + ([1, 2, 2, 1 * 10], [1, 2, 2, 2 * 4], 10, 4, DataType["BIPOLAR"]), ], ) -@pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"]) +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) @pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado @@ -214,19 +116,6 @@ def test_fpgadataflow_dwc(config, exec_mode): y = oxe.execute_onnx(model, input_dict)["outp"] assert y.shape == tuple(out_shape), """The output shape is incorrect.""" - # remove padding if it was performed - y = y.reshape(1, np.prod(y.shape)) - x = x.reshape(1, np.prod(x.shape)) - - if y.shape[-1] > x.shape[-1]: - y = y[0, : x.shape[-1]] - else: - x = x[0, : y.shape[-1]] - - assert ( - y == x - ).all(), """The output values are not the same as the - input values anymore.""" model = model.transform(SpecializeLayers(test_fpga_part)) model = model.transform(GiveUniqueNodeNames()) @@ -244,10 +133,10 @@ def test_fpgadataflow_dwc(config, exec_mode): assert y.shape == tuple(out_shape), """The output shape is incorrect.""" - # remove padding if it was performed y = y.reshape(1, np.prod(y.shape)) x = x.reshape(1, np.prod(x.shape)) + # remove padding if it was performed if y.shape[-1] > x.shape[-1]: y = y[0, : x.shape[-1]] else: @@ -262,128 +151,4 @@ def test_fpgadataflow_dwc(config, exec_mode): ).all(), """The output values are not the same as the input values anymore.""" else: - assert True # we - - -@pytest.mark.parametrize( - "config", - [ - ([1, 840], [1, 840], 35, 120, DataType["BIPOLAR"]), # extra word of padding - ([1, 840], [1, 840], 120, 35, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 35, 280, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 280, 35, DataType["BIPOLAR"]), # extra word of padding - # requires LCM for old version - ([1, 42], [1, 42], 6, 14, DataType["BIPOLAR"]), # extra word of padding - ([1, 1239], [1, 1239], 21, 59, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 70, 240, DataType["BIPOLAR"]), # extra word of padding - ([1, 42], [1, 42], 14, 6, DataType["BIPOLAR"]), # extra word of padding - ([1, 1239], [1, 1239], 59, 21, DataType["BIPOLAR"]), # extra word of padding - ([1, 1680], [1, 1680], 240, 70, DataType["BIPOLAR"]), # extra word of padding - # conversion without needing LCMs - ([1, 180], [1, 180], 2, 18, DataType["BIPOLAR"]), # extra word of padding - ([1, 720], [1, 720], 8, 72, DataType["BIPOLAR"]), # extra word of padding - ([1, 2880], [1, 2880], 32, 288, DataType["BIPOLAR"]), # extra word of padding - ([1, 180], [1, 180], 18, 2, DataType["BIPOLAR"]), # extra word of padding - ([1, 720], [1, 720], 72, 8, DataType["BIPOLAR"]), # extra word of padding - ([1, 2880], [1, 2880], 288, 32, DataType["BIPOLAR"]), # extra word of padding - # passthrough - ([1, 100], [1, 100], 10, 10, DataType["BIPOLAR"]), # extra word of padding - ([1, 400], [1, 400], 40, 40, DataType["BIPOLAR"]), # extra word of padding - ([1, 1600], [1, 1600], 160, 160, DataType["BIPOLAR"]), # extra word of padding - ], -) -@pytest.mark.fpgadataflow -@pytest.mark.slow -@pytest.mark.parametrize("measure_resources", [True]) -@pytest.mark.parametrize("measure_functionality", [False]) -@pytest.mark.parametrize("measure_performance", [False]) -@pytest.mark.parametrize("test_type", ["new"]) -@pytest.mark.vivado -def test_fpgadataflow_dwc_stitched_rtlsim( - config, measure_resources, measure_functionality, measure_performance, test_type -): - in_shape, out_shape, inWidth, outWidth, finn_dtype = config - - test_fpga_part = "xc7z020clg400-1" - target_clk_ns = 4 - # generate input data - x = gen_finn_dt_tensor(finn_dtype, in_shape) - input_dict = prepare_inputs(x, finn_dtype) - - build_dir = os.environ["FINN_BUILD_DIR"] - - build_dir = build_dir + "/test_model/" - if not os.path.isdir(build_dir): - build_dir = make_build_dir(prefix="dwc_performance_testing_") - - model = make_single_dwc_modelwrapper(in_shape, out_shape, inWidth, outWidth, finn_dtype) - model = model.transform(SpecializeLayers(test_fpga_part)) - model_dir = f"{build_dir}/dwc_res_tests_{inWidth}_{outWidth}" - model.save(model_dir) - - final_output_dir = build_dir - - # Delete previous run results if exist - # if os.path.exists(final_output_dir): - # shutil.rmtree(final_output_dir) - # print("Previous run results deleted!") - - cfg = build.DataflowBuildConfig( - output_dir=final_output_dir, - mvau_wwidth_max=80, - target_fps=1000000, - synth_clk_period_ns=target_clk_ns, - board="Pynq-Z1", - # board = "U250", - shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, - generate_outputs=[ - # build_cfg.DataflowOutputType.STITCHED_IP, - # build_cfg.DataflowOutputType.OOC_SYNTH, - build_cfg.DataflowOutputType.BITFILE, - # build_cfg.DataflowOutputType.PYNQ_DRIVER, - # build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE, - ], - ) - build.build_dataflow_cfg(model_dir, cfg) - - model.set_metadata_prop("rtlsim_so", "") - model.set_metadata_prop("exec_mode", "rtlsim") - res = post_synth_res_dwc(model, f"{final_output_dir}/report/post_synth_resources.xml") - res = res[""] - build_dir = os.environ["FINN_BUILD_DIR"] - build_dir += f"/dwc_performance_testing_{test_type}" - lut = res["LUT"] - ff = res["FF"] - target_clk = int(np.round(1000 / target_clk_ns)) - with open(f"{build_dir}/measurements.txt", "a+") as f: - f.writelines(f"{target_clk}\t{inWidth}\t{outWidth}\tnew_hls\t{lut}\t{ff}\n") - - # with open(f"{build_dir}_new_DWC_res.txt", 'a+') as f: - # f.write(res) # here filter to only what we care about - print(f"{target_clk}\t{inWidth}\t{outWidth}\tnew_hls\t{lut}\t{ff}\n") - - # assert True == False - - if measure_functionality: - y = oxe.execute_onnx(model, input_dict)["outp"] - - assert y.shape == tuple(out_shape), """The output shape is incorrect.""" - - # remove padding if it was performed - y = y.reshape(1, np.prod(y.shape)) - x = x.reshape(1, np.prod(x.shape)) - - if y.shape[-1] > x.shape[-1]: - y = y[0, : x.shape[-1]] - else: - x = x[0, : y.shape[-1]] - - assert ( - y == x - ).all(), """The output values are not the same as the - input values anymore.""" - - if measure_performance: - rtlsim_bs = 50 - res = throughput_test_rtlsim(model, rtlsim_bs) - print(f"Performance for {in_shape, out_shape,inWidth,outWidth} :", res) + assert True