diff --git a/docs/_static/first_stimuli.png b/docs/_static/first_stimuli.png
new file mode 100755
index 00000000..c304bfd3
Binary files /dev/null and b/docs/_static/first_stimuli.png differ
diff --git a/docs/_static/first_stimuli_fliped.png b/docs/_static/first_stimuli_fliped.png
new file mode 100755
index 00000000..23c35600
Binary files /dev/null and b/docs/_static/first_stimuli_fliped.png differ
diff --git a/docs/_static/second_stimuli.png b/docs/_static/second_stimuli.png
new file mode 100755
index 00000000..4d93837c
Binary files /dev/null and b/docs/_static/second_stimuli.png differ
diff --git a/docs/python_api/cells.rst b/docs/python_api/cells.rst
index 8dbef814..692cf4d8 100644
--- a/docs/python_api/cells.rst
+++ b/docs/python_api/cells.rst
@@ -19,6 +19,14 @@ Cells are organize with the following logic :
 .. figure:: ../_static/n2d2_cell_diagram.png
    :alt: Cell class diagram
 
+Block
+~~~~~
+
+.. autoclass:: n2d2.cells.Block
+   :members:
+   :inherited-members:
+
+
 Sequence
 ~~~~~~~~
 
diff --git a/docs/python_api/example.rst b/docs/python_api/example.rst
index 96537bea..10c45c83 100644
--- a/docs/python_api/example.rst
+++ b/docs/python_api/example.rst
@@ -8,6 +8,7 @@ If you find an example not up to date, please consider leaving an issue here : h
 .. toctree::
    :maxdepth: 1
 
+   example/data_augmentation
    example/performance_analysis
    example/load_onnx
    example/graph_manipulation
diff --git a/docs/python_api/example/data_augmentation.rst b/docs/python_api/example/data_augmentation.rst
new file mode 100644
index 00000000..1bfbe94e
--- /dev/null
+++ b/docs/python_api/example/data_augmentation.rst
@@ -0,0 +1,139 @@
+Data augmentation
+=================
+
+In this example, we will see how to use :py:class:`n2d2.provider.DataProvider` and :py:class:`n2d2.transform.Transformation` to load data and perform some data augmentation.
+
+You can find the full Python script here :download:`data_augmentation.py`.
+
+Preliminary
+-----------
+
+
+For this tutorial, we will use n2d2 for the data augmentation, and NumPy and Matplotlib for the visualization.
+
+We will create a ``plot_tensor`` function to save the images generated from an :py:class:`n2d2.Tensor` :
+
+
+.. code-block::
+
+    import n2d2
+    import matplotlib.pyplot as plt
+
+    def plot_tensor(tensor, path):
+        plt.imshow(tensor[0,0,:], cmap='gray', vmin=0, vmax=255)
+        plt.savefig(path)
+
+Loading data
+------------
+
+We will begin by creating a :py:class:`n2d2.database.MNIST` driver to load the MNIST dataset.
+We will then create a provider to read the images; we use a batch size of 1 to get a single image at a time.
+
+
+.. code-block::
+
+    database = n2d2.database.MNIST(data_path="/local/DATABASE/mnist", validation=0.1)
+    provider = n2d2.provider.DataProvider(database, [28, 28, 1], batch_size=1)
+
+
+You can get the number of stimuli per partition with the method :py:meth:`n2d2.database.Database.get_partition_summary`, which prints how the data are partitioned.
+
+.. code-block::
+
+    database.get_partition_summary()
+
+
+**Output :**
+
+.. testoutput::
+
+    Number of stimuli : 70000
+    Learn : 54000 stimuli (77.14%)
+    Test : 10000 stimuli (14.29%)
+    Validation : 6000 stimuli (8.57%)
+    Unpartitioned : 0 stimuli (0.0%)
+
+
+To select which partition you want to read from, use the method :py:meth:`n2d2.provider.DataProvider.set_partition`.
+
+There are multiple ways to read data from a :py:class:`n2d2.provider.DataProvider`.
+
+You can use the methods :py:meth:`n2d2.provider.DataProvider.read_batch` or :py:meth:`n2d2.provider.DataProvider.read_random_batch`.
+
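+For instance (a minimal sketch reusing the ``provider`` defined above; the partition and index are only illustrative):
+
+.. code-block::
+
+    provider.set_partition("Test")               # read from the Test partition
+    batch = provider.read_batch(idx=0)           # deterministic read of stimulus 0
+    random_batch = provider.read_random_batch()  # random stimulus from the partition
+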
+.. note::
+
+    :py:class:`n2d2.provider.DataProvider` is an `iterable`, so you can also use the ``next()`` function or a ``for`` loop !
+
+    .. code-block::
+
+        # for loop example
+        for data in provider:
+            pass
+        # next example
+        data = next(provider)
+
+For this tutorial, we will use :py:meth:`n2d2.provider.DataProvider.read_batch` !
+
+With this code we will get the first image and plot it :
+
+.. code-block::
+
+    image = provider.read_batch(idx=0).to_numpy() * 255
+    plot_tensor(image, "first_stimuli.png")
+
+
+.. figure:: /_static/first_stimuli.png
+   :alt: First stimuli of the MNIST dataset.
+
+Data augmentation
+-----------------
+
+To do data augmentation with N2D2 we use :py:class:`n2d2.transform.Transformation`.
+You can add transformations to the provider with the methods :py:meth:`n2d2.provider.DataProvider.add_on_the_fly_transformation` and :py:meth:`n2d2.provider.DataProvider.add_transformation`.
+
+.. warning::
+
+    Since we have already loaded the first image, the method :py:meth:`n2d2.provider.DataProvider.add_transformation` would not apply the transformation to it.
+
+By using the transformation :py:class:`n2d2.transform.Flip`, we will flip our image vertically.
+
+.. code-block::
+
+    provider.add_on_the_fly_transformation(n2d2.transform.Flip(vertical_flip=True))
+
+    image = provider.read_batch(idx=0).to_numpy() * 255
+    plot_tensor(image, "first_stimuli_fliped.png")
+
+
+.. figure:: /_static/first_stimuli_fliped.png
+   :alt: First stimuli of the MNIST dataset but flipped.
+
+We will negate the first transformation with another :py:class:`n2d2.transform.Flip`, which we will add with the method :py:meth:`n2d2.provider.DataProvider.add_transformation`.
+
+.. code-block::
+
+    # negating the first transformation with another one
+    provider.add_transformation(n2d2.transform.Flip(vertical_flip=True))
+    image = provider.read_batch(idx=1).to_numpy() * 255
+    plot_tensor(image, "second_stimuli.png")
+
+.. figure:: /_static/second_stimuli.png
+   :alt: Second stimuli of the MNIST dataset.
+
+
+Getting labels
+--------------
+
+To get the labels of the loaded stimuli, use the method ``get_labels`` of the provider :
+
+.. code-block::
+
+
+    print("Second stimuli label : ", provider.get_labels()[0])
+
+**Output :**
+
+.. testoutput::
+
+    Second stimuli label : 5
diff --git a/docs/python_api/export.rst b/docs/python_api/export.rst
index 0ee0a5bf..0dd4b2f7 100644
--- a/docs/python_api/export.rst
+++ b/docs/python_api/export.rst
@@ -73,4 +73,16 @@ Example
         DEEPNET_CELL,
         nb_bits=8,
         export_nb_stimuli_max=-1,
-        calibration=-1)
\ No newline at end of file
+        calibration=-1)
+
+Export CPP TensorRT
+-------------------
+
+.. autofunction:: n2d2.export.export_tensor_rt
+
+Example
+^^^^^^^
+
+.. code-block:: python
+
+    n2d2.export.export_tensor_rt(DEEPNET_CELL)
\ No newline at end of file
diff --git a/docs/quant/qat.rst b/docs/quant/qat.rst
index 4ed83a95..a85f7ee5 100755
--- a/docs/quant/qat.rst
+++ b/docs/quant/qat.rst
@@ -1,4 +1,4 @@
-Quantization-Aware Training
+[NEW] Quantization-Aware Training
 ==================================
 .. role:: raw-html(raw)
    :format: html
@@ -567,7 +567,255 @@ Congratulations! Your ``resnet-18-v1`` model have now it's weights parameters an
 ONNX model : ResNet-18 Example - Python
 #############################################
 
-Coming soon.
+In this example, we will do the same as in the previous section, this time using the Python API.
+
+You can find the complete script for this tutorial here :download:`resnet18v1 quantization example`.
+
+First, you need to retrieve the ``resnet18v1.onnx`` file, which you can download from https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet18v1/resnet18v1.onnx,
+or use the N2D2 script ``N2D2/tools/install_onnx_models.py``, which automatically installs a set of pre-trained ONNX models under your ``N2D2_MODELS`` system path.
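+
+For example, a minimal way to fetch the file directly from Python (``urlretrieve`` comes from the standard library; any HTTP client works just as well):
+
+.. code-block:: python
+
+    from urllib.request import urlretrieve
+
+    # download the pre-trained ResNet-18 ONNX model next to the script
+    urlretrieve(
+        "https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet18v1/resnet18v1.onnx",
+        "resnet18v1.onnx")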
+
+Once this is done, you can create a data provider for the dataset ``ILSVRC2012``.
+
+.. code-block:: python
+
+    print("Create database")
+    database = n2d2.database.ILSVRC2012(learn=1.0, random_partitioning=True)
+    database.load(args.data_path, label_path=args.label_path)
+    print(database)
+    print("Create provider")
+    provider = n2d2.provider.DataProvider(database=database, size=[224, 224, 3], batch_size=batch_size)
+    print(provider)
+
+We will then apply some pre-processing to the dataset.
+
+We use :py:class:`n2d2.transform.Composite` for a compact syntax that avoids multiple calls to the method ``add_transformation``.
+
+.. code-block:: python
+
+    print("Adding transformations")
+    transformations = n2d2.transform.Composite([
+        n2d2.transform.ColorSpace("RGB"),
+        n2d2.transform.RangeAffine("Divides", 255.0),
+        n2d2.transform.RandomResizeCrop(224, 224, scale_min=0.2, scale_max=1.0, ratio_min=0.75,
+                                        ratio_max=1.33, apply_to="LearnOnly"),
+        n2d2.transform.Rescale(256, 256, keep_aspect_ratio=True, resize_to_fit=False,
+                               apply_to="NoLearn"),
+        n2d2.transform.PadCrop(256, 256, apply_to="NoLearn"),
+        n2d2.transform.SliceExtraction(224, 224, offset_x=16, offset_y=16, apply_to="NoLearn"),
+    ])
+
+    print(transformations)
+
+    flip_trans = n2d2.transform.Flip(apply_to="LearnOnly", random_horizontal_flip=True)
+
+    provider.add_transformation(transformations)
+    provider.add_on_the_fly_transformation(flip_trans)
+    print(provider)
+
+Once this is done, we can import the ``resnet-18-v1`` ONNX model using :py:class:`n2d2.cells.DeepNetCell`.
+
+.. code-block:: python
+
+    model = n2d2.cells.DeepNetCell.load_from_ONNX(provider, path_to_ONNX)
+
+Once the ONNX model is loaded, we will change the configuration of the :py:class:`n2d2.cells.Conv`, :py:class:`n2d2.cells.Fc` and :py:class:`n2d2.cells.BatchNorm2d` layers.
+To do so, we will iterate over the layers of our model, check the type of each layer,
+and then apply the desired configuration to each cell.
+
+.. code-block:: python
+
+    print("Updating cells ...")
+
+    for cell in model:
+        ### Updating Conv Cells ###
+        if isinstance(cell, n2d2.cells.Conv):
+            # You need to replace the weights filler before adding the quantizer.
+            cell.set_weights_filler(
+                n2d2.filler.Xavier(
+                    variance_norm="FanOut",
+                    scaling=1.0,
+                ), refill=True)
+
+            if cell.has_bias():
+                cell.refill_bias()
+            cell.quantizer = SATCell(
+                apply_scaling=False,
+                apply_quantization=False
+            )
+
+            cell.set_solver_parameter("learning_rate_policy", "CosineDecay")
+            cell.set_solver_parameter("learning_rate", 0.05)
+            cell.set_solver_parameter("momentum", 0.9)
+            cell.set_solver_parameter("decay", 0.00004)
+            cell.set_solver_parameter("max_iterations", 192175050)
+            cell.set_solver_parameter("iteration_size", 2)
+
+        ### Updating Fc Cells ###
+        if isinstance(cell, n2d2.cells.Fc):
+            cell.set_weights_filler(
+                n2d2.filler.Xavier(
+                    variance_norm="FanOut",
+                    scaling=1.0,
+                ), refill=True)
+            cell.set_bias_filler(
+                n2d2.filler.Constant(
+                    value=0.0,
+                ), refill=True)
+
+
+            cell.quantizer = SATCell(
+                apply_scaling=False,
+                apply_quantization=False
+            )
+            cell.set_solver_parameter("learning_rate_policy", "CosineDecay")
+            cell.set_solver_parameter("learning_rate", 0.05)
+            cell.set_solver_parameter("momentum", 0.9)
+            cell.set_solver_parameter("decay", 0.00004)
+            cell.set_solver_parameter("max_iterations", 192175050)
+            cell.set_solver_parameter("iteration_size", 2)
+
+        ### Updating BatchNorm Cells ###
+        if isinstance(cell, n2d2.cells.BatchNorm2d):
+            cell.set_solver_parameter("learning_rate_policy", "CosineDecay")
+            cell.set_solver_parameter("learning_rate", 0.05)
+            cell.set_solver_parameter("momentum", 0.9)
+            cell.set_solver_parameter("decay", 0.00004)
+            cell.set_solver_parameter("max_iterations", 192175050)
+            cell.set_solver_parameter("iteration_size", 2)
+    print("AFTER MODIFICATION :")
+    print(model)
+
+Once this is done, we will run a regular training loop, saving the weights every time we reach a new best `precision` during the validation phase.
+The clamped weights will be saved in the folder `resnet_weights_clamped`.
+
+.. code-block:: python
+
+    softmax = n2d2.cells.Softmax(with_loss=True)
+
+    loss_function = n2d2.target.Score(provider)
+    max_precision = -1
+    print("\n### Training ###")
+    for epoch in range(nb_epochs):
+        provider.set_partition("Learn")
+        model.learn()
+
+        print("\n# Train Epoch: " + str(epoch) + " #")
+
+        for i in range(math.ceil(database.get_nb_stimuli('Learn') / batch_size)):
+            x = provider.read_random_batch()
+            x = model(x)
+            x = softmax(x)
+            x = loss_function(x)
+
+            x.back_propagate()
+            x.update()
+
+            print("Example: " + str(i * batch_size) + ", loss: "
+                  + "{0:.3f}".format(x[0]), end='\r')
+
+        print("\n### Validation ###")
+
+        loss_function.clear_success()
+
+        provider.set_partition('Validation')
+        model.test()
+
+        for i in range(math.ceil(database.get_nb_stimuli('Validation') / batch_size)):
+            batch_idx = i * batch_size
+
+            x = provider.read_batch(batch_idx)
+            x = model(x)
+            x = softmax(x)
+            x = loss_function(x)
+
+            print("Validate example: " + str(i * batch_size) + ", val success: "
+                  + "{0:.2f}".format(100 * loss_function.get_average_score(metric="Precision")) + "%", end='\r')
+
+    print("\nPlotting the network ...")
+    x.get_deepnet().draw_graph("./resnet18v1_clamped")
+    x.get_deepnet().log_stats("./resnet18v1_clamped_stats")
+    print("Saving weights !")
+    model.get_embedded_deepnet().export_network_free_parameters("resnet_weights_clamped")
+
+
+Your `resnet-18-v1` model now has clamped weights !
+
+Now we will change the ``quantizer`` objects to quantize the network at 4 bits (range=15).
+
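+The ``range`` parameter is the number of quantization levels: for ``n``-bit quantization, ``range = 2**n - 1``. A quick sketch (``sat_range`` is an illustrative helper, not part of the n2d2 API):
+
+.. code-block:: python
+
+    def sat_range(n_bits):
+        # number of levels available to an n-bit SAT quantizer
+        return 2 ** n_bits - 1
+
+    sat_range(4)  # 15  -> 4-bit weights and activations
+    sat_range(8)  # 255 -> 8-bit first and last layers
+
+The loop below applies these ranges to the model :
+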
+.. code-block:: python
+
+    print("Updating cells")
+
+    for cell in model:
+        ### Updating Rectifier ###
+        if isinstance(cell.activation, n2d2.activation.Rectifier):
+            cell.activation = n2d2.activation.Linear(
+                quantizer=SATAct(
+                    range=15,
+                    solver=n2d2.solver.SGD(
+                        learning_rate_policy = "CosineDecay",
+                        learning_rate=0.05,
+                        momentum=0.9,
+                        decay=0.00004,
+                        max_iterations=115305030
+                    )))
+
+        if isinstance(cell, (n2d2.cells.Conv, n2d2.cells.Fc)):
+            cell.quantizer.set_quantization(True)
+            cell.quantizer.set_range(15)
+
+    # The first and last cells are quantized with 8-bit precision !
+    model["resnetv15_conv0_fwd"].quantizer.set_range(255)
+    model["resnetv15_dense0_fwd"].quantizer.set_range(255)
+
+Once the ``quantizer`` objects have been updated, we can run a new training loop to learn the quantized weights and activations.
+
+.. code-block:: python
+
+    print("\n### Training ###")
+    for epoch in range(nb_epochs):
+
+        provider.set_partition("Learn")
+        model.learn()
+
+        print("\n# Train Epoch: " + str(epoch) + " #")
+
+        for i in range(math.ceil(database.get_nb_stimuli('Learn') / batch_size)):
+            x = provider.read_random_batch()
+            x = model(x)
+            x = softmax(x)
+            x = loss_function(x)
+
+            x.back_propagate()
+            x.update()
+
+            print("Example: " + str(i * batch_size) + ", loss: "
+                  + "{0:.3f}".format(x[0]), end='\r')
+
+        print("\n### Validation ###")
+
+        loss_function.clear_success()
+
+        provider.set_partition('Validation')
+        model.test()
+
+        for i in range(math.ceil(database.get_nb_stimuli('Validation') / batch_size)):
+            batch_idx = i * batch_size
+
+            x = provider.read_batch(batch_idx)
+            x = model(x)
+            x = softmax(x)
+            x = loss_function(x)
+
+            print("Validate example: " + str(i * batch_size) + ", val success: "
+                  + "{0:.2f}".format(100 * loss_function.get_average_score(metric="Precision")) + "%", end='\r')
+
+    x.get_deepnet().draw_graph("./resnet18v1_quant")
+    x.get_deepnet().log_stats("./resnet18v1_quant_stats")
+    model.get_embedded_deepnet().export_network_free_parameters("resnet_weights_SAT")
+
+You can look at your quantized weights in the newly created ``resnet_weights_SAT`` folder.
+
 
 Hand-Made model : LeNet Example - INI File
 #############################################
 
@@ -760,6 +1008,7 @@ for example *weights_clamped* folder.
 Congratulations! Your ``LeNet`` model have clamped weights now !
 You can check the results in your *weights_clamped* folder, for example check your *conv3_weights_quant.distrib.png* file :
 
+.. _clamped weights:
 .. figure:: /_static/qat_lenet_clamp.png
    :alt: Clamp weights.
 
@@ -853,6 +1102,7 @@ The final test accuracy should be close to 99%:
 Congratulations! Your ``LeNet`` model is now fully-quantized !
 You can check the results in your *weights* folder, for example check your *conv3_weights_quant.distrib.png* file :
 
+.. _quantize weights:
 .. figure:: /_static/qat_lenet_conv_q.png
    :alt: Quantized LeNet weights.
 
@@ -878,6 +1128,7 @@ Results must be exactly the same than with batch-normalization. Moreover quantiz
 model !
 You can check the results in the newly generated ``LeNet.ini.png`` graph :
 
+.. _QAT without Batchnorm:
 .. figure:: /_static/qat_conv_nobn.png
    :alt: no batchnorm.
 
@@ -886,7 +1137,304 @@ Moreover you can find your quantized weights and biases under the folder ``weigh
 Hand-Made model : LeNet Example - Python
 #############################################
 
-Coming soon.
+Part 1 : Learn with clamped weights
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In this section, we will see how to apply the ``SAT`` quantization methodology using the Python API.
+We will apply the SAT quantization procedure to a handmade LeNet model.
+
+You can get the script used in this example by clicking here : :download:`LeNet quantization example`.
+
+The first step is to train ``LeNet`` on the ``MNIST`` database with clamped weights.
+
+Let's start by importing the following libraries and setting some global variables :
+
+.. code-block::
+
+    import n2d2
+    import n2d2_ip
+    from n2d2.cells.nn import Dropout, Fc, Conv, Pool2d, BatchNorm2d
+    import math
+
+    nb_epochs = 100
+    batch_size = 256
+    n2d2.global_variables.cuda_device = 2
+    n2d2.global_variables.default_model = "Frame_CUDA"
+
+Let's create a ``database`` driver for MNIST and a ``dataprovider``, and apply a ``transformation`` to the data.
+
+.. code-block:: python
+
+    print("\n### Create database ###")
+    database = n2d2.database.MNIST(data_path=data_path, validation=0.1)
+    print(database)
+    print("\n### Create Provider ###")
+    provider = n2d2.provider.DataProvider(database, [32, 32, 1], batch_size=batch_size)
+    provider.add_transformation(n2d2.transform.Rescale(width=32, height=32))
+    print(provider)
+
+In our example, we decided to quantize every convolution and fully-connected layer.
+We will use the object :py:class:`n2d2.ConfigSection` to provide common parameters to the cells.
+
+.. note::
+    We need to use a function that generates a new config section object on each call, to avoid giving the same objects to every cell we configure.
+    If we had defined ``conv_conf`` as a plain :py:class:`n2d2.ConfigSection` like ``solver_conf``, every ``Conv`` cell would share the same solver and quantizer objects !
+
+.. code-block:: python
+
+    solver_conf = n2d2.ConfigSection(
+        learning_rate=0.05,
+        learning_rate_policy="None",
+        momentum=0.0,
+        decay=0.0,
+    )
+    def conv_conf():
+        return n2d2.ConfigSection(
+            activation=n2d2.activation.Linear(),
+            no_bias=True,
+            weights_solver=n2d2.solver.SGD(**solver_conf),
+            bias_solver=n2d2.solver.SGD(**solver_conf),
+            quantizer=n2d2_ip.quantizer.SATCell(
+                apply_scaling=False, # No scaling needed because each conv is followed by batch-normalization layers
+                apply_quantization=False, # Only clamp mode for the 1st step
+            ),)
+    def fc_conf():
+        return n2d2.ConfigSection(
+            activation=n2d2.activation.Linear(),
+            no_bias=True,
+            weights_solver=n2d2.solver.SGD(**solver_conf),
+            bias_solver=n2d2.solver.SGD(**solver_conf),
+            quantizer=n2d2_ip.quantizer.SATCell(
+                apply_scaling=True, # Scaling needed for Fully-Connected
+                apply_quantization=False, # Only clamp mode for the 1st step
+            ),
+        )
+    def bn_conf():
+        return n2d2.ConfigSection(
+            activation=n2d2.activation.Rectifier(),
+            scale_solver=n2d2.solver.SGD(**solver_conf),
+            bias_solver=n2d2.solver.SGD(**solver_conf),
+        )
+
+Once we have defined the global parameters for each cell, we can define our ``LeNet`` model.
+
+.. code-block:: python
+
+    print("\n### Loading Model ###")
+    model = n2d2.cells.Sequence([
+        Conv(1, 6, kernel_dims=[5, 5], **conv_conf()),
+        BatchNorm2d(6, **bn_conf()),
+        Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"),
+        Conv(6, 16, [5, 5], **conv_conf()),
+        BatchNorm2d(16, **bn_conf()),
+        Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"),
+        Conv(16, 120, [5, 5], **conv_conf()),
+        Dropout(name="Conv3.Dropout"),
+        BatchNorm2d(120, **bn_conf()),
+        Fc(120, 84, **fc_conf()),
+        Dropout(name="Fc1.Dropout"),
+        Fc(84, 10, **fc_conf()),
+    ])
+    print(model)
+
+    softmax = n2d2.cells.Softmax(with_loss=True)
+
+    loss_function = n2d2.target.Score(provider)
+
+With the model defined, we can train it with a classic training loop.
+
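+Each iteration of the loop follows the same pattern: read a batch, propagate it through the model, the softmax and the loss, then back-propagate and update the parameters. Condensed, the step performed inside the loop is (an annotated sketch of the calls used below):
+
+.. code-block:: python
+
+    x = provider.read_random_batch()  # load a random batch of stimuli
+    x = model(x)                      # forward pass through LeNet
+    x = softmax(x)                    # class scores
+    x = loss_function(x)              # loss against the provider's labels
+    x.back_propagate()                # gradient computation
+    x.update()                        # parameter update
+
+The full loop, with its validation and testing phases, is the following :
+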
+.. code-block:: python
+
+    print("\n### Training ###")
+    for epoch in range(nb_epochs):
+
+        provider.set_partition("Learn")
+        model.learn()
+
+        print("\n# Train Epoch: " + str(epoch) + " #")
+
+        for i in range(math.ceil(database.get_nb_stimuli('Learn')/batch_size)):
+
+            x = provider.read_random_batch()
+            x = model(x)
+            x = softmax(x)
+            x = loss_function(x)
+            x.back_propagate()
+            x.update()
+
+            print("Example: " + str(i * batch_size) + ", loss: "
+                  + "{0:.3f}".format(x[0]), end='\r')
+
+
+        print("\n### Validation ###")
+
+        loss_function.clear_success()
+
+        provider.set_partition('Validation')
+        model.test()
+
+        for i in range(math.ceil(database.get_nb_stimuli('Validation') / batch_size)):
+            batch_idx = i * batch_size
+
+            x = provider.read_batch(batch_idx)
+            x = model(x)
+            x = softmax(x)
+            x = loss_function(x)
+
+            print("Validate example: " + str(i * batch_size) + ", val success: "
+                  + "{0:.2f}".format(100 * loss_function.get_average_success()) + "%", end='\r')
+
+
+    print("\n\n### Testing ###")
+
+    provider.set_partition('Test')
+    model.test()
+
+    for i in range(math.ceil(provider.get_database().get_nb_stimuli('Test')/batch_size)):
+        batch_idx = i*batch_size
+
+        x = provider.read_batch(batch_idx)
+        x = model(x)
+        x = softmax(x)
+        x = loss_function(x)
+
+        print("Example: " + str(i * batch_size) + ", test success: "
+              + "{0:.2f}".format(100 * loss_function.get_average_success()) + "%", end='\r')
+
+    print("\n")
+
+Then, we can export the weights we have learned in order to use them for the second step.
+
+.. code-block:: python
+
+    ### Exporting weights ###
+    x.get_deepnet().export_network_free_parameters("./weights_clamped")
+
+If you check the generated file *conv3_weights_quant.distrib.png*, you should see the `clamped weights`_.
+
+Part 2 : Quantized LeNet with SAT
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Now that we have learned clamped weights, we will quantize our network.
+
+You can get the script used in this example by clicking here : :download:`LeNet quantization example`.
+
+To do so, we will create a second script. We can begin by importing the ``MNIST`` database and creating a ``dataprovider``, just like in the previous section.
+
+Then we will copy the :py:class:`n2d2.ConfigSection` from the previous section and update the quantizer arguments.
+
+.. code-block:: python
+
+    solver_conf = n2d2.ConfigSection(
+        learning_rate=0.05,
+        learning_rate_policy="None",
+        momentum=0.0,
+        decay=0.0,
+    )
+    def conv_conf():
+        return n2d2.ConfigSection(
+            activation=n2d2.activation.Linear(),
+            no_bias=True,
+            weights_solver=n2d2.solver.SGD(**solver_conf),
+            bias_solver=n2d2.solver.SGD(**solver_conf),
+            quantizer=n2d2_ip.quantizer.SATCell(
+                apply_scaling=False,
+                apply_quantization=True, # ApplyQuantization is now set to True
+                range=15, # Conv is now quantized in 4-bits range (2^4 - 1)
+            ))
+    def fc_conf():
+        return n2d2.ConfigSection(
+            activation=n2d2.activation.Linear(),
+            no_bias=True,
+            weights_solver=n2d2.solver.SGD(**solver_conf),
+            bias_solver=n2d2.solver.SGD(**solver_conf),
+            quantizer=n2d2_ip.quantizer.SATCell(
+                apply_scaling=True,
+                apply_quantization=True, # ApplyQuantization is now set to True
+                range=15, # Fc is now quantized in 4-bits range (2^4 - 1)
+            ))
+    def bn_conf():
+        return n2d2.ConfigSection(
+            activation=n2d2.activation.Linear(
+                quantizer=n2d2_ip.quantizer.SATAct(
+                    alpha=6.0,
+                    range=15, # -> 15 for 4-bits range (2^4-1)
+                )),
+            scale_solver=n2d2.solver.SGD(**solver_conf),
+            bias_solver=n2d2.solver.SGD(**solver_conf),
+        )
+
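+Compared with part 1, the change in the cell configuration is the quantizer: ``apply_quantization`` switches from ``False`` to ``True`` and a ``range`` is given (plus the ``SATAct`` quantizer on the batch-normalization activations). Side by side (a sketch only; the full configuration is above):
+
+.. code-block:: python
+
+    from n2d2_ip.quantizer import SATCell
+
+    # part 1 : weights are only clamped, and stay in floating point
+    clamp_only = SATCell(apply_scaling=False, apply_quantization=False)
+    # part 2 : weights are clamped and quantized on 4 bits (2**4 - 1 = 15 levels)
+    quant_4bit = SATCell(apply_scaling=False, apply_quantization=True, range=15)
+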
+With the configuration done, we can define our new network.
+
+.. note::
+    The first ``Convolution`` and the last ``Fully Connected`` layers have different parameters because, as is common practice, we quantize them in 8-bit instead of 4-bit.
+
+.. code-block:: python
+
+    ### Creating model ###
+    print("\n### Loading Model ###")
+    model = n2d2.cells.Sequence([
+        Conv(1, 6, kernel_dims=[5, 5],
+             activation=n2d2.activation.Linear(),
+             no_bias=True,
+             weights_solver=n2d2.solver.SGD(**solver_conf),
+             bias_solver=n2d2.solver.SGD(**solver_conf),
+             quantizer=n2d2_ip.quantizer.SATCell(
+                 apply_scaling=False,
+                 apply_quantization=True, # ApplyQuantization is now set to True
+                 range=255, # Conv_0 is now quantized in 8-bits range (2^8 - 1)
+             )),
+        BatchNorm2d(6, **bn_conf()),
+        Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"),
+        Conv(6, 16, [5, 5], **conv_conf()),
+        BatchNorm2d(16, **bn_conf()),
+        Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"),
+        Conv(16, 120, [5, 5], **conv_conf()),
+        Dropout(name="Conv3.Dropout"),
+        BatchNorm2d(120, **bn_conf()),
+        Fc(120, 84, **fc_conf()),
+        Dropout(name="Fc1.Dropout"),
+        Fc(84, 10,
+           activation=n2d2.activation.Linear(),
+           no_bias=True,
+           weights_solver=n2d2.solver.SGD(**solver_conf),
+           bias_solver=n2d2.solver.SGD(**solver_conf),
+           quantizer=n2d2_ip.quantizer.SATCell(
+               apply_scaling=True,
+               apply_quantization=True, # ApplyQuantization is now set to True
+               range=255, # Fc_1 is now quantized in 8-bits range (2^8 - 1)
+           )),
+    ])
+    print(model)
+
+With the model created, we can import the learned parameters.
+
+.. code-block:: python
+
+    # Importing the clamped weights
+    model.import_free_parameters("./weights_clamped", ignore_not_exists=True)
+
+The model is now ready for training (you can use the training loop presented in the previous section).
+
+Once the training is done, you can save the new quantized weights with the following line :
+
+.. code-block:: python
+
+    ### Exporting weights ###
+    x.get_deepnet().export_network_free_parameters("./new_weights")
+
+If you check the generated file *conv3_weights_quant.distrib.png*, you should see the `quantize weights`_.
+
+You can fuse ``BatchNorm`` and ``Conv`` layers by using the following lines :
+
+.. code-block:: python
+
+    ### Fuse ###
+    n2d2_ip.quantizer.fuse_qat(x.get_deepnet(), provider, "NONE")
+    x.get_deepnet().draw_graph("./lenet_quant.py")
+
+You can check the generated file *lenet_quant.py.png*, which should look like the `QAT without Batchnorm`_ figure.
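+
+Before generating an export, you can also check that every cell of the network now carries an integer quantizer, using the :py:meth:`n2d2.cells.Block.is_integral` helper introduced in this change (a sketch; ``model`` is the :py:class:`n2d2.cells.Sequence` defined above):
+
+.. code-block:: python
+
+    if not model.is_integral():
+        print("Some cells still hold floating-point parameters;"
+              " finish the quantization before exporting.")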
+
 
 Results
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 8cf19e13..8a8bfdb3 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,3 @@
 sphinxcontrib-bibtex
-breathe
\ No newline at end of file
+breathe
+sphinx-pyreverse
\ No newline at end of file
diff --git a/include/utils/Helper.hpp b/include/utils/Helper.hpp
index 849b7bbd..0073bfbc 100644
--- a/include/utils/Helper.hpp
+++ b/include/utils/Helper.hpp
@@ -76,6 +76,7 @@ namespace N2D2_HELPER{
         int nbBits = 8;
         int calibration = 0;
         bool calibrationReload = false;
+        bool calibOnly = false;
        // TODO : these attributes are not used as default on parser (see Options ctor)
         WeightsApprox cRoundMode = weightsScalingMode("NONE");
         WeightsApprox bRoundMode = weightsScalingMode("NONE");
@@ -122,6 +123,8 @@ namespace N2D2_HELPER{
     void test(const Options&, std::shared_ptr<DeepNet>&, bool);
     void importFreeParameters(const Options& opt, DeepNet& deepNet);
     bool generateExport(const Options&, std::shared_ptr<DeepNet>&);
+    bool calibNetwork(const Options&, std::shared_ptr<DeepNet>&);
+    void generateExportFromCalibration(const Options&, std::shared_ptr<DeepNet>&, std::string="");
     void findLearningRate(const Options&, std::shared_ptr<DeepNet>&);
     void learn_epoch(const Options&, std::shared_ptr<DeepNet>&);
     void learn(const Options&, std::shared_ptr<DeepNet>&);
diff --git a/python/examples/data_augmentation.py b/python/examples/data_augmentation.py
new file mode 100644
index 00000000..c8f28338
--- /dev/null
+++ b/python/examples/data_augmentation.py
@@ -0,0 +1,29 @@
+import n2d2
+import matplotlib.pyplot as plt
+
+def plot_tensor(tensor, path):
+    plt.imshow(tensor[0,0,:], cmap='gray', vmin=0, vmax=255)
+    plt.savefig(path)
+
+database = n2d2.database.MNIST(data_path="/local/DATABASE/mnist", validation=0.1)
+provider = n2d2.provider.DataProvider(database, [28, 28, 1], batch_size=1)
+
+database.get_partition_summary()
+
+image = provider.read_batch(idx=0).to_numpy() * 255
+# The next() function works too !
+# image = next(provider).to_numpy() * 255
+
+plot_tensor(image, "first_stimuli.png")
+
+# Note : add_transformation would not have changed the image, as it has already been loaded
+provider.add_on_the_fly_transformation(n2d2.transform.Flip(vertical_flip=True))
+
+image = provider.read_batch(idx=0).to_numpy() * 255
+plot_tensor(image, "first_stimuli_fliped.png")
+
+# negating the first transformation with another one
+provider.add_transformation(n2d2.transform.Flip(vertical_flip=True))
+image = provider.read_batch(idx=1).to_numpy() * 255
+plot_tensor(image, "second_stimuli.png")
+print("Second stimuli label :", provider.get_labels()[0])
\ No newline at end of file
diff --git a/python/examples/keras_example.py b/python/examples/keras_example.py
index f295b0e4..2a0f59ad 100644
--- a/python/examples/keras_example.py
+++ b/python/examples/keras_example.py
@@ -75,6 +75,11 @@ layers.Dense(num_classes, activation="softmax"),
     ]
 )
+
+# Asking N2D2 to use GPU 0
+n2d2.global_variables.cuda_device = 0
+n2d2.global_variables.default_model = 'Frame_CUDA'
+
 model = keras_interoperability.wrap(tf_model, batch_size=batch_size, for_export=True)
@@ -101,9 +106,9 @@ provider.add_transformation(n2d2.transform.Rescale(width=28, height=28))
 print(provider)
 
-
-# Generating C export
-n2d2.export.export_c(
+model.get_deepnet_cell().remove("dense")
+# Generating CPP export
+n2d2.export.export_cpp(
     model.get_deepnet_cell(),
     provider=provider,
     nb_bits=8,
diff --git a/python/examples/lenet_onnx.py b/python/examples/lenet_onnx.py
index b24b27f9..7af1d4ca 100644
--- a/python/examples/lenet_onnx.py
+++ b/python/examples/lenet_onnx.py
@@ -134,4 +134,4 @@ model.remove("18") # removing Softmax layer before export !
 n2d2.export.export_cpp(model, nb_bits=8, calibration=1)
 
-print(f"\nExceution time : {time.time()-t}s")
\ No newline at end of file
+print(f"\nExecution time : {time.time()-t}s")
\ No newline at end of file
diff --git a/python/examples/sat_lenet/quantizer_handmade_part1.py b/python/examples/sat_lenet/quantizer_handmade_part1.py
new file mode 100644
index 00000000..8ffe432a
--- /dev/null
+++ b/python/examples/sat_lenet/quantizer_handmade_part1.py
@@ -0,0 +1,166 @@
+"""
+    (C) Copyright 2021 CEA LIST. All Rights Reserved.
+    Contributor(s): Cyril MOINEAU (cyril.moineau@cea.fr)
+
+    This software is governed by the CeCILL-C license under French law and
+    abiding by the rules of distribution of free software. You can use,
+    modify and/ or redistribute the software under the terms of the CeCILL-C
+    license as circulated by CEA, CNRS and INRIA at the following URL
+    "http://www.cecill.info".
+
+    As a counterpart to the access to the source code and rights to copy,
+    modify and redistribute granted by the license, users are provided only
+    with a limited warranty and the software's author, the holder of the
+    economic rights, and the successive licensors have only limited
+    liability.
+
+    The fact that you are presently reading this means that you have had
+    knowledge of the CeCILL-C license and that you accept its terms.
+""" +### Import + global var ### +import n2d2 +import math +from n2d2.cells.nn import Dropout, Fc, Conv, Pool2d, BatchNorm2d +from n2d2_ip.quantizer import SATCell +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--data_path", type=str, help='Path to the MNIST Dataset') +args = parser.parse_args() + +nb_epochs = 100 +batch_size = 256 +n2d2.global_variables.cuda_device = 2 +n2d2.global_variables.default_model = "Frame_CUDA" + +print("\n### Create database ###") +database = n2d2.database.MNIST(data_path=args.data_path, validation=0.1) +print(database) + +print("\n### Create Provider ###") +provider = n2d2.provider.DataProvider(database, [32, 32, 1], batch_size=batch_size) +provider.add_transformation(n2d2.transform.Rescale(width=32, height=32)) +print(provider) + +### Configuration ### + +solver_conf = n2d2.ConfigSection( + learning_rate=0.05, + learning_rate_policy="None", + momentum=0.0, + decay=0.0, +) +def conv_conf(): + return n2d2.ConfigSection( + activation=n2d2.activation.Linear(), + no_bias=True, + weights_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + quantizer=SATCell( + apply_scaling=False, # No scaling needed because each conv is followed by batch-normalization layers + apply_quantization=False, # Only clamp mode for the 1st step + ),) +def fc_conf(): + return n2d2.ConfigSection( + activation=n2d2.activation.Linear(), + no_bias=True, + weights_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + quantizer=SATCell( + apply_scaling=True, # Scaling needed for Full-Connected + apply_quantization=False, # Only clamp mode for the 1st step + ), + ) +def bn_conf(): + return n2d2.ConfigSection( + activation=n2d2.activation.Rectifier(), + scale_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + ) + +### Creating model ### +print("\n### Loading Model ###") +model = n2d2.cells.Sequence([ + Conv(1, 6, kernel_dims=[5, 5], **conv_conf()), + BatchNorm2d(6, **bn_conf()), + Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"), + Conv(6, 16, [5, 5], **conv_conf()), + BatchNorm2d(16, **bn_conf()), + Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"), + Conv(16, 120, [5, 5], **conv_conf()), + Dropout(name="Conv3.Dropout"), + BatchNorm2d(120, **bn_conf()), + Fc(120, 84, **fc_conf()), + Dropout(name="Fc1.Dropout"), + Fc(84, 10, **fc_conf()), +]) +print(model) + +softmax = n2d2.cells.Softmax(with_loss=True) + +loss_function = n2d2.target.Score(provider) + +print("\n### Training ###") +for epoch in range(nb_epochs): + + provider.set_partition("Learn") + model.learn() + + print("\n# Train Epoch: " + str(epoch) + " #") + + for i in range(math.ceil(database.get_nb_stimuli('Learn')/batch_size)): + + x = provider.read_random_batch() + x = model(x) + x = softmax(x) + x = loss_function(x) + + x.back_propagate() + x.update() + + print("Example: " + str(i * batch_size) + ", loss: " + + "{0:.3f}".format(x[0]), end='\r') + + + print("\n### Validation ###") + + loss_function.clear_success() + + provider.set_partition('Validation') + model.test() + + for i in range(math.ceil(database.get_nb_stimuli('Validation') / batch_size)): + batch_idx = i * batch_size + + x = provider.read_batch(batch_idx) + x = model(x) + x = softmax(x) + x = loss_function(x) + + print("Validate example: " + str(i * batch_size) + ", val success: " + + "{0:.2f}".format(100 * loss_function.get_average_success()) + "%", end='\r') + + +print("\n\n### Testing ###") + 
+provider.set_partition('Test') +model.test() + +for i in range(math.ceil(provider.get_database().get_nb_stimuli('Test')/batch_size)): + batch_idx = i*batch_size + + x = provider.read_batch(batch_idx) + x = model(x) + x = softmax(x) + x = loss_function(x) + + print("Example: " + str(i * batch_size) + ", test success: " + + "{0:.2f}".format(100 * loss_function.get_average_success()) + "%", end='\r') + +print("\n") + +### Exporting weights ### +x.get_deepnet().export_network_free_parameters("./weights_clamped") + + + diff --git a/python/examples/sat_lenet/quantizer_handmade_part2.py b/python/examples/sat_lenet/quantizer_handmade_part2.py new file mode 100644 index 00000000..655a11b8 --- /dev/null +++ b/python/examples/sat_lenet/quantizer_handmade_part2.py @@ -0,0 +1,193 @@ +""" + (C) Copyright 2021 CEA LIST. All Rights Reserved. + Contributor(s): Cyril MOINEAU (cyril.moineau@cea.fr) + + This software is governed by the CeCILL-C license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL-C + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited + liability. + + The fact that you are presently reading this means that you have had + knowledge of the CeCILL-C license and that you accept its terms. +""" +### Import + global var ### +import n2d2 +import math +from n2d2.cells.nn import Dropout, Fc, Conv, Pool2d, BatchNorm2d +from n2d2_ip.quantizer import SATCell, SATAct, fuse_qat + +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--data_path", type=str, help='Path to the MNIST Dataset') +args = parser.parse_args() + +nb_epochs = 100 +batch_size = 256 +n2d2.global_variables.cuda_device = 2 +n2d2.global_variables.default_model = "Frame_CUDA" + +print("\n### Create database ###") +database = n2d2.database.MNIST(data_path=args.data_path, validation=0.1) +print(database) + +print("\n### Create Provider ###") +provider = n2d2.provider.DataProvider(database, [32, 32, 1], batch_size=batch_size) +provider.add_transformation(n2d2.transform.Rescale(width=32, height=32)) +print(provider) + + +### Configuration ### + +solver_conf = n2d2.ConfigSection( + learning_rate=0.05, + learning_rate_policy="None", + momentum=0.0, + decay=0.0, +) +def conv_conf(): + return n2d2.ConfigSection( + activation=n2d2.activation.Linear(), + no_bias=True, + weights_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + quantizer=SATCell( + apply_scaling=False, + apply_quantization=True, # ApplyQuantization is now set to True + range=15, # Conv is now quantized in 4-bits range (2^4 - 1) + )) +def fc_conf(): + return n2d2.ConfigSection( + activation=n2d2.activation.Linear(), + no_bias=True, + weights_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + quantizer=SATCell( + apply_scaling=True, + apply_quantization=True, # ApplyQuantization is now set to True + range=15, # Fc is now quantized in 4-bits range (2^4 - 1) + )) +def bn_conf(): + return n2d2.ConfigSection( + activation=n2d2.activation.Linear( + quantizer=SATAct( + alpha=6.0, + range=15, # -> 15 for 4-bits range (2^4-1) + )), + 
scale_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + ) + +### Creating model ### +print("\n### Loading Model ###") +model = n2d2.cells.Sequence([ + Conv(1, 6, kernel_dims=[5, 5], + activation=n2d2.activation.Linear(), + no_bias=True, + weights_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + quantizer=SATCell( + apply_scaling=False, + apply_quantization=True, # ApplyQuantization is now set to True + range=255, # Conv_0 is now quantized in 8-bits range (2^8 - 1) + )), + BatchNorm2d(6, **bn_conf()), + Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"), + Conv(6, 16, [5, 5], **conv_conf()), + BatchNorm2d(16, **bn_conf()), + Pool2d(pool_dims=[2, 2], stride_dims=[2, 2], pooling="Max"), + Conv(16, 120, [5, 5], **conv_conf()), + Dropout(name="Conv3.Dropout"), + BatchNorm2d(120, **bn_conf()), + Fc(120, 84, **fc_conf()), + Dropout(name="Fc1.Dropout"), + Fc(84, 10, + activation=n2d2.activation.Linear(), + no_bias=True, + weights_solver=n2d2.solver.SGD(**solver_conf), + bias_solver=n2d2.solver.SGD(**solver_conf), + quantizer=SATCell( + apply_scaling=True, + apply_quantization=True, # ApplyQuantization is now set to True + range=255, # Fc_1 is now quantized in 8-bits range (2^8 - 1) + )), +]) +print(model) + +# Importing the clamped weights +model.import_free_parameters("./weights_clamped", ignore_not_exists=True) + +softmax = n2d2.cells.Softmax(with_loss=True) + +loss_function = n2d2.target.Score(provider) + +print("\n### Training ###") +for epoch in range(nb_epochs): + + provider.set_partition("Learn") + model.learn() + + print("\n# Train Epoch: " + str(epoch) + " #") + + for i in range(math.ceil(database.get_nb_stimuli('Learn')/batch_size)): + + x = provider.read_random_batch() + x = model(x) + x = softmax(x) + x = loss_function(x) + x.back_propagate() + x.update() + + print("Example: " + str(i * batch_size) + ", loss: " + + "{0:.3f}".format(x[0]), end='\r') + + + print("\n### Validation ###") + + loss_function.clear_success() + + provider.set_partition('Validation') + model.test() + + for i in range(math.ceil(database.get_nb_stimuli('Validation') / batch_size)): + batch_idx = i * batch_size + + x = provider.read_batch(batch_idx) + x = model(x) + x = softmax(x) + x = loss_function(x) + print("Validate example: " + str(i * batch_size) + ", val success: " + + "{0:.2f}".format(100 * loss_function.get_average_success()) + "%", end='\r') + + +print("\n\n### Testing ###") + +provider.set_partition('Test') +model.test() + +for i in range(math.ceil(provider.get_database().get_nb_stimuli('Test')/batch_size)): + batch_idx = i*batch_size + + x = provider.read_batch(batch_idx) + x = model(x) + x = softmax(x) + x = loss_function(x) + print("Example: " + str(i * batch_size) + ", test success: " + + "{0:.2f}".format(100 * loss_function.get_average_success()) + "%", end='\r') + +print("\n") + +### Fuse ### +fuse_qat(x.get_deepnet(), provider, "NONE") + +### Exporting weights ### +x.get_deepnet().export_network_free_parameters("./new_weights") + +x.get_deepnet().draw_graph("./lenet_quant.py") diff --git a/python/examples/sat_qat_resnet-18.py b/python/examples/sat_qat_resnet-18.py new file mode 100644 index 00000000..ddbdd20e --- /dev/null +++ b/python/examples/sat_qat_resnet-18.py @@ -0,0 +1,256 @@ +""" + (C) Copyright 2021 CEA LIST. All Rights Reserved. 
+    Contributor(s): Cyril MOINEAU (cyril.moineau@cea.fr)
+
+    This software is governed by the CeCILL-C license under French law and
+    abiding by the rules of distribution of free software. You can use,
+    modify and/ or redistribute the software under the terms of the CeCILL-C
+    license as circulated by CEA, CNRS and INRIA at the following URL
+    "http://www.cecill.info".
+
+    As a counterpart to the access to the source code and rights to copy,
+    modify and redistribute granted by the license, users are provided only
+    with a limited warranty and the software's author, the holder of the
+    economic rights, and the successive licensors have only limited
+    liability.
+
+    The fact that you are presently reading this means that you have had
+    knowledge of the CeCILL-C license and that you accept its terms.
+"""
+
+"""
+This file contains an example of the usage of quantization.
+We want to quantize a ResNet-18 ONNX model with 4-bit weights and 4-bit activations using the SAT quantization method (the first and last layers are kept in 8-bit).
+Source of the ONNX file : https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet18v1/resnet18v1.onnx
+"""
+
+### Import + global var ###
+import n2d2
+from n2d2_ip.quantizer import SATCell, SATAct
+import math
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--data_path", type=str, help='Path to the ILSVRC2012 dataset')
+parser.add_argument("--label_path", type=str, help='Path to the ILSVRC2012 labels')
+args = parser.parse_args()
+
+
+nb_epochs = 100
+batch_size = 128
+n2d2.global_variables.cuda_device = 2
+n2d2.global_variables.default_model = "Frame_CUDA"
+### Creating a database driver ###
+
+print("Create database")
+database = n2d2.database.ILSVRC2012(learn=1.0, random_partitioning=True)
+database.load(args.data_path, label_path=args.label_path)
+print(database)
+print("Create provider")
+provider = n2d2.provider.DataProvider(database=database, size=[224, 224, 3], batch_size=batch_size)
+print(provider)
+
+### Applying Transformations ###
+print("Adding transformations")
+transformations = n2d2.transform.Composite([
+    n2d2.transform.ColorSpace("RGB"),
+    n2d2.transform.RangeAffine("Divides", 255.0),
+    n2d2.transform.RandomResizeCrop(224, 224, scale_min=0.2, scale_max=1.0, ratio_min=0.75,
+                                    ratio_max=1.33, apply_to="LearnOnly"),
+    n2d2.transform.Rescale(256, 256, keep_aspect_ratio=True, resize_to_fit=False,
+                           apply_to="NoLearn"),
+    n2d2.transform.PadCrop(256, 256, apply_to="NoLearn"),
+    n2d2.transform.SliceExtraction(224, 224, offset_x=16, offset_y=16, apply_to="NoLearn"),
+])
+
+print(transformations)
+
+flip_trans = n2d2.transform.Flip(apply_to="LearnOnly", random_horizontal_flip=True)
+
+provider.add_transformation(transformations)
+provider.add_on_the_fly_transformation(flip_trans)
+
+print(provider)
+
+### Loading ONNX ###
+
+model = n2d2.cells.DeepNetCell.load_from_ONNX(provider, "./resnet18v1.onnx")
+
+
+print("BEFORE MODIFICATION :")
+print(model)
+### Updating DeepNet parameters ###
+
+print("Updating cells ...")
+
+for cell in model:
+    ### Updating Conv Cells ###
+    if isinstance(cell, n2d2.cells.Conv):
+        # You need to replace the weights filler before adding the quantizer.
+        cell.set_weights_filler(
+            n2d2.filler.Xavier(
+                variance_norm="FanOut",
+                scaling=1.0,
+            ), refill=True)
+
+        if cell.has_bias():
+            cell.refill_bias()
+        cell.quantizer = SATCell(
+            apply_scaling=False,
+            apply_quantization=False
+        )
+
+        cell.set_solver_parameter("learning_rate_policy", "CosineDecay")
+        cell.set_solver_parameter("learning_rate", 0.05)
+        cell.set_solver_parameter("momentum", 0.9)
+        cell.set_solver_parameter("decay", 0.00004)
+        cell.set_solver_parameter("max_iterations", 192175050)
+        cell.set_solver_parameter("iteration_size", 2)
+
+    ### Updating Fc Cells ###
+    if isinstance(cell, n2d2.cells.Fc):
+        cell.set_weights_filler(
+            n2d2.filler.Xavier(
+                variance_norm="FanOut",
+                scaling=1.0,
+            ), refill=True)
+        cell.set_bias_filler(
+            n2d2.filler.Constant(
+                value=0.0,
+            ), refill=True)
+
+
+        cell.quantizer = SATCell(
+            apply_scaling=False,
+            apply_quantization=False
+        )
+        cell.set_solver_parameter("learning_rate_policy", "CosineDecay")
+        cell.set_solver_parameter("learning_rate", 0.05)
+        cell.set_solver_parameter("momentum", 0.9)
+        cell.set_solver_parameter("decay", 0.00004)
+        cell.set_solver_parameter("max_iterations", 192175050)
+        cell.set_solver_parameter("iteration_size", 2)
+
+    ### Updating BatchNorm Cells ###
+    if isinstance(cell, n2d2.cells.BatchNorm2d):
+        cell.set_solver_parameter("learning_rate_policy", "CosineDecay")
+        cell.set_solver_parameter("learning_rate", 0.05)
+        cell.set_solver_parameter("momentum", 0.9)
+        cell.set_solver_parameter("decay", 0.00004)
+        cell.set_solver_parameter("max_iterations", 192175050)
+        cell.set_solver_parameter("iteration_size", 2)
+print("AFTER MODIFICATION :")
+print(model)
+
+softmax = n2d2.cells.Softmax(with_loss=True)
+
+loss_function = n2d2.target.Score(provider)
+
+print("\n### Training ###")
+for epoch in range(nb_epochs):
+    provider.set_partition("Learn")
+    model.learn()
+
+    print("\n# Train Epoch: " + str(epoch) + " #")
+
+    for i in range(math.ceil(database.get_nb_stimuli('Learn') / batch_size)):
+        x = provider.read_random_batch()
+        x = model(x)
+        x = softmax(x)
+        x = loss_function(x)
+
+        x.back_propagate()
+        x.update()
+
+        print("Example: " + str(i * batch_size) + ", loss: "
+              + "{0:.3f}".format(x[0]), end='\r')
+
+    print("\n### Validation ###")
+
+    loss_function.clear_success()
+
+    provider.set_partition('Validation')
+    model.test()
+
+    for i in range(math.ceil(database.get_nb_stimuli('Validation') / batch_size)):
+        batch_idx = i * batch_size
+
+        x = provider.read_batch(batch_idx)
+        x = model(x)
+        x = softmax(x)
+        x = loss_function(x)
+
+        print("Validate example: " + str(i * batch_size) + ", val success: "
+              + "{0:.2f}".format(100 * loss_function.get_average_score(metric="Precision")) + "%", end='\r')
+
+print("\nPlotting the network ...")
+x.get_deepnet().draw_graph("./resnet18v1_clamped")
+x.get_deepnet().log_stats("./resnet18v1_clamped_stats")
+print("Saving weights !")
+model.get_embedded_deepnet().export_network_free_parameters("resnet_weights_clamped")
+
+print("Updating cells")
+
+for cell in model:
+    ### Updating Rectifier ###
+    if isinstance(cell.activation, n2d2.activation.Rectifier):
+        cell.activation = n2d2.activation.Linear(
+            quantizer=SATAct(
+                range=15,
+                solver=n2d2.solver.SGD(
+                    learning_rate_policy = "CosineDecay",
+                    learning_rate=0.05,
+                    momentum=0.9,
+                    decay=0.00004,
+                    max_iterations=115305030
+                )))
+
+    if isinstance(cell, (n2d2.cells.Conv, n2d2.cells.Fc)):
+        cell.quantizer.set_quantization(True)
+        cell.quantizer.set_range(15)
+
+# The first and last cells are quantized with 8-bit precision !
+model["resnetv15_conv0_fwd"].quantizer.set_range(255) +model["resnetv15_dense0_fwd"].quantizer.set_range(255) + +print("\n### Training ###") +for epoch in range(nb_epochs): + + provider.set_partition("Learn") + model.learn() + + print("\n# Train Epoch: " + str(epoch) + " #") + + for i in range(math.ceil(database.get_nb_stimuli('Learn') / batch_size)): + x = provider.read_random_batch() + x = model(x) + x = softmax(x) + x = loss_function(x) + + x.back_propagate() + x.update() + + print("Example: " + str(i * batch_size) + ", loss: " + + "{0:.3f}".format(x[0]), end='\r') + + print("\n### Validation ###") + + loss_function.clear_success() + + provider.set_partition('Validation') + model.test() + + for i in range(math.ceil(database.get_nb_stimuli('Validation') / batch_size)): + batch_idx = i * batch_size + + x = provider.read_batch(batch_idx) + x = model(x) + x = softmax(x) + x = loss_function(x) + + print("Validate example: " + str(i * batch_size) + ", val success: " + + "{0:.2f}".format(100 * loss_function.get_average_score(metric="Precision")) + "%", end='\r') + +x.get_deepnet().draw_graph("./resnet18v1_quant") +x.get_deepnet().log_stats("./resnet18v1_quant_stats") +model.get_embedded_deepnet().export_network_free_parameters("resnet_weights_SAT") diff --git a/python/keras_interoperability/keras_interface.py b/python/keras_interoperability/keras_interface.py index 31c1780c..ec25ee5d 100644 --- a/python/keras_interoperability/keras_interface.py +++ b/python/keras_interoperability/keras_interface.py @@ -275,7 +275,7 @@ def wrap(tf_model: keras.Sequential, batch_size: int, name: str=None, for_export spec = [tf.TensorSpec(inputs_shape, tf.float32, name=input_name) for input_name in input_names] - with ContextNoBatchNormFuse() as ctx: + with ContextNoBatchNormFuse() as ctx: tf2onnx.convert.from_keras( tf_model, input_signature=spec, @@ -290,6 +290,10 @@ def wrap(tf_model: keras.Sequential, batch_size: int, name: str=None, for_export # assert check, "Simplified ONNX model could not be validated" # onnx.save(model_simp, model_name + ".onnx") + if n2d2.global_variables.cuda_compiled: + # Making sure Keras did not changed the device ! + n2d2.global_variables.cuda_device = n2d2.global_variables.cuda_device + database = n2d2.database.Database() if len(inputs_shape) == 4: diff --git a/python/n2d2/cells/cell.py b/python/n2d2/cells/cell.py index 73fcc546..56590319 100755 --- a/python/n2d2/cells/cell.py +++ b/python/n2d2/cells/cell.py @@ -116,6 +116,16 @@ def __getitem__(self, item): return self.get_cell(item) raise n2d2.error_handler.WrongInputType("item", type(item), ["str"]) + def is_integral(self): + """ + Check if the parameters of every cell have an integral precision. + """ + for cell in self._cells.values(): + # mQuantizedNbBits is initialize to 0 + if "quantizer" in cell._parameters.keys() and cell.N2D2().getQuantizedNbBits() <= 0: + return False + return True + def get_cells(self): """ Returns dictionary with all cells that are not Blocks (i.e. NeuralNetworkCells). This allows @@ -212,7 +222,7 @@ def items(self): class Iterable(Block, ABC): """ This abstract class describes a Block object with order, i.e. an array/list-like object. - It implements several methods of python lists. The __call__ method is implicitly defined by the order + It implements several methods of python lists. The ``__call__`` method is implicitly defined by the order of the list. 
""" @abstractmethod @@ -294,7 +304,6 @@ def to_deepnet_cell(self, provider, target=None): """ if not isinstance(provider, n2d2.provider.DataProvider): raise n2d2.error_handler.WrongInputType("provider", type(provider), ["n2d2.provider.DataProvider"]) - # dummy_input = provider.read_random_batch() dummy_input = n2d2.Tensor(provider.shape()) provider._deepnet = n2d2.deepnet.DeepNet() @@ -350,7 +359,7 @@ def __call__(self, x): class DeepNetCell(Block): """ n2d2 wrapper for a N2D2 deepnet object. Allows chaining a N2D2 deepnet (for example loaded from a ONNX or INI file) - into the dynamic computation graph of the n2d2 API. During each use of the the __call__ method, + into the dynamic computation graph of the n2d2 API. During each use of the the ``__call__`` method, the N2D2 deepnet is converted to a n2d2 representation and the N2D2 deepnet is concatenated to the deepnet of the incoming tensor object. The object is manipulated with the bound methods of the N2D2 DeepNet object, and its computation graph is @@ -387,9 +396,9 @@ def load_from_ONNX(cls, provider, model_path, ini_file=None, ignore_cells=None): :param provider: Provider object to base deepnet upon :type provider: :py:class:`n2d2.provider.DataProvider` - :param model_path: Path to the model. + :param model_path: Path to the ``onnx`` model. :type model_path: str - :param ini_file: Path to an optional .ini file with additional onnx import instructions + :param ini_file: Path to an optional ``.ini`` file with additional onnx import instructions :type ini_file: str :param ignore_cells: List of cells name to ignore, default=None :type ignore_cells: list, optional @@ -418,7 +427,7 @@ def load_from_ONNX(cls, provider, model_path, ini_file=None, ignore_cells=None): def load_from_INI(cls, path): """Load a deepnet from an INI file. - :param model_path: Path to the ini file. + :param model_path: Path to the ``ini`` file. :type model_path: str """ n2d2_deepnet = N2D2.DeepNetGenerator.generateFromINI(n2d2.global_variables.default_net, path) @@ -506,7 +515,7 @@ def remove(self, name:str, reconnect:bool=True)->None: """Remove a cell from the encapsulated deepnet. :param name: Name of cell that shall be removed. 
:type name: str - :param reconnect: If `True`, reconnects the parents with the child of the removed cell, default=True + :param reconnect: If ``True``, reconnects the parents with the child of the removed cell, default=True :type reconnect: bool, optional """ self._embedded_deepnet.remove(name, reconnect) @@ -587,17 +596,17 @@ def run_test(self, log = 1000, report = 100, test_index = -1, test_id = -1, :type test_index: int, optional :param test_id: Test a single specific stimulus ID (takes precedence over `test_index`), default=-1 :type test_id: int, optional - :param qat_sat: Fuse a QAT trained with SAT method, default=False + :param qat_sat: Fuse a QAT trained model with the SAT method, default=False :type qat_sat: bool, optional :param log_kernels: Log kernels after learning, default=False :type log_kernels: bool, optional - :param wt_round_mode: Weights clipping mode on export, can be `NONE`,`RINTF`, default="NONE" + :param wt_round_mode: Weights clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" :type wt_round_mode: str, optional - :param b_round_mode: Biases clipping mode on export, can be `NONE`,`RINTF`, default="NONE" + :param b_round_mode: Biases clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" :type b_round_mode: str, optional - :param c_round_mode: Clip clipping mode on export, can be `NONE`,`RINTF`, default="NONE" + :param c_round_mode: Clip clipping mode on export, can be ``NONE``,``RINTF``, default="NONE" :type c_round_mode: str, optional - :param act_scaling_mode: activation scaling mode on export, can be `NONE`, `FLOAT_MULT`, `FIXED_MULT16`, `SINGLE_SHIFT` or `DOUBLE_SHIFT`, default="FLOAT_MULT" + :param act_scaling_mode: activation scaling mode on export, can be ``NONE``, ``FLOAT_MULT``, ``FIXED_MULT16``, ``SINGLE_SHIFT`` or ``DOUBLE_SHIFT``, default="FLOAT_MULT" :type act_scaling_mode: str, optional :param log_JSON: If ``True``, log JSON annotations, default=False :type log_JSON: bool, optional diff --git a/python/n2d2/cells/nn/abstract_cell.py b/python/n2d2/cells/nn/abstract_cell.py index 4d0ab6fa..a3e69093 100644 --- a/python/n2d2/cells/nn/abstract_cell.py +++ b/python/n2d2/cells/nn/abstract_cell.py @@ -282,7 +282,7 @@ def get_activation(self): return self._config_parameters['activation'] return None - def get_inputs(self): + def get_input_cells(self): return self._input_cells def clear_input(self): @@ -334,9 +334,9 @@ def _sync_inputs_and_parents(self): def __str__(self): output = "\'" + self.get_name() + "\' " + self.get_type() + "(" + self._model_key + ")" output += N2D2_Interface.__str__(self) - if len(self.get_inputs()) > 0: + if len(self.get_input_cells()) > 0: output += "([" - for idx, name in enumerate(self.get_inputs()): + for idx, name in enumerate(self.get_input_cells()): if idx > 0: output += ", " output += "'" + name + "'" diff --git a/python/n2d2/deepnet.py b/python/n2d2/deepnet.py index 419fa4b1..ec861a1d 100755 --- a/python/n2d2/deepnet.py +++ b/python/n2d2/deepnet.py @@ -25,7 +25,7 @@ import n2d2.global_variables import n2d2.cells.nn from n2d2.n2d2_interface import N2D2_Interface - +from n2d2.utils import generate_name """ """ class DeepNet(N2D2_Interface): @@ -33,11 +33,12 @@ class DeepNet(N2D2_Interface): _convention_converter= n2d2.ConventionConverter({ "name": "Name", }) + def __init__(self, **config_parameters): N2D2_Interface.__init__(self, **config_parameters) - self._config_parameters['name'] = "DeepNet(id=" + str(id(self)) + ")" + self._config_parameters['name'] = generate_name(self) 
diff --git a/python/n2d2/export.py b/python/n2d2/export.py index 60d45ea3..57cf27e0 100644 --- a/python/n2d2/export.py +++ b/python/n2d2/export.py @@ -20,6 +20,53 @@ import n2d2 import N2D2 +from os import mkdir +from os.path import exists + +# This is the default docstring for export. +# Parameter descriptions can be overridden by the docstring defined inside the export function. +# The docstring header is always the one defined in the function! +export_doc_string = \ +""" +:param deepnet_cell: The Neural network you want to export. +:type deepnet_cell: :py:class:`n2d2.cells.DeepNetCell` +:param provider: Data provider to use for calibration, default=None +:type provider: :py:class:`n2d2.provider.DataProvider`, optional +:param nb_bits: Number of bits for the weights and signals. Must be ``8``, ``16``, ``32`` or ``64`` for integer export, or ``-32``, ``-64`` for floating point export, default=8 +:type nb_bits: int, optional +:param qat_sat: Fuse a QAT trained model with the SAT method, default=False +:type qat_sat: bool, optional +:param export_no_unsigned: If True, disable the use of unsigned data type in integer exports, default=False +:type export_no_unsigned: bool, optional +:param calibration: The number of stimuli used for the calibration (``0`` = no calibration, ``-1`` = use the full test dataset), default=0 +:type calibration: int, optional +:param export_no_cross_layer_equalization: If True, disable the use of cross layer equalization in integer exports, default=False +:type export_no_cross_layer_equalization: bool, optional +:param wt_clipping_mode: Weights clipping mode on export, can be ``NONE``, ``MSE`` or ``KL_DIVERGENCE``, default="NONE" +:type wt_clipping_mode: str, optional +:param act_clipping_mode: activation clipping mode on export, can be ``NONE``, ``MSE``, ``KL_DIVERGENCE`` or ``Quantile``, default="MSE" +:type act_clipping_mode: str, optional +:param act_scaling_mode: activation scaling mode on export, can be ``NONE``, ``FLOAT_MULT``, ``FIXED_MULT16``, ``SINGLE_SHIFT`` or ``DOUBLE_SHIFT``, default="FLOAT_MULT" +:type act_scaling_mode: str, optional +:param act_quantile_value: Quantile value for ``Quantile`` clipping mode, default=0.9999 +:type act_quantile_value: float, optional +:param act_rescale_per_output: If True, rescale activation per output on export, default=False +:type act_rescale_per_output: bool, optional +:param calibration_reload: If True, reload and reuse the data of a previous calibration, default=False +:type calibration_reload: bool, optional +:param report: Number of steps between reports, default=100 +:type report: int, optional +:param export_nb_stimuli_max: Maximum number of stimuli to export (0 = no dataset export, -1 = unlimited), default=-1 +:type export_nb_stimuli_max: int, optional +:param wt_round_mode: Weights rounding mode on export, can be ``NONE``, ``RINTF``, default="NONE" +:type wt_round_mode: str, optional +:param b_round_mode: Biases rounding mode on export, can be ``NONE``, ``RINTF``, default="NONE" +:type b_round_mode: str, optional +:param c_round_mode: Clipping values rounding mode on export, can be ``NONE``, ``RINTF``, default="NONE" +:type c_round_mode: str, optional +:param find_lr: Find an appropriate learning rate over a number of iterations, default=0 +:type find_lr: int, optional +"""
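Since every export entry point now shares ``export_doc_string`` through the ``add_docstring`` decorator (defined later in this changeset), they all accept the same calibration options. A hedged usage sketch of the C export (the dataset path and provider shape are placeholders):

.. code-block:: python

    import n2d2

    database = n2d2.database.MNIST(data_path="/path/to/mnist", validation=0.1)
    provider = n2d2.provider.DataProvider(database, [28, 28, 1], batch_size=16)
    model = n2d2.cells.DeepNetCell.load_from_ONNX(provider, "model.onnx")

    # 8-bit integer export; calibration=-1 runs the calibration on the full test set
    n2d2.export.export_c(model, provider=provider, nb_bits=8, calibration=-1)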
def _parse_export_parameters(gen_export=None, nb_bits=8, qat_SAT=False, export_no_unsigned=False, calibration=0, @@ -28,7 +75,7 @@ def _parse_export_parameters(gen_export=None, nb_bits=8, qat_SAT=False, act_scaling_mode="FLOAT_MULT", act_quantile_value=0.9999, act_rescale_per_output=False, calibration_reload=False, report=100, export_nb_stimuli_max= -1, wt_round_mode = "NONE", - b_round_mode="NONE", c_round_mode="NONE", find_lr=0): + b_round_mode="NONE", c_round_mode="NONE", find_lr=0, log_kernels=False): if wt_round_mode not in N2D2.WeightsApprox.__members__.keys(): raise n2d2.error_handler.WrongValue("wt_round_mode", wt_round_mode, ", ".join(N2D2.WeightsApprox.__members__.keys())) @@ -70,88 +117,39 @@ def _parse_export_parameters(gen_export=None, nb_bits=8, qat_SAT=False, wt_round_mode=N2D2_wt_round_mode, b_round_mode=N2D2_b_round_mode, c_round_mode=N2D2_c_round_mode, - find_lr=find_lr).N2D2() + find_lr=find_lr, + log_kernels=log_kernels).N2D2() -def _export_deepnet_operation(deepnet_cell: n2d2.cells.DeepNetCell, - provider: n2d2.provider.Provider=None, - **kwargs) -> None : - """Retrieve N2D2 DeepNet and prepare it for an export. - """ +def _generate_export(deepnet_cell, provider=None, **kwargs): - N2D2_deepnet = deepnet_cell.get_embedded_deepnet().N2D2() - N2D2_deepnet.initialize() + export_folder_name = None if "export_folder_name" not in kwargs else kwargs.pop("export_folder_name") - if provider is not None: - N2D2_provider = provider.N2D2() - N2D2_database = N2D2_provider.getDatabase() - N2D2_deepnet.setDatabase(N2D2_database) - N2D2_deepnet.setStimuliProvider(N2D2_provider) - - if "calibration" in kwargs and kwargs["calibration"] != 0 and \ - N2D2_deepnet.getDatabase().getNbStimuli() == 0: - raise ValueError("Cannot calibrate the network with an empty database,\ - please add a dataprovider to run the calibration.") + N2D2_option = _parse_export_parameters(**kwargs) + N2D2_deepnet = deepnet_cell.get_embedded_deepnet().N2D2() + if N2D2_option.calibration != 0: + if "nb_bits" not in kwargs: + kwargs["nb_bits"] = N2D2_option.nb_bits + n2d2.quantizer.PTQ(deepnet_cell, provider=provider, **kwargs) - if len(N2D2_deepnet.getTargets()) == 0: - # No target associated to the DeepNet - # We create a Target for the last cell of the network - last_cell = deepnet_cell[-1].N2D2() - N2D2_target = N2D2.TargetScore("Target", last_cell, provider.N2D2()) - N2D2_deepnet.addTarget(N2D2_target) - elif provider is not None: - for target in N2D2_deepnet.getTargets(): - target.setStimuliProvider(provider.N2D2()) + if not deepnet_cell.is_integral() and N2D2_option.nb_bits > 0: + raise RuntimeError(f"You need to calibrate the network to export it in {abs(N2D2_option.nb_bits)}-bit integer precision; " \ "set the 'calibration' option to something other than 0 or quantize the deepnet cell before export.") + if not export_folder_name: + export_folder_name = f"export_{N2D2_option.gen_export}_{'int' if N2D2_option.nb_bits > 0 else 'float'}{abs(N2D2_option.nb_bits)}" - if (N2D2_deepnet.getDatabase().getNbStimuli(N2D2.Database.StimuliSet.__members__["Validation"]) > 0): - N2D2_deepnet.exportNetworkFreeParameters("weights_validation") - else: - N2D2_deepnet.exportNetworkFreeParameters("weights") - return N2D2_deepnet + if not exists(export_folder_name): + mkdir(export_folder_name) + N2D2.generateExportFromCalibration(N2D2_option, N2D2_deepnet, fileName=export_folder_name) +@n2d2.utils.add_docstring(export_doc_string) def export_c(deepnet_cell: n2d2.cells.DeepNetCell, provider: n2d2.provider.Provider=None, **kwargs) -> None: """Generate a C export of the neural network. - :param deepnet_cell: The Neural network you want to export.
- :type deepnet_cell: :py:class:`n2d2.cells.DeepNetCell` - :param provider: Data provider to use for calibration, default=None - :type provider: :py:class:`n2d2.provider.DataProvider`, optional - :param nb_bits: Number of bits per weight for exports, default=8 - :type nb_bits: int, optional - :param qat_sat: Fuse a QAT trained with SAT method, default=False - :type qat_sat: bool, optional - :param export_no_unsigned: If True, disable the use of unsigned data type in integer exports, default=False - :type export_no_unsigned: bool, optional - :param calibration: The number of stimuli used for the calibration (``0`` = no calibration, ``-1`` = use the full test dataset), default=0 - :type calibration: int, optional - :param export_no_cross_layer_equalization: If True, disable the use of cross layer equalization in integer exports, default=False - :type export_no_cross_layer_equalization: bool, optional - :param wt_clipping_mode: Weights clipping mode on export, can be ``NONE``, ``MSE`` or ``KL_DIVERGENCE``, default="NONE" - :type wt_clipping_mode: str, optional - :param act_clipping_mode: activation clipping mode on export, can be ``NONE``, ``MSE`` or ``KL_DIVERGENCE`` or ``Quantile``, default="MSE" - :type act_clipping_mode: str, optional :param act_scaling_mode: activation scaling mode on export, can be ``NONE``, ``FIXED_MULT16``, ``SINGLE_SHIFT`` or ``DOUBLE_SHIFT``, default="SINGLE_SHIFT" :type act_scaling_mode: str, optional - :param act_quantile_value: Quantile value for ``Quantile`` clipping mode, default=0.9999 - :type act_quantile_value: float, optional - :param act_rescale_per_output: If True, rescale activation per output on export, default=False - :type act_rescale_per_output: bool, optional - :param calibration_reload: If True, reload and reuse the data of a previous calibration, default=False - :type calibration_reload: bool, optional - :param report: Number of steps between reportings, default=100 - :type report: int, optional - :param export_nb_stimuli_max: Maximum number of stimuli to export (0 = no dataset export, -1 = unlimited), default=-1 - :type export_nb_stimuli_max: int, optional - :param wt_round_mode: Weights clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type wt_round_mode: str, optional - :param b_round_mode: Biases clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type b_round_mode: str, optional - :param c_round_mode: Clip clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type c_round_mode: str, optional - :param find_lr: Find an appropriate learning rate over a number of iterations, default=0 - :type find_lr: int, optional """ if "act_scaling_mode" in kwargs: @@ -160,106 +158,33 @@ def export_c(deepnet_cell: n2d2.cells.DeepNetCell, else: kwargs["act_scaling_mode"]="SINGLE_SHIFT" # Default value - N2D2_option = _parse_export_parameters(gen_export="C", **kwargs) - N2D2_deepnet = _export_deepnet_operation(deepnet_cell, provider, **kwargs) - N2D2.generateExport(N2D2_option, N2D2_deepnet) + kwargs["gen_export"] = "C" + _generate_export(deepnet_cell, provider, **kwargs) +@n2d2.utils.add_docstring(export_doc_string) def export_cpp(deepnet_cell: n2d2.cells.DeepNetCell, provider: n2d2.provider.Provider=None, **kwargs) -> None: """Generate a CPP export of the neural network. - - :param deepnet_cell: The Neural network you want to export. 
- :type deepnet_cell: :py:class:`n2d2.cells.DeepNetCell` - :param provider: Data provider to use for calibration, default=None - :type provider: :py:class:`n2d2.provider.DataProvider`, optional - :param nb_bits: Number of bits per weight for exports, default=8 - :type nb_bits: int, optional - :param qat_sat: Fuse a QAT trained with SAT method, default=False - :type qat_sat: bool, optional - :param export_no_unsigned: If True, disable the use of unsigned data type in integer exports, default=False - :type export_no_unsigned: bool, optional - :param calibration: The number of stimuli used for the calibration (``0`` = no calibration, ``-1`` = use the full test dataset), default=0 - :type calibration: int, optional - :param export_no_cross_layer_equalization: If True, disable the use of cross layer equalization in integer exports, default=False - :type export_no_cross_layer_equalization: bool, optional - :param wt_clipping_mode: Weights clipping mode on export, can be ``NONE``, ``MSE`` or ``KL_DIVERGENCE``, default="NONE" - :type wt_clipping_mode: str, optional - :param act_clipping_mode: activation clipping mode on export, can be ``NONE``, ``MSE`` or ``KL_DIVERGENCE`` or ``Quantile``, default="MSE" - :type act_clipping_mode: str, optional - :param act_scaling_mode: activation scaling mode on export, can be ``NONE``, ``FLOAT_MULT``, ``FIXED_MULT16``, ``SINGLE_SHIFT`` or ``DOUBLE_SHIFT``, default="FLOAT_MULT" - :type act_scaling_mode: str, optional - :param act_quantile_value: Quantile value for ``Quantile`` clipping mode, default=0.9999 - :type act_quantile_value: float, optional - :param act_rescale_per_output: If True, rescale activation per output on export, default=False - :type act_rescale_per_output: bool, optional - :param calibration_reload: If True, reload and reuse the data of a previous calibration, default=False - :type calibration_reload: bool, optional - :param report: Number of steps between reportings, default=100 - :type report: int, optional - :param export_nb_stimuli_max: Maximum number of stimuli to export (0 = no dataset export, -1 = unlimited), default=-1 - :type export_nb_stimuli_max: int, optional - :param wt_round_mode: Weights clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type wt_round_mode: str, optional - :param b_round_mode: Biases clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type b_round_mode: str, optional - :param c_round_mode: Clip clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type c_round_mode: str, optional - :param find_lr: Find an appropriate learning rate over a number of iterations, default=0 - :type find_lr: int, optional """ - N2D2_option = _parse_export_parameters(gen_export="CPP", **kwargs) - N2D2_deepnet = _export_deepnet_operation(deepnet_cell, provider, **kwargs) - - N2D2.generateExport(N2D2_option, N2D2_deepnet) - + kwargs["gen_export"] = "CPP" + _generate_export(deepnet_cell, provider, **kwargs) +@n2d2.utils.add_docstring(export_doc_string) def export_tensor_rt(deepnet_cell: n2d2.cells.DeepNetCell, provider: n2d2.provider.Provider=None, **kwargs) -> None: """Generate a TensorRT export of the neural network. - :param deepnet_cell: The Neural network you want to export. 
- :type deepnet_cell: :py:class:`n2d2.cells.DeepNetCell` - :param provider: Data provider to use for calibration, default=None - :type provider: :py:class:`n2d2.provider.DataProvider`, optional - :param nb_bits: Number of bits per weight for exports, default=8 + :param nb_bits: Only 32-bit floating point precision is available for this export; you can calibrate your network later with the export tools, default=-32 :type nb_bits: int, optional - :param qat_sat: Fuse a QAT trained with SAT method, default=False - :type qat_sat: bool, optional - :param export_no_unsigned: If True, disable the use of unsigned data type in integer exports, default=False - :type export_no_unsigned: bool, optional - :param calibration: The number of stimuli used for the calibration (``0`` = no calibration, ``-1`` = use the full test dataset), default=0 - :type calibration: int, optional - :param export_no_cross_layer_equalization: If True, disable the use of cross layer equalization in integer exports, default=False - :type export_no_cross_layer_equalization: bool, optional - :param wt_clipping_mode: Weights clipping mode on export, can be ``NONE``, ``MSE`` or ``KL-Diveregence``, default="NONE" - :type wt_clipping_mode: str, optional - :param act_clipping_mode: activation clipping mode on export, can be ``NONE``, ``MSE`` or ``KL-Divergence`` or ``Quantile``, default="MSE" - :type act_clipping_mode: str, optional - :param act_scaling_mode: activation scaling mode on export, can be ``NONE``, ``FLOAT_MULT``, ``FIXED_MULT16``, ``SINGLE_SHIFT`` or ``DOUBLE_SHIFT``, default="FLOAT_MULT" - :type act_scaling_mode: str, optional - :param act_quantile_value: Quantile value for ``Quantile`` clipping mode, default=0.9999 - :type act_quantile_value: float, optional - :param act_rescale_per_output: If True, rescale activation per output on export, default=False - :type act_rescale_per_output: bool, optional - :param calibration_reload: If True, reload and reuse the data of a previous calibration, default=False - :type calibration_reload: bool, optional - :param report: Number of steps between reportings, default=100 - :type report: int, optional - :param export_nb_stimuli_max: Maximum number of stimuli to export (0 = no dataset export, -1 = unlimited), default=-1 - :type export_nb_stimuli_max: int, optional - :param wt_round_mode: Weights clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type wt_round_mode: str, optional - :param b_round_mode: Biases clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type b_round_mode: str, optional - :param c_round_mode: Clip clipping mode on export, can be ``NONE``, ``RINTF``, default="NONE" - :type c_round_mode: str, optional - :param find_lr: Find an appropriate learning rate over a number of iterations, default=0 - :type find_lr: int, optional """ - N2D2_option = _parse_export_parameters(gen_export="CPP_TensorRT", **kwargs) - N2D2_deepnet = _export_deepnet_operation(deepnet_cell, provider, **kwargs) - - N2D2.generateExport(N2D2_option, N2D2_deepnet) + kwargs["gen_export"] = "CPP_TensorRT" + if "nb_bits" not in kwargs: + kwargs["nb_bits"] = -32 + else: + if kwargs["nb_bits"] != -32: + raise ValueError("The TensorRT export only supports 32-bit floating point precision. \ Calibration needs to be done once the export is generated (see: https://cea-list.github.io/N2D2-docs/export/TensorRT.html)") + _generate_export(deepnet_cell, provider, **kwargs) diff --git a/python/n2d2/n2d2_interface.py b/python/n2d2/n2d2_interface.py index 9b78c404..4f3ca8f7 100755 ---
a/python/n2d2/n2d2_interface.py +++ b/python/n2d2/n2d2_interface.py @@ -317,6 +317,7 @@ class Options(): def __init__(self, **parameters): self._N2D2 = N2D2.Options() self.set_parameters(**parameters) + self.options = parameters def set_parameters(self, **parameters): for key, value in parameters.items(): @@ -330,4 +331,10 @@ def set_parameters(self, **parameters): raise def N2D2(self): - return self._N2D2 \ No newline at end of file + return self._N2D2 + + def __str__(self): + result = "Option : \n" + for option_name, option_value in self.options.items(): + result += f"{option_name} : {option_value}\n" + return result \ No newline at end of file diff --git a/python/n2d2/provider.py b/python/n2d2/provider.py index b0a4ad51..1160acc0 100755 --- a/python/n2d2/provider.py +++ b/python/n2d2/provider.py @@ -63,11 +63,18 @@ def set_deepnet(self, deepnet): def get_size(self): return self._N2D2_object.getSize() + def get_batch_size(self): + """ + :returns: Batch size + :rtype: int + """ + return self._N2D2_object.getBatchSize() + def dims(self): - return self._N2D2_object.getData().dims() + return self.get_size() + [self.get_batch_size()] def shape(self): - return [i for i in reversed(self._N2D2_object.getData().dims())] + return list(reversed(self.dims())) def get_name(self): """ @@ -161,12 +168,6 @@ def get_labels(self): """ return n2d2.Tensor.from_N2D2(self._N2D2_object.getLabelsData()) - def get_batch_size(self): - """ - :returns: Batch size - :rtype: int - """ - return self._N2D2_object.getBatchSize() def get_database(self): """ diff --git a/python/n2d2/quantizer.py b/python/n2d2/quantizer.py index 7557d08d..07ce65c6 100755 --- a/python/n2d2/quantizer.py +++ b/python/n2d2/quantizer.py @@ -31,6 +31,7 @@ cuda_compiled = gb.cuda_compiled + # def fuse_qat(deep_net, provider, act_scaling_mode, w_mode="NONE", b_mode="NONE", c_mode="NONE"): # """This method allow you to fuse BatchNorm parameters into Conv layers once you have trained your model. 
@@ -72,6 +73,86 @@ # return deep_net_qat +def PTQ(deepnet_cell, + nb_bits, + nb_stimuli=-1, + provider=None, + no_unsigned=False, + cross_layer_equalization=True, + wt_clipping_mode="NONE", + act_clipping_mode="MSE", + act_scaling_mode="FLOAT_MULT", + **kwargs): + """ + :param deepnet_cell: The deepnet cell to calibrate. + :type deepnet_cell: :py:class:`n2d2.cells.DeepNetCell` + :param nb_bits: Number of bits per weight (can be for example ``-16`` for 16-bit float or ``8`` for 8-bit int) + :type nb_bits: int + :param nb_stimuli: The number of stimuli used for the calibration (``0`` = no calibration, ``-1`` = use the full test dataset), default=-1 + :type nb_stimuli: int, optional + :param provider: Data provider to use for calibration, default=None + :type provider: :py:class:`n2d2.provider.DataProvider`, optional + :param no_unsigned: If True, disable the use of unsigned data type in integer calibration, default=False + :type no_unsigned: bool, optional + :param cross_layer_equalization: If True, use cross layer equalization in integer calibration, default=True + :type cross_layer_equalization: bool, optional + :param wt_clipping_mode: Weights clipping mode on calibration, can be ``NONE``, ``MSE`` or ``KL_DIVERGENCE``, default="NONE" + :type wt_clipping_mode: str, optional + :param act_clipping_mode: activation clipping mode on calibration, can be ``NONE``, ``MSE``, ``KL_DIVERGENCE`` or ``Quantile``, default="MSE" + :type act_clipping_mode: str, optional + :param act_scaling_mode: activation scaling mode on calibration, can be ``NONE``, ``FLOAT_MULT``, ``FIXED_MULT16``, ``SINGLE_SHIFT`` or ``DOUBLE_SHIFT``, default="FLOAT_MULT" + :type act_scaling_mode: str, optional + """ + if "export_no_unsigned" in kwargs: + no_unsigned = kwargs["export_no_unsigned"] + if "export_no_cross_layer_equalization" in kwargs: + cross_layer_equalization = not kwargs["export_no_cross_layer_equalization"] + if "calibration" in kwargs: + nb_stimuli = kwargs["calibration"] + if act_clipping_mode not in N2D2.ClippingMode.__members__.keys(): + raise n2d2.error_handler.WrongValue("act_clipping_mode", act_clipping_mode, ", ".join(N2D2.ClippingMode.__members__.keys())) + N2D2_act_clipping_mode = N2D2.ClippingMode.__members__[act_clipping_mode] + if wt_clipping_mode not in N2D2.ClippingMode.__members__.keys(): + raise n2d2.error_handler.WrongValue("wt_clipping_mode", wt_clipping_mode, ", ".join(N2D2.ClippingMode.__members__.keys())) + N2D2_wt_clipping_mode = N2D2.ClippingMode.__members__[wt_clipping_mode] + if act_scaling_mode not in N2D2.ScalingMode.__members__.keys(): + raise n2d2.error_handler.WrongValue("act_scaling_mode", act_scaling_mode, ", ".join(N2D2.ScalingMode.__members__.keys())) + N2D2_act_scaling_mode = N2D2.ScalingMode.__members__[act_scaling_mode] + parameters = n2d2.n2d2_interface.Options( + nb_bits=nb_bits, + export_no_unsigned=no_unsigned, + calibration=nb_stimuli, + qat_SAT=False, + export_no_cross_layer_equalization=not cross_layer_equalization, + wt_clipping_mode=N2D2_wt_clipping_mode, + act_clipping_mode=N2D2_act_clipping_mode, + act_scaling_mode=N2D2_act_scaling_mode, + ).N2D2() + + N2D2_deepnet = deepnet_cell.get_embedded_deepnet().N2D2() + N2D2_deepnet.initialize() + + if provider is not None: + N2D2_provider = provider.N2D2() + N2D2_database = N2D2_provider.getDatabase() + N2D2_deepnet.setDatabase(N2D2_database) + N2D2_deepnet.setStimuliProvider(N2D2_provider) + + if len(N2D2_deepnet.getTargets()) == 0: + # No target associated to the DeepNet + # We create a Target for the last cell of the network + last_cell = deepnet_cell[-1].N2D2() + N2D2_target = N2D2.TargetScore("Target", last_cell, provider.N2D2()) + N2D2_deepnet.addTarget(N2D2_target) + elif provider is not None: + # We already have a Target, so we attach the new provider to it + for target in N2D2_deepnet.getTargets(): + target.setStimuliProvider(provider.N2D2()) + + if N2D2_deepnet.getDatabase().getNbStimuli(N2D2.Database.StimuliSet.__members__["Validation"]) > 0: + N2D2_deepnet.exportNetworkFreeParameters("weights_validation") + else: + N2D2_deepnet.exportNetworkFreeParameters("weights") + + N2D2.calibNetwork(parameters, N2D2_deepnet) + +
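``PTQ`` is the entry point ``_generate_export`` now delegates to when ``calibration`` is non-zero, and it can also be called directly. A hedged sketch (``model`` and ``provider`` are placeholders built as in the earlier examples):

.. code-block:: python

    import n2d2

    # Post-training quantization to 8-bit integers, calibrated on the full test set
    n2d2.quantizer.PTQ(model,
                       nb_bits=8,
                       nb_stimuli=-1,
                       provider=provider,
                       act_clipping_mode="MSE")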
class Quantizer(N2D2_Interface, ABC): @abstractmethod diff --git a/python/n2d2/tensor.py b/python/n2d2/tensor.py index edf097f8..ab229685 100755 --- a/python/n2d2/tensor.py +++ b/python/n2d2/tensor.py @@ -275,6 +275,14 @@ def reshape(self, new_dims): old_dims_str += str(dim) +" " raise ValueError(f"new size ({new_dims_str}= {str(reduce((lambda x,y: x*y), new_dims))}) does not match current size ({old_dims_str}= {str(self.__len__())})") self._tensor.reshape([int(d) for d in reversed(new_dims)]) + + def resize(self, new_dims): + """Resize the Tensor to the specified dims (defined by the Numpy convention). + + :param new_dims: New dimensions + :type new_dims: list + """ + self._tensor.resize([int(d) for d in reversed(new_dims)]) def copy(self): """Copy in memory the Tensor object. @@ -464,7 +472,7 @@ def __eq__(self, other_tensor)->bool: def __str__(self)->str: if self.is_cuda: # Updating the host before printing the Tensor - self.dtoh() + self.N2D2().synchronizeDBasedToH() output = "n2d2.Tensor([\n" output += str(self._tensor) output += "], device=" + ("cuda" if self.is_cuda else "cpu") diff --git a/python/n2d2/transform/color_space.py b/python/n2d2/transform/color_space.py index 0b3a6631..c14b71fa 100644 --- a/python/n2d2/transform/color_space.py +++ b/python/n2d2/transform/color_space.py @@ -36,30 +36,31 @@ class ColorSpace(Transformation): def __init__(self, color_space, **config_parameters): """ - Possible values for color_space parameter : - ``BGR``: convert any gray, BGR or BGRA image to BGR, - ``RGB``: convert any gray, BGR or BGRA image to RGB, - ``HSV``: convert BGR image to HSV, - ``HLS``: convert BGR image to HLS, - ``YCrCb``: convert BGR image to YCrCb, - ``CIELab``: convert BGR image to CIELab, - ``CIELuv``: convert BGR image to CIELuv, - ``RGB_to_BGR``: convert RGB image to BGR, - ``RGB_to_HSV``: convert RGB image to HSV, - ``RGB_to_HLS``: convert RGB image to HLS, - ``RGB_to_YCrCb``: convert RGB image to YCrCb, - ``RGB_to_CIELab``: convert RGB image to CIELab, - ``RGB_to_CIELuv``: convert RGB image to CIELuv, - ``HSV_to_BGR``: convert HSV image to BGR, - ``HSV_to_RGB``: convert HSV image to RGB, - ``HLS_to_BGR``: convert HLS image to BGR, - ``HLS_to_RGB``: convert HLS image to RGB, - ``YCrCb_to_BGR``: convert YCrCb image to BGR, - ``YCrCb_to_RGB``: convert YCrCb image to RGB, - ``CIELab_to_BGR``: convert CIELab image to BGR, - ``CIELab_to_RGB``: convert CIELab image to RGB, - ``CIELuv_to_BGR``: convert CIELuv image to BGR, - ``CIELuv_to_RGB``: convert CIELuv image to RGB.
+ Possible values for the ``color_space`` parameter: + + - ``BGR``: convert any gray, BGR or BGRA image to BGR, + - ``RGB``: convert any gray, BGR or BGRA image to RGB, + - ``HSV``: convert BGR image to HSV, + - ``HLS``: convert BGR image to HLS, + - ``YCrCb``: convert BGR image to YCrCb, + - ``CIELab``: convert BGR image to CIELab, + - ``CIELuv``: convert BGR image to CIELuv, + - ``RGB_to_BGR``: convert RGB image to BGR, + - ``RGB_to_HSV``: convert RGB image to HSV, + - ``RGB_to_HLS``: convert RGB image to HLS, + - ``RGB_to_YCrCb``: convert RGB image to YCrCb, + - ``RGB_to_CIELab``: convert RGB image to CIELab, + - ``RGB_to_CIELuv``: convert RGB image to CIELuv, + - ``HSV_to_BGR``: convert HSV image to BGR, + - ``HSV_to_RGB``: convert HSV image to RGB, + - ``HLS_to_BGR``: convert HLS image to BGR, + - ``HLS_to_RGB``: convert HLS image to RGB, + - ``YCrCb_to_BGR``: convert YCrCb image to BGR, + - ``YCrCb_to_RGB``: convert YCrCb image to RGB, + - ``CIELab_to_BGR``: convert CIELab image to BGR, + - ``CIELab_to_RGB``: convert CIELab image to RGB, + - ``CIELuv_to_BGR``: convert CIELuv image to BGR, + - ``CIELuv_to_RGB``: convert CIELuv image to RGB. :param color_space: Convert image color. :type color_space: str diff --git a/python/n2d2/typed.py b/python/n2d2/typed.py index 91012f98..8f064eb4 100644 --- a/python/n2d2/typed.py +++ b/python/n2d2/typed.py @@ -35,7 +35,7 @@ class Modeltyped(ABC): @abstractmethod def __init__(self, **config_parameters): """ - :param model: Specify the kind of object to run, can be `Frame` or `Frame_CUDA`, default=n2d2.global_variables.default_model + :param model: Specify the kind of object to run, can be ``Frame`` or ``Frame_CUDA``, default=n2d2.global_variables.default_model :type model: str, optional """ if 'model' in config_parameters: @@ -56,12 +56,6 @@ class ModelDatatyped(Datatyped, Modeltyped, ABC): @abstractmethod def __init__(self, **config_parameters): - """ - :param datatype: Datatype used by the object, can only be ``float`` at the moment, default=n2d2.global_variables.default_datatype - :type datatype: str, optional - :param model: Specify the kind of object to run, can be `Frame` or `Frame_CUDA`, default=n2d2.global_variables.default_model - :type model: str, optional - """ Datatyped.__init__(self, **config_parameters) datatype = self._model_key Modeltyped.__init__(self, **config_parameters) diff --git a/python/n2d2/utils.py b/python/n2d2/utils.py index dc1e9c3c..def87b31 100755 --- a/python/n2d2/utils.py +++ b/python/n2d2/utils.py @@ -113,4 +113,63 @@ def dec(obj): docstring = obj.__init__.__doc__ if obj.__init__.__doc__ else "" obj.__init__.__doc__ = docstring + parents_docstring return obj - return dec \ No newline at end of file + return dec + +def add_docstring(doc_string): + """Decorator to inherit the docstring of another function. + The docstring header is preserved. + A dictionary of the parameters is built by parsing the docstring of the function and the docstring to add. + The docstring available in the function overrides the docstring to add.
+ """ + def dec(func): + header = "" + flag_header=True + param_dic = {} + for line in doc_string.split("\n"): + if flag_header: + if ":param" in line: + flag_header = False + else: + continue + if ":param" in line: + param_name = line.split(":")[1].replace("param ", "") + param_desc = line.split(":")[2].lstrip(" ") + if param_name not in param_dic: + param_dic[param_name] = [param_desc, ""] + else: + param_dic[param_name][0] = param_desc + if ":type" in line: + param_name = line.split(":")[1].replace("type ", "") + param_desc = line.split(":")[2].lstrip(" ") + if param_name not in param_dic: + param_dic[param_name] = ["", param_desc] + else: + param_dic[param_name][1] = param_desc + flag_header = True + for line in func.__doc__.split("\n"): + if flag_header: + if ":param" in line: + flag_header = False + else: + header += line + "\n" + if ":param" in line: + param_name = line.split(":")[1].replace("param ", "") + param_desc = line.split(":")[2].lstrip(" ") + if param_name not in param_dic: + param_dic[param_name] = [param_desc, ""] + else: + param_dic[param_name][0] = param_desc + if ":type" in line: + param_name = line.split(":")[1].replace("type ", "") + param_desc = line.split(":")[2].lstrip(" ") + if param_name not in param_dic: + param_dic[param_name] = ["", param_desc] + else: + param_dic[param_name][1] = param_desc + + param_doc = "" + for param_name, param_desc in param_dic.items(): + param_doc += f":param {param_name}: {param_desc[0]}\n:type {param_name}: {param_desc[1]}\n" + func.__doc__ = header + param_doc + return func + return dec diff --git a/python/pytorch_interoperability/pytorch_interface.py b/python/pytorch_interoperability/pytorch_interface.py index ae17cc63..c9fbaa94 100755 --- a/python/pytorch_interoperability/pytorch_interface.py +++ b/python/pytorch_interoperability/pytorch_interface.py @@ -95,18 +95,28 @@ class Block(torch.nn.Module): """ _initialized = False - def __init__(self, block): + def __init__(self, block, need_to_flatten=False, batch_size=None): """ :param block: n2d2 block object to interface with PyTorch :type block: :py:class:`n2d2.cells.Block` """ super().__init__() if not isinstance(block, n2d2.cells.Block): - raise TypeError("sequence should be of type n2d2.cells.Block got " + str(type(block)) + " instead") - self._N2D2 = block + raise TypeError("Parameter block should be of type n2d2.cells.Block got " + str(type(block)) + " instead") + self._block = block # We need to add a random parameter to the module else pytorch refuse to compute gradient self.register_parameter(name='random_parameter', param=torch.nn.Parameter(torch.ones(1))) + self.need_to_flatten=need_to_flatten + self.batch_size = batch_size # Batchsize used to define the neural network + self.current_batch_size = None + self.output_tensor = None # "Saved" as an attribute to avoid python garbage collector ! + def get_block(self) -> n2d2.cells.Block: + """ + :return: The Block used by the custom sequential + :rtype: :py:class:`n2d2.cells.Block` + """ + return self._block def forward(self, inputs): """ @@ -115,22 +125,34 @@ def forward(self, inputs): """ class N2D2_computation(torch.autograd.Function): """ - An autograd function applied to a Torch tensor that will use the propagation/backpropagation/update of N2D2. + Autograd function that will use the propagation/backpropagation/update of N2D2. 
""" @staticmethod def forward(ctx, inputs): - self.batch_size = inputs.shape[0] - n2d2_tensor = _to_n2d2(inputs) + self.current_batch_size = inputs.shape[0] # Can be different than self.batch_size + # If we don't know batch size during the first propagation we set it to the batch size of the first stimuli, may be dangerous ? + if self.batch_size is None: + self.batch_size = self.current_batch_size + + if self.current_batch_size != self.batch_size: + # Pad incomplete batch with 0 as N2D2 doesn't support incomplete batch. + new_shape = list(inputs.shape) + new_shape[0] = self.batch_size + n2d2_tensor.resize(new_shape) + + if n2d2.global_variables.cuda_compiled: n2d2_tensor.cuda() n2d2_tensor.htod() if self.training: # training is a torch.nn.Module attribute (cf. https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module) - self._N2D2.learn() + self._block.learn() else: - self._N2D2.test() - n2d2_outputs = self._N2D2(n2d2_tensor) # Propagation + self._block.test() + + n2d2_outputs = self._block(n2d2_tensor) # Propagation + # Note : It's important to set diffOutputs as an attribute else when exiting this method # Python garbage collector will erase this variable while Cpp will still use it resulting in a SegFault self.diffOutputs = n2d2.Tensor(n2d2_tensor.dims(), value=0, dim_format="N2D2") @@ -144,6 +166,14 @@ def forward(ctx, inputs): self.output_tensor = n2d2_outputs outputs = _to_torch(n2d2_outputs.N2D2()) + if self.current_batch_size != self.batch_size: + # Warning for future : do not change the shape of n2d2_outputs ! + # Doing so will change the size of the variable mOutputs. + # This will cause a crash when the next full stimuli will come. + new_shape = list(n2d2_outputs.shape()) + new_shape[0] = self.current_batch_size + outputs = outputs.resize_(new_shape) # in place operation + # The conversion back to pytorch can alter the type so we need to set it back outputs = outputs.to(dtype=inputs.dtype) if inputs.is_cuda: # If N2D2 is compiled with CUDA the output Tensor will always be CUDA @@ -154,11 +184,21 @@ def forward(ctx, inputs): @staticmethod def backward(ctx, grad_output): + self.current_batch_size = grad_output.shape[0] if grad_output.is_cuda: grad_output = grad_output.cuda() grad_output = torch.mul(grad_output, -self.batch_size) - t_grad_output = _to_n2d2(grad_output).N2D2() + t_grad_output = _to_n2d2(grad_output) + if self.current_batch_size < self.batch_size: + # Making sure we have a full batch + new_shape = list(grad_output.shape) + new_shape[0] = self.batch_size + tmp_numpy = t_grad_output.to_numpy(copy=True) + tmp_numpy.resize(new_shape) + t_grad_output = n2d2.Tensor.from_numpy(tmp_numpy) + + t_grad_output=t_grad_output.N2D2() if len(self.deepnet.getLayers()[-1]) > 1: raise RuntimeError("Deepnet has more than one output cell") diffInputs = self.deepnet.getCell_Frame_Top(self.deepnet.getLayers()[-1][0]).getDiffInputs() @@ -171,11 +211,18 @@ def backward(ctx, grad_output): self.output_tensor.back_propagate() self.output_tensor.update() - diffOutput = self.deepnet.getCell_Frame_Top(self.deepnet.getLayers()[1][0]).getDiffOutputs() outputs = _to_torch(diffOutput) + if self.current_batch_size != self.batch_size: + # Warning for future : do not change the shape of n2d2_outputs ! + # Doing so will change the size of the variable mOutputs. + # This will cause a crash when the next full stimuli will come. 
+ new_shape = list(outputs.shape) + new_shape[0] = self.current_batch_size + outputs = outputs.resize_(new_shape) # in place operation outputs = torch.mul(outputs, -1/self.batch_size) + if grad_output.is_cuda: outputs = outputs.cuda() else: @@ -183,9 +230,12 @@ def backward(ctx, grad_output): return outputs.clone() # If the layer is at the beginning of the network requires grad is False. - if not inputs.requires_grad: - inputs.requires_grad = True - return N2D2_computation.apply(inputs) + inputs.requires_grad = True + + outputs = N2D2_computation.apply(inputs) + if self.need_to_flatten: + outputs = outputs.view(self.current_batch_size, -1) + return outputs def wrap(torch_model, input_size): """Function generating a ``torch.nn.Module`` which embed a :py:class:`n2d2.cells.DeepNetCell`. @@ -198,18 +248,21 @@ def wrap(torch_model, input_size): :return: A custom ``torch.nn.Module`` which embed a :py:class:`n2d2.cells.DeepNetCell`. :rtype: :py:class:`pytorch_interoperability.Block` """ - model_path = './tmp.onnx' + model_path = f'./{torch_model.__class__.__name__}.onnx' print("Exporting torch module to ONNX ...") dummy_in = torch.randn(input_size) + + # Update dummy tensor to the model device + dummy_in = dummy_in.to(next(torch_model.parameters()).device) + torch.onnx.export(torch_model, dummy_in, model_path, verbose=True, training=torch.onnx.TrainingMode.TRAINING) # Importing the ONNX to N2D2 print("Importing ONNX model to N2D2 ...") db = n2d2.database.Database() provider = n2d2.provider.DataProvider(db,[input_size[3], input_size[2], input_size[1]], batch_size=input_size[0]) - deepNet = n2d2.cells.DeepNetCell.load_from_ONNX(provider, "./tmp.onnx") - # print("Cleaning temporary ONNX file.") - # remove(model_path) + deepNet = n2d2.cells.DeepNetCell.load_from_ONNX(provider, model_path) + deepNet.set_solver(n2d2.solver.SGD( decay=0.0, iteration_size=1, learning_rate=0.01, learning_rate_decay=0.1, learning_rate_policy="None", learning_rate_step_size=1, max_iterations=0, min_decay=0.0, @@ -225,19 +278,8 @@ def wrap(torch_model, input_size): need_to_flatten = True else: pass - # Creating an N2D2 Module specific - class n2d2_module(torch.nn.Module): - def __init__(self): - super(n2d2_module, self).__init__() - self.n2d2_block = Block(deepNet) - def forward(self, x): - x = self.n2d2_block(x) - if need_to_flatten: - x = x.view(input_size[0], -1) - return x - def __str__(self): - return self.n2d2_block() + print(deepNet) + converted_model = Block(deepNet, need_to_flatten=need_to_flatten, batch_size=input_size[0]) - converted_model = n2d2_module() return converted_model diff --git a/python/tests/test_keras_export.py b/python/tests/test_keras_export.py new file mode 100644 index 00000000..6ef111b1 --- /dev/null +++ b/python/tests/test_keras_export.py @@ -0,0 +1,209 @@ +""" + (C) Copyright 2022 CEA LIST. All Rights Reserved. + Contributor(s): Cyril MOINEAU (cyril.moineau@cea.fr) + + This software is governed by the CeCILL-C license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL-C + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited + liability. 
+ + The fact that you are presently reading this means that you have had + knowledge of the CeCILL-C license and that you accept its terms. +""" + +import unittest +import n2d2 +import keras_interoperability + +from tensorflow.random import uniform +from os.path import exists +from shutil import rmtree + +from tiny_ml_keras.anomaly_model import get_model as get_anomaly_model +from tiny_ml_keras.kws_model import kws_dscnn as get_kws_model +from tiny_ml_keras.resnet_model import resnet_v1_eembc as get_resnet_model +from tiny_ml_keras.vww_model import mobilenet_v1 as get_mobilenet_model + + +DATA_PATH="/local/DATABASE/" + + +class test_keras_export(unittest.TestCase): + """ + The class needs to inherit from unittest.TestCase; the name doesn't matter and the class doesn't need to be instantiated. + """ + absolute_precision = 0.0001 + relative_precision = 0.001 + + def setUp(self): + n2d2.global_variables.cuda_device = 0 + n2d2.global_variables.default_model = 'Frame_CUDA' + + def tearDown(self): + n2d2.global_variables.cuda_device = 0 + n2d2.global_variables.default_model = 'Frame' + + def check_tensor_equality(self, x, y): + for i, j in zip(x.numpy().flatten(), y.numpy().flatten()): + self.assertTrue(abs(i-j) < self.relative_precision * abs(j) + self.absolute_precision, + "N2D2 and Keras give different output tensors!") + + def test_anomaly_CPP(self): + + net_test = get_anomaly_model(640) + n2d2_net_test = keras_interoperability.wrap(net_test, batch_size=5, for_export=True) + input_test = uniform([5, 640]) + keras_out = net_test(input_test) + n2d2_out = n2d2_net_test(input_test) + print("Keras output :") + print(keras_out) + print("N2D2 output :") + print(n2d2_out) + self.check_tensor_equality(keras_out, n2d2_out) + + print("Model has been wrapped!") + # Importing data for calibration. + db = n2d2.database.DIR(DATA_PATH+"tif_database_ToyCar", + learn=0.8, validation=0.2, random_partitioning=True) + + provider = n2d2.provider.DataProvider(db,[640, 1, 1], batch_size=5) + + # Generating CPP export + n2d2.export.export_cpp( + n2d2_net_test.get_deepnet_cell(), + provider=provider, + nb_bits=8, + calibration=1, + export_nb_stimuli_max=0) + export_generated = exists("./export_CPP_int8") + self.assertTrue(export_generated) + if export_generated: + rmtree("./export_CPP_int8")
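The ``./export_CPP_int8`` folder these tests check for comes from the default naming scheme in ``_generate_export`` shown earlier. A short recap of the pattern (derived from that f-string):

.. code-block:: python

    # export_folder_name = f"export_{gen_export}_{'int' if nb_bits > 0 else 'float'}{abs(nb_bits)}"
    # export_cpp(..., nb_bits=8)            -> ./export_CPP_int8
    # export_c(..., nb_bits=8)              -> ./export_C_int8
    # export_tensor_rt(...)  # nb_bits=-32  -> ./export_CPP_TensorRT_float32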
+ def test_kws_CPP(self): + + net_test = get_kws_model(49,10,12, for_tflite=True, BNorm=True) + n2d2_net_test = keras_interoperability.wrap(net_test, batch_size=5, for_export=True) + input_test = uniform([5, 49, 10, 1]) + keras_out = net_test(input_test) + n2d2_out = n2d2_net_test(input_test) + print("Keras output :") + print(keras_out) + print("N2D2 output :") + print(n2d2_out) + self.check_tensor_equality(keras_out, n2d2_out) + + print("Model has been wrapped!") + # Importing data for calibration. + db = n2d2.database.DIR(DATA_PATH+"speech_commands_v0.02_mfcc_10words", + learn=0.8, validation=0.2, depth=1, + ignore_mask=["*/_background_noise_"], valid_extensions=["tiff"]) + + provider = n2d2.provider.DataProvider(db, [10, 49, 1], batch_size=5) + deepnet_cell = n2d2_net_test.get_deepnet_cell() + # remove SoftMax + deepnet_cell.remove(deepnet_cell[-1].get_name()) + # Generating CPP export + n2d2.export.export_cpp( + deepnet_cell, + provider=provider, + nb_bits=8, + calibration=1, + export_nb_stimuli_max=0) + export_generated = exists("./export_CPP_int8") + + self.assertTrue(export_generated) + if export_generated: + rmtree("./export_CPP_int8") + + def test_resnet_CPP(self): + + net_test = get_resnet_model() + n2d2_net_test = keras_interoperability.wrap(net_test, batch_size=5, for_export=True) + input_test = uniform([5, 32, 32, 3]) + keras_out = net_test(input_test) + n2d2_out = n2d2_net_test(input_test) + print("Keras output :") + print(keras_out) + print("N2D2 output :") + print(n2d2_out) + self.check_tensor_equality(keras_out, n2d2_out) + + print("Model has been wrapped!") + # Importing data for calibration. + db = n2d2.database.DIR(DATA_PATH+"CIFAR-10-images/test", + learn=0.4, + validation=0.2, + random_partitioning=True, + depth=1, + valid_extensions=["jpg"]) + + provider = n2d2.provider.DataProvider(db, [32, 32, 3], batch_size=5) + deepnet_cell = n2d2_net_test.get_deepnet_cell() + # remove SoftMax + deepnet_cell.remove(deepnet_cell[-1].get_name()) + # Generating CPP export + n2d2.export.export_cpp( + deepnet_cell, + provider=provider, + nb_bits=8, + calibration=1, + export_no_unsigned=True, + export_nb_stimuli_max=0 + ) + export_generated = exists("./export_CPP_int8") + + self.assertTrue(export_generated) + if export_generated: + rmtree("./export_CPP_int8") + + def test_mobilenet_CPP(self): + + net_test = get_mobilenet_model() + n2d2_net_test = keras_interoperability.wrap(net_test, batch_size=5, for_export=True) + input_test = uniform([5, 96, 96, 3]) + keras_out = net_test(input_test) + n2d2_out = n2d2_net_test(input_test) + print("Keras output :") + print(keras_out) + print("N2D2 output :") + print(n2d2_out) + self.check_tensor_equality(keras_out, n2d2_out) + + print("Model has been wrapped!") + # Importing data for calibration. + db = n2d2.database.DIR(DATA_PATH+"vw_coco2014_96", + learn=0.8, validation=0.2, random_partitioning=True) + + provider = n2d2.provider.DataProvider(db, [96, 96, 3], batch_size=5) + + deepnet_cell = n2d2_net_test.get_deepnet_cell() + + # remove SoftMax + deepnet_cell.remove(deepnet_cell[-1].get_name()) + + # Generating CPP export + n2d2.export.export_cpp( + deepnet_cell, + provider=provider, + nb_bits=8, + calibration=1, + export_nb_stimuli_max=0) + export_generated = exists("./export_CPP_int8") + + self.assertTrue(export_generated) + if export_generated: + rmtree("./export_CPP_int8") + +if __name__ == '__main__': + """ + You need to add this line for the tests to be run.
+ """ + unittest.main() \ No newline at end of file diff --git a/python/tests/test_pytorch.py b/python/tests/test_pytorch.py index 705de831..079d85e5 100755 --- a/python/tests/test_pytorch.py +++ b/python/tests/test_pytorch.py @@ -722,6 +722,58 @@ def test_LeNet_GPU(self): res = tester.test_multiple_step((batch_size, 1, 32, 32), (batch_size, 10)) self.assertNotEqual(res, -1, msg="CUDA eval failed") + def test_incomplete_batch(self): + n2d2.global_variables.default_model = "Frame_CUDA" + weight_value = 0.01 + learning_rate = 0.01 + first_stimuli = torch.randn((10, 1, 3, 3)) # Stimuli with batch_size = 10 + incomplete_stimuli = torch.randn((5, 1, 3, 3)) # Stimuli with batch_size = 5 + torch_model = TorchConv() + n2d2_model = pytorch.wrap(torch_model, (10, 1, 3, 3)) + + n2d2_model.get_block().set_solver(n2d2.solver.SGD(learning_rate=learning_rate, momentum=0.0, decay=0.0, learning_rate_decay=0.993)) + + torch_out2 = torch_model(incomplete_stimuli) + n2d2_out2 = n2d2_model(incomplete_stimuli) + # Testing the incomplete batch : + for i, j in zip(torch.flatten(torch_out2), torch.flatten(n2d2_out2)): + i = i.item() + j = j.item() + if j != 0: + self.assertFalse(abs(i-j) > comparison_precision * abs(j)) + # # print(torch_out2) + # # print(n2d2_out2) + + incomplete_stimuli_label = torch.randn((5, 1, 3, 3)) # Stimuli with batch_size = 5 + optimizer_torch = torch.optim.SGD(torch_model.parameters(), lr=learning_rate) + optimizer_n2d2 = torch.optim.SGD(n2d2_model.parameters(), lr=learning_rate) + criterion_torch = torch.nn.MSELoss() + criterion_n2d2 = torch.nn.MSELoss() + + loss1 = criterion_torch(torch_out2, incomplete_stimuli_label) + optimizer_torch.zero_grad() + loss1.backward() + optimizer_torch.step() + + loss2 = criterion_n2d2(n2d2_out2, incomplete_stimuli_label) + optimizer_n2d2.zero_grad() + loss2.backward() + optimizer_n2d2.step() + # print(loss1) + # print(loss2) + self.assertEqual(loss1.item(), loss2.item()) + # Testing a complete batch after backpropagation + torch_out2 = torch_model(first_stimuli) + n2d2_out2 = n2d2_model(first_stimuli) + + for i, j in zip(torch.flatten(torch_out2), torch.flatten(n2d2_out2)): + i = i.item() + j = j.item() + if j != 0: + self.assertFalse(abs(i-j) > comparison_precision * abs(j)) + print(torch_out2) + print(n2d2_out2) + n2d2.global_variables.default_model = "Frame" if __name__ == '__main__': unittest.main() diff --git a/python/tests/tiny_ml_keras/anomaly_model.py b/python/tests/tiny_ml_keras/anomaly_model.py new file mode 100644 index 00000000..0bc91067 --- /dev/null +++ b/python/tests/tiny_ml_keras/anomaly_model.py @@ -0,0 +1,67 @@ +""" + @github: https://github.com/mlcommons/tiny + @license: https://apache.org/licenses/LICENSE-2.0 + @file keras_model.py + @brief Script for keras model definition + @author Toshiki Nakamura, Yuki Nikaido, and Yohei Kawaguchi (Hitachi Ltd.) + Copyright (C) 2020 Hitachi, Ltd. All right reserved. 
+""" + +######################################################################## +# import python-library +######################################################################## +# from import +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Input, Dense, BatchNormalization, Activation + + +######################################################################## +# keras model +######################################################################## +def get_model(inputDim): + """ + define the keras model + the model based on the simple dense auto encoder + (128*128*128*128*8*128*128*128*128) + """ + inputLayer = Input(shape=(inputDim,)) + + h = Dense(128)(inputLayer) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(128)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(128)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(128)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(8)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(128)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(128)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(128)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(128)(h) + h = BatchNormalization()(h) + h = Activation('relu')(h) + + h = Dense(inputDim)(h) + + return Model(inputs=inputLayer, outputs=h) diff --git a/python/tests/tiny_ml_keras/kws_model.py b/python/tests/tiny_ml_keras/kws_model.py new file mode 100644 index 00000000..1ddf1c64 --- /dev/null +++ b/python/tests/tiny_ml_keras/kws_model.py @@ -0,0 +1,142 @@ +''' +@author: https://github.com/mlcommons/tiny +@license: https://apache.org/licenses/LICENSE-2.0 +''' +import tensorflow as tf +from tensorflow import keras +import numpy as np + +# Note on channel location +# channels_first corresponds to inputs with shape batch_shape + (channels, spatial_dim1, spatial_dim2) or NCHW +# Channel_last (the default) corresponds to batch_shape + (spatial_dim1, spatial_dim2, channels) or NHWC + +# Input is a single dimension array of length = (spectrogram length x nb of bins) +# Can be given as it is for FC. 
But shall be reshaped to 2D for convolution + + +class kws_dscnn (tf.keras.Sequential): + def __init__(self, INPUT_H, INPUT_W, nb_of_words, + BNorm=True, DO_rate=0.0, + kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.01), + model_name='dscnn', for_tflite =False): + super(kws_dscnn, self).__init__(name=model_name) #Mandatory + + # (None, InputH, InputW, 1 channel) + self.dim_h=INPUT_H + self.dim_w=INPUT_W + self.BNorm=BNorm + self.words=nb_of_words + self.DO_rate=DO_rate + self.kernel_initializer=kernel_initializer + self.ReLU_max=None + + # TFLITE does no support GlobalAveragePooling layer + # Modification of network shall be done in this case + # Refer to bug (b/144955155) in tensorflow github + self.tflite=for_tflite + + # Model description + self.add(keras.Input(shape=(self.dim_h, self.dim_w,1), name="input_49_10")) + + self._conv_block(64, kernel=(10,4), strides=(2, 2)) + + for i in range(1,5): + self._dw_pw_conv_blocks(64, block_id=i) + + # Take the stride 2,2 into account + self.dim_h=int(np.ceil(self.dim_h/float(2))) + self.dim_w=int(np.ceil(self.dim_w/float(2))) + + if self.tflite: + self.add( + keras.layers.AveragePooling2D(pool_size=(self.dim_h,self.dim_w)) + ) + self.add( + keras.layers.Flatten() + ) + else: + self.add( + keras.layers.GlobalAveragePooling2D(name='global_avg_pooling') + ) + + self.add( + keras.layers.Dense(nb_of_words, + activation='softmax', + use_bias=True, + kernel_initializer=kernel_initializer, + name='final_fc') + ) + self.summary() + + def _conv_block(self, filters, kernel, strides): + """ + Full convolution + Dropout at input and output if rate>0 + + """ + #channel_axis = 1 if keras.backend.image_data_format() == 'channels_first' else -1 + self.add ( + keras.layers.Conv2D( + filters, + kernel, + padding='same', + use_bias=True, + strides=strides, + kernel_initializer=self.kernel_initializer, + name='conv0' + ) + ) + self.add(keras.layers.ReLU(self.ReLU_max, name='conv0_relu')) + self.add(keras.layers.Dropout(self.DO_rate)) + + def _dw_pw_conv_blocks(self, pointwise_conv_filters, block_id=1): + """ + Sequence of Depthwise + Pointwise convolutions + Number of DW filters inherits from previous layer + Pointwise kernel is 1x1 + BatchNormalization and dropout included + + """ + + # Default is channel_last, HWC + channel_axis = 1 if keras.backend.image_data_format() == 'channels_first' else -1 + + # DW + self.add( + keras.layers.DepthwiseConv2D((3, 3), + use_bias=True, + padding='same', + kernel_initializer=self.kernel_initializer, + name='conv_dw_%d' % block_id) + ) + if self.BNorm: + self.add( + keras.layers.BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id) + ) + self.add( + keras.layers.ReLU(self.ReLU_max, name='conv_dw_%d_relu' % block_id) + ) + self.add( + keras.layers.Dropout(self.DO_rate) + ) + + # PW + self.add( + keras.layers.Conv2D( + pointwise_conv_filters, (1, 1), + padding='same', + use_bias=True, + strides=(1, 1), + kernel_initializer=self.kernel_initializer, + name='conv_pw_%d' % block_id) + ) + if self.BNorm: + self.add( + keras.layers.BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id) + ) + self.add( + keras.layers.ReLU(self.ReLU_max, name='conv_pw_%d_relu' % block_id) + ) + self.add( + keras.layers.Dropout(self.DO_rate) + ) diff --git a/python/tests/tiny_ml_keras/resnet_model.py b/python/tests/tiny_ml_keras/resnet_model.py new file mode 100644 index 00000000..1cfe4d1a --- /dev/null +++ b/python/tests/tiny_ml_keras/resnet_model.py @@ -0,0 +1,187 @@ +''' +@author: 
https://github.com/mlcommons/tiny +@license: https://apache.org/licenses/LICENSE-2.0 + +MLCommons +group: TinyMLPerf (https://github.com/mlcommons/tiny) +image classification on cifar10 +keras_model.py: CIFAR10_ResNetv1 from eembc +''' + +import numpy as np + +import tensorflow as tf +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Input, Dense, Activation, Flatten, BatchNormalization +from tensorflow.keras.layers import Conv2D, AveragePooling2D, MaxPooling2D +from tensorflow.keras.regularizers import l2 + +#get model +def get_model_name(): + if os.path.exists("trained_models/trainedResnet.h5"): + return "trainedResnet" + else: + return "pretrainedResnet" + +def get_quant_model_name(): + if os.path.exists("trained_models/trainedResnet.h5"): + return "trainedResnet" + else: + return "pretrainedResnet" + +#define model +def resnet_v1_eembc(): + # Resnet parameters + input_shape=[32,32,3] # default size for cifar10 + num_classes=10 # default class number for cifar10 + num_filters = 16 # this should be 64 for an official resnet model + + # Input layer, change kernel size to 7x7 and strides to 2 for an official resnet + inputs = Input(shape=input_shape) + x = Conv2D(num_filters, + kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(inputs) + x = BatchNormalization()(x) + x = Activation('relu')(x) + #x = MaxPooling2D(pool_size=(2, 2))(x) # uncomment this for official resnet model + + + # First stack + + # Weight layers + y = Conv2D(num_filters, + kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + y = BatchNormalization()(y) + y = Activation('relu')(y) + y = Conv2D(num_filters, + kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(y) + y = BatchNormalization()(y) + + # Overall residual, connect weight layer and identity paths + x = tf.keras.layers.add([x, y]) + x = Activation('relu')(x) + + + # Second stack + + # Weight layers + num_filters = 32 # Filters need to be double for each stack + y = Conv2D(num_filters, + kernel_size=3, + strides=2, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + y = BatchNormalization()(y) + y = Activation('relu')(y) + y = Conv2D(num_filters, + kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(y) + y = BatchNormalization()(y) + + # Adjust for change in dimension due to stride in identity + x = Conv2D(num_filters, + kernel_size=1, + strides=2, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + + # Overall residual, connect weight layer and identity paths + x = tf.keras.layers.add([x, y]) + x = Activation('relu')(x) + + + # Third stack + + # Weight layers + num_filters = 64 + y = Conv2D(num_filters, + kernel_size=3, + strides=2, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + y = BatchNormalization()(y) + y = Activation('relu')(y) + y = Conv2D(num_filters, + kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(y) + y = BatchNormalization()(y) + + # Adjust for change in dimension due to stride in identity + x = Conv2D(num_filters, + kernel_size=1, + strides=2, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + + # Overall residual, connect weight layer and identity paths + x = 
    # Second stack

    # Weight layers
    num_filters = 32 # Filters are doubled for each stack
    y = Conv2D(num_filters,
               kernel_size=3,
               strides=2,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    y = BatchNormalization()(y)
    y = Activation('relu')(y)
    y = Conv2D(num_filters,
               kernel_size=3,
               strides=1,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(y)
    y = BatchNormalization()(y)

    # Adjust for change in dimension due to stride in identity
    x = Conv2D(num_filters,
               kernel_size=1,
               strides=2,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)

    # Overall residual, connect weight layer and identity paths
    x = tf.keras.layers.add([x, y])
    x = Activation('relu')(x)


    # Third stack

    # Weight layers
    num_filters = 64
    y = Conv2D(num_filters,
               kernel_size=3,
               strides=2,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    y = BatchNormalization()(y)
    y = Activation('relu')(y)
    y = Conv2D(num_filters,
               kernel_size=3,
               strides=1,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(y)
    y = BatchNormalization()(y)

    # Adjust for change in dimension due to stride in identity
    x = Conv2D(num_filters,
               kernel_size=1,
               strides=2,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)

    # Overall residual, connect weight layer and identity paths
    x = tf.keras.layers.add([x, y])
    x = Activation('relu')(x)


    # Fourth stack.
    # While the paper uses four stacks, for CIFAR-10 that leads to a large
    # increase in complexity for minor benefits.
    # Uncomment it to use it.

#    # Weight layers
#    num_filters = 128
#    y = Conv2D(num_filters,
#               kernel_size=3,
#               strides=2,
#               padding='same',
#               kernel_initializer='he_normal',
#               kernel_regularizer=l2(1e-4))(x)
#    y = BatchNormalization()(y)
#    y = Activation('relu')(y)
#    y = Conv2D(num_filters,
#               kernel_size=3,
#               strides=1,
#               padding='same',
#               kernel_initializer='he_normal',
#               kernel_regularizer=l2(1e-4))(y)
#    y = BatchNormalization()(y)
#
#    # Adjust for change in dimension due to stride in identity
#    x = Conv2D(num_filters,
#               kernel_size=1,
#               strides=2,
#               padding='same',
#               kernel_initializer='he_normal',
#               kernel_regularizer=l2(1e-4))(x)
#
#    # Overall residual, connect weight layer and identity paths
#    x = tf.keras.layers.add([x, y])
#    x = Activation('relu')(x)


    # Final classification layer.
    pool_size = int(np.amin(x.shape[1:3]))
    x = AveragePooling2D(pool_size=pool_size)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model
diff --git a/python/tests/tiny_ml_keras/vww_model.py b/python/tests/tiny_ml_keras/vww_model.py
new file mode 100644
index 00000000..1d9f88fe
--- /dev/null
+++ b/python/tests/tiny_ml_keras/vww_model.py
@@ -0,0 +1,294 @@
'''
@author: https://github.com/mlcommons/tiny
@license: https://apache.org/licenses/LICENSE-2.0

MobileNetV1 from the Silicon Labs GitHub page:
https://github.com/SiliconLabs/platform_ml_models/blob/master/eembc/Person_detection/mobilenet_v1_eembc.py
'''

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Activation, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, AveragePooling2D, MaxPooling2D

from tensorflow.keras.regularizers import l2


# define model
def mobilenet_v1():
    # Mobilenet parameters
    input_shape = [96, 96, 3] # resized to 96x96 per EEMBC requirement
    num_classes = 2 # person and non-person
    num_filters = 8 # normally 32, but running with alpha=.25 per EEMBC requirement

    inputs = Input(shape=input_shape)
    x = inputs # Keras model uses ZeroPadding2D()

    # 1st layer, pure conv
    # Keras 2.2 model has padding='valid' and disables bias
    x = Conv2D(num_filters,
               kernel_size=3,
               strides=2,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x) # Keras uses ReLU6 instead of pure ReLU

    # 2nd layer, depthwise separable conv
    # The number of filters is doubled before the pointwise conv
    # Keras uses ZeroPadding2D() and padding='valid'
    x = DepthwiseConv2D(kernel_size=3,
                        strides=1,
                        padding='same',
                        kernel_initializer='he_normal',
                        kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    num_filters = 2*num_filters
    x = Conv2D(num_filters,
               kernel_size=1,
               strides=1,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # 3rd layer, depthwise separable conv
    x = DepthwiseConv2D(kernel_size=3,
                        strides=2,
                        padding='same',
                        kernel_initializer='he_normal',
                        kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x =
Activation('relu')(x) + + num_filters = 2*num_filters + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 4th layer, depthwise separable conv + x = DepthwiseConv2D(kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 5th layer, depthwise separable conv + x = DepthwiseConv2D(kernel_size=3, + strides=2, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + num_filters = 2*num_filters + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 6th layer, depthwise separable conv + x = DepthwiseConv2D(kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 7th layer, depthwise separable conv + x = DepthwiseConv2D(kernel_size=3, + strides=2, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + num_filters = 2*num_filters + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 8th-12th layers, identical depthwise separable convs + # 8th + x = DepthwiseConv2D(kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 9th + x = DepthwiseConv2D(kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 10th + x = DepthwiseConv2D(kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + x = Conv2D(num_filters, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + # 11th + x = DepthwiseConv2D(kernel_size=3, + strides=1, + padding='same', + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4))(x) + x = BatchNormalization()(x) + x = Activation('relu')(x) + + x = 
Conv2D(num_filters,
               kernel_size=1,
               strides=1,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # 12th
    x = DepthwiseConv2D(kernel_size=3,
                        strides=1,
                        padding='same',
                        kernel_initializer='he_normal',
                        kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(num_filters,
               kernel_size=1,
               strides=1,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # 13th layer, depthwise separable conv
    x = DepthwiseConv2D(kernel_size=3,
                        strides=2,
                        padding='same',
                        kernel_initializer='he_normal',
                        kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    num_filters = 2*num_filters
    x = Conv2D(num_filters,
               kernel_size=1,
               strides=1,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # 14th layer, depthwise separable conv
    x = DepthwiseConv2D(kernel_size=3,
                        strides=1,
                        padding='same',
                        kernel_initializer='he_normal',
                        kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(num_filters,
               kernel_size=1,
               strides=1,
               padding='same',
               kernel_initializer='he_normal',
               kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Average pooling, max pooling may be used also
    # Keras employs GlobalAveragePooling2D
    x = AveragePooling2D(pool_size=x.shape[1:3])(x)
    #x = MaxPooling2D(pool_size=x.shape[1:3])(x)
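    # Shape check: with the 96x96 input above and five stride-2 layers
    # (the 1st, 3rd, 5th, 7th and 13th), the final feature map is
    # 96 / 2**5 = 3, so pool_size evaluates to (3, 3) here.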
    # Keras inserts Dropout() and a pointwise Conv2D() here
    # We are staying with the paper base structure

    # Flatten, FC layer and classify
    x = Flatten()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model
diff --git a/src/Export/CellExport.cpp b/src/Export/CellExport.cpp
index 3d5877d9..1df7d8e4 100755
--- a/src/Export/CellExport.cpp
+++ b/src/Export/CellExport.cpp
@@ -26,7 +26,7 @@
 #include "Export/DeepNetExport.hpp"
 #include "utils/Utils.hpp"

-N2D2::CellExport::Precision N2D2::CellExport::mPrecision = Int8;
+N2D2::CellExport::Precision N2D2::CellExport::mPrecision = Float64;
 N2D2::CellExport::IntApprox N2D2::CellExport::mIntApprox = Round;

 void N2D2::CellExport::generate(Cell& cell,
diff --git a/src/Histogram.cpp b/src/Histogram.cpp
index 4f976657..3f00f99d 100644
--- a/src/Histogram.cpp
+++ b/src/Histogram.cpp
@@ -125,9 +125,10 @@ void N2D2::Histogram::log(const std::string& fileName,
                           const std::unordered_map<std::string, double>& thresholds) const
 {
     const std::string dirName = Utils::dirName(fileName);
-
-    if (!dirName.empty())
+    if (!dirName.empty()){
+        #pragma omp critical(createHistogramDir)
         Utils::createDirectories(dirName);
+    }

     std::ofstream histData(fileName.c_str());
diff --git a/src/python/Cell/pybind_Cell.cpp b/src/python/Cell/pybind_Cell.cpp
index 9fb91131..17bd28bb 100755
--- a/src/python/Cell/pybind_Cell.cpp
+++ b/src/python/Cell/pybind_Cell.cpp
@@ -257,6 +257,7 @@ void init_Cell(py::module &m) {
     .def("getParentsCells", &Cell::getParentsCells)
     // .def("isConnection", &Cell::isConnection, py::arg("channel"), py::arg("output"))
     .def("getMapping", &Cell::getMapping)
+    .def("getQuantizedNbBits", &Cell::getQuantizedNbBits)
     ;
 }
}
diff --git a/src/python/utils/pybind_helper.cpp b/src/python/utils/pybind_helper.cpp
index 30e4e8ce..db8807c2 100644
--- a/src/python/utils/pybind_helper.cpp
+++ b/src/python/utils/pybind_helper.cpp
@@ -85,6 +85,8 @@ namespace N2D2 {
     m.def("learn_epoch", &learn_epoch, py::arg("opt"), py::arg("deepNet"));
     m.def("test", &test, py::arg("opt"), py::arg("deepNet"), py::arg("afterCalibration"));
     m.def("generateExport", &generateExport, py::arg("opt"), py::arg("deepNet"));
+    m.def("calibNetwork", &calibNetwork, py::arg("opt"), py::arg("deepNet"));
+    m.def("generateExportFromCalibration", &generateExportFromCalibration, py::arg("opt"), py::arg("deepNet"), py::arg("fileName")="");
 #ifdef CUDA
     m.def("setCudaDeviceOption", &setCudaDeviceOption, py::arg("value"));
     m.def("setMultiDevices", &setMultiDevices, py::arg("cudaDev"));
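// Usage sketch for the two bindings added above, as called from Python.
// Illustrative only: the module name N2D2 matches the pybind target, but how
// opt and deepNet are obtained is an assumption, not part of this diff.
//
//   import N2D2
//   N2D2.calibNetwork(opt, deepNet)        # standalone calibration, no export
//   N2D2.generateExportFromCalibration(opt, deepNet, fileName="my_export")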
diff --git a/src/utils/Helper.cpp b/src/utils/Helper.cpp
index 4c9b4267..da971781 100755
--- a/src/utils/Helper.cpp
+++ b/src/utils/Helper.cpp
@@ -94,8 +94,8 @@ namespace N2D2_HELPER{
         }
     }
     std::copy(devices.begin(),
-        devices.end(),
-        std::ostream_iterator<int>(devString, " "));
+          devices.end(),
+          std::ostream_iterator<int>(devString, " "));

 #ifdef WIN32
     _putenv_s("N2D2_GPU_DEVICES", devString.str().c_str());
@@ -277,6 +277,7 @@ namespace N2D2_HELPER{
         "test dataset)");
     calibrationReload = opts.parse("-calib-reload", "reload and reuse the data of a "
         " previous calibration.");
+    calibOnly = opts.parse("-calibOnly", "perform standalone calibration, no export");
     cRoundMode = weightsScalingMode(
         opts.parse("-c-round-mode", std::string("NONE"),
             "clip clipping mode on export, "
@@ -375,25 +376,18 @@ namespace N2D2_HELPER{
         if(opt.qatSAT) {
             deepNet->initialize();
             //deepNet->exportNetworkFreeParameters("weights_init");
-            std::cout << "N2D2_IP : " << std::endl;
-            #ifdef N2D2_IP
-            std::cout << "N2D2_IP is true" << std::endl;
-
+            #ifdef N2D2_IP
             if (opt.logKernels)
                 deepNet->logFreeParameters("kernels_fake_quantized");

             DeepNetQAT dnQAT(*deepNet);
-            dnQAT.fuseQATGraph(*sp, opt.actScalingMode, opt.wtRoundMode, opt.wtRoundMode, opt.wtRoundMode);
+            dnQAT.fuseQATGraph(*sp, opt.actScalingMode, opt.wtRoundMode, opt.bRoundMode, opt.cRoundMode);
             DrawNet::drawGraph(*deepNet, Utils::baseName(opt.iniConfig));

             if (opt.logKernels)
                 deepNet->logFreeParameters("kernels_quantized");
 #endif
-            //deepNet->exportNetworkFreeParameters("weights_quantized");
-
-        }
-        else {
-            deepNet->initialize();
+            deepNet->exportNetworkFreeParameters("weights_quantized");
         }

         startTimeSp = std::chrono::high_resolution_clock::now();
@@ -579,12 +573,9 @@ namespace N2D2_HELPER{
                 << "% / Informedness: " << (100.0
                 * targetScore->getAverageScore(Database::Test,
                                                ConfusionTableMetric::Informedness))
-                << "% / IU: " << (100.0
-                * targetScore->getAverageScore(Database::Test,
-                                               ConfusionTableMetric::IU))
                 << "%\n" << std::endl;
         }
-
+
         std::shared_ptr<TargetBBox> targetBBox
             = std::dynamic_pointer_cast<TargetBBox>(*itTargets);
@@ -618,8 +609,7 @@ namespace N2D2_HELPER{
         }
     }
-
-    bool generateExport(const Options& opt, std::shared_ptr<DeepNet>& deepNet) {
+    bool calibNetwork(const Options& opt, std::shared_ptr<DeepNet>& deepNet) {
         const std::shared_ptr<Database>& database = deepNet->getDatabase();
         const std::shared_ptr<StimuliProvider>& sp = deepNet->getStimuliProvider();

         if(!opt.qatSAT) {
             deepNet->fuseBatchNorm();
         }
-        const std::string exportDir = "export_" + opt.genExport + "_" +
-            ((opt.nbBits > 0) ? "int" : "float") +
-            std::to_string(std::abs(opt.nbBits));
+        std::string exportDir;
+        if(opt.genExport.empty()){
+            // Calibration without export: e.g. "calib_network_int8" for an
+            // unnamed network calibrated with nbBits = 8
+            exportDir = "calib_" + ((deepNet->getName().empty() ? "network" : deepNet->getName()))
+                + "_" + ((opt.nbBits > 0) ? "int" : "float")
+                + std::to_string(std::abs(opt.nbBits));
+        }else{
+            exportDir = "export_" + opt.genExport + "_"
+                + ((opt.nbBits > 0) ? "int" : "float")
+                + std::to_string(std::abs(opt.nbBits));
+        }

         Database::StimuliSet dbSet
             = (database->getNbStimuli(Database::Validation) > 0)
@@ -762,15 +761,6 @@ namespace N2D2_HELPER{
             afterCalibration = true;
         }
-
-        sp->logTransformations(exportDir + "/transformations.dot", Database::TestOnly);
-        if(!opt.qatSAT) {
-
-            StimuliProviderExport::generate(*deepNet, *sp, exportDir + "/stimuli", opt.genExport, Database::Test,
-                                            DeepNetExport::mEnvDataUnsigned, CellExport::mPrecision,
-                                            opt.exportNbStimuliMax);
-        }
-
         if(opt.qatSAT) {
             deepNet->initialize();
 #ifdef N2D2_IP
@@ -778,81 +768,49 @@ namespace N2D2_HELPER{
             if (opt.logKernels)
                 deepNet->logFreeParameters("kernels_fake_quantized");

             DeepNetQAT dnQAT(*deepNet);
-            dnQAT.fuseQATGraph(*sp, opt.actScalingMode, opt.wtRoundMode, opt.wtRoundMode, opt.wtRoundMode);
+            dnQAT.fuseQATGraph(*sp, opt.actScalingMode, opt.wtRoundMode, opt.bRoundMode, opt.cRoundMode);
             DrawNet::drawGraph(*deepNet, Utils::baseName(opt.iniConfig));
-            /*
-            Utils::createDirectories(exportDir + "/range_qat");
-            const std::string outputsRangeFile = exportDir + "/range_qat/outputs_range.bin";
-            const std::string outputsHistogramFile = exportDir + "/range_qat/outputs_histogram.bin";
-            std::unordered_map<std::string, RangeStats> outputsRange;
-            std::unordered_map<std::string, Histogram> outputsHistogram;
-
-
-            const std::size_t batchSize = sp->getMultiBatchSize();
-            const std::size_t nbBatches = std::ceil(1.0*nbStimuli/batchSize);
-
-            std::cout << "Calculating calibration data range and histogram..." << std::endl;
-            std::size_t nextReport = opt.report;
-
-            // Globally disable logistic activation, in order to evaluate the
-            // correct range and shifting required for layers with logistic
-            LogisticActivationDisabled = true;
-
-            sp->readBatch(dbSet, 0);
-            for(std::size_t b = 1; b <= nbBatches; ++b) {
-                const std::size_t istimulus = b * batchSize;
-
-                sp->synchronize();
-
-                // TODO Use a pool of threads
-                auto reportTask = std::async(std::launch::async, [&]() {
-                    #ifdef CUDA
-                    CudaContext::setDevice(cudaDevice);
+            StimuliProviderExport::generate(*deepNet, *sp, exportDir + "/stimuli", opt.genExport, Database::Test,
+                                            DeepNetExport::mEnvDataUnsigned, CellExport::mPrecision,
+                                            opt.exportNbStimuliMax);
+            dnQAT.exportOutputsLayers(*sp, exportDir + "/stimuli", Database::Test, opt.exportNbStimuliMax);
 #endif
-                    deepNet->test(dbSet);
-                    dnQAT.reportOutputsRange(outputsRange);
-                    dnQAT.reportOutputsHistogram(outputsHistogram, outputsRange,
-                                                 4, opt.actClippingMode);
-                });
-
-                if(b < nbBatches) {
-                    sp->future();
-                    sp->readBatch(dbSet, istimulus);
-                }
-
-                reportTask.wait();
-
-                if(istimulus >= nextReport && b < nbBatches) {
-                    nextReport += opt.report;
-                    std::cout << "Calibration data " << istimulus << "/" << nbStimuli << std::endl;
-                }
-            }
-
-            LogisticActivationDisabled = false;
+        }
+        return afterCalibration;
+    }

+    void generateExportFromCalibration(const Options& opt, std::shared_ptr<DeepNet>& deepNet, std::string fileName){
+        const std::shared_ptr<Database>& database = deepNet->getDatabase();
+        const std::shared_ptr<StimuliProvider>& sp = deepNet->getStimuliProvider();
+        // TODO : add an option to override the export folder name
+
+        std::string exportDir;
+        if (!fileName.empty())
+            exportDir = fileName;
+        else
+            exportDir = "export_" + opt.genExport + "_"
+                + ((opt.nbBits > 0) ? "int" : "float")
+                + std::to_string(std::abs(opt.nbBits));
-            RangeStats::saveOutputsRange(outputsRangeFile, outputsRange);
-            Histogram::saveOutputsHistogram(outputsHistogramFile, outputsHistogram);
-            RangeStats::logOutputsRange(exportDir + "/range_qat/outputs_range.dat", outputsRange);
-            Histogram::logOutputsHistogram(exportDir + "/range_qat/outputs_histogram", outputsHistogram,
-                                           4, opt.actClippingMode);
+        sp->logTransformations(exportDir + "/transformations.dot", Database::TestOnly);
+        if(!opt.qatSAT) {
-            */
             StimuliProviderExport::generate(*deepNet, *sp, exportDir + "/stimuli", opt.genExport, Database::Test,
                                             DeepNetExport::mEnvDataUnsigned, CellExport::mPrecision,
                                             opt.exportNbStimuliMax);
-
-            dnQAT.exportOutputsLayers(*sp, exportDir + "/stimuli", Database::Test, opt.exportNbStimuliMax);
-        #endif
         }

         DeepNetExport::generate(*deepNet, exportDir, opt.genExport);
         deepNet->exportNetworkFreeParameters("weights_export");
+    }

-        return afterCalibration;
+    bool generateExport(const Options& opt, std::shared_ptr<DeepNet>& deepNet) {
+        bool afterCalibration = calibNetwork(opt, deepNet);
+        generateExportFromCalibration(opt, deepNet);
+        return afterCalibration;
     }

     void findLearningRate(const Options& opt, std::shared_ptr<DeepNet>& deepNet) {
@@ -1346,9 +1304,6 @@ namespace N2D2_HELPER{
                 << "% / Informedness: " << (100.0
                 * targetScore->getAverageScore(Database::Validation,
                                                ConfusionTableMetric::Informedness))
-                << "% / IU: " << (100.0
-                * targetScore->getAverageScore(Database::Validation,
-                ConfusionTableMetric::IU))
                 << "%\n" << std::endl;

             if (!bestValidation) {
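// Note for the test updates below: since CellExport::mPrecision now defaults
// to Float64 (see src/Export/CellExport.cpp above), the CPP export tests pin
// an 8-bit precision themselves; the Precision enum encodes the bit width,
// so static_cast<CellExport::Precision>(8) selects the 8-bit (Int8) mode.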
diff --git a/tests/Export/class_CPP_Export.cpp b/tests/Export/class_CPP_Export.cpp
index a86594c2..bef6e4fa 100755
--- a/tests/Export/class_CPP_Export.cpp
+++ b/tests/Export/class_CPP_Export.cpp
@@ -69,6 +69,7 @@ TEST(CPP_Export, generateMemory) {
     UnitTest::FileWriteContent("net_test.ini", data);

     Network net(SEED,false);
+    CellExport::mPrecision = static_cast<CellExport::Precision>(8);
     std::shared_ptr<DeepNet> deepNet
         = DeepNetGenerator::generate(net, "net_test.ini");
@@ -185,6 +186,7 @@ TEST_DATASET(CPP_Export,
     UnitTest::FileWriteContent("net_test.ini", data);

     Network net(SEED,false);
+    CellExport::mPrecision = static_cast<CellExport::Precision>(8);
     std::shared_ptr<DeepNet> deepNet
         = DeepNetGenerator::generate(net, "net_test.ini");
@@ -375,6 +377,8 @@ TEST_DATASET(CPP_Export,
     std::make_tuple(true, MemoryManager::OptimizeMaxHoleMaxLifetimeFirst,
                     112*112*64 + 112*64*2))
 {
     Network net(SEED,false);
+    CellExport::mPrecision = static_cast<CellExport::Precision>(8);
+
     std::shared_ptr<DeepNet> deepNet
         = DeepNetGenerator::generate(net, "tests_data/ResNet-18.ini");