-
Notifications
You must be signed in to change notification settings - Fork 134
/
openmv_sparse_training.patch
634 lines (578 loc) · 23.7 KB
/
openmv_sparse_training.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
diff --git a/src/hal/cmsis/include/arm_nnsupportfunctions.h b/src/hal/cmsis/include/arm_nnsupportfunctions.h
index 84601904..abc6fe04 100644
--- a/src/hal/cmsis/include/arm_nnsupportfunctions.h
+++ b/src/hal/cmsis/include/arm_nnsupportfunctions.h
@@ -39,6 +39,9 @@ extern "C"
{
#endif
+#define MAX(A,B) ((A) > (B) ? (A) : (B)) // larger of A and B; each argument may be evaluated twice
+#define MIN(A,B) ((A) < (B) ? (A) : (B)) // smaller of A and B; each argument may be evaluated twice
+#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l)) // limit x to [l, h]; note the (value, high, low) argument order
/**
* @brief Union for SIMD access of Q31/Q15/Q7 types
*/
@@ -179,6 +182,7 @@ void arm_nn_mult_q15(
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
*/
+
void arm_nn_mult_q7(
q7_t * pSrcA,
q7_t * pSrcB,
@@ -186,6 +190,128 @@ void arm_nn_mult_q7(
const uint16_t out_shift,
uint32_t blockSize);
+// Required by TinyEngine: split a signed shift into left/right amounts (arguments parenthesized so expressions expand safely).
+#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
+#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
+#define Q31_MAX ((q31_t)(0x7FFFFFFFL))
+#define Q31_MIN ((q31_t)(0x80000000L))
+
+/* Store the two q15 halves packed in 'value' at *pQ15, then advance *pQ15 by two elements. */
+static __INLINE void write_q15x2_ia(q15_t **pQ15, q31_t value)
+{
+    q31_t packed = value;
+#ifdef __ARM_FEATURE_UNALIGNED
+    /* Unaligned access is available: copy all four bytes in one go. */
+    memcpy(*pQ15, &packed, 4);
+#else
+    q15_t *dst = *pQ15;
+    dst[0] = (packed & 0x0FFFF);
+    dst[1] = (packed >> 16) & 0x0FFFF;
+#endif
+    *pQ15 += 2;
+}
+
+/**
+ @brief Load two consecutive q15 elements as one q31 word and advance the input pointer by two.
+ @param[in,out] in_q15 Address of the read pointer; moved forward by two q15 elements.
+ @return The four bytes read, as a q31 value.
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15)
+{
+    const q15_t *src = *in_q15;
+    q31_t packed;
+
+    memcpy(&packed, src, 4);
+    *in_q15 = src + 2;
+    return packed;
+}
+
+/**
+ * @brief Saturating doubling high multiply. Result matches
+ *        NEON instruction VQRDMULH.
+ * @param[in] m1 Multiplicand
+ * @param[in] m2 Multiplier
+ * @return Rounded high word of 2 * m1 * m2; Q31_MAX when both inputs are Q31_MIN.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
+{
+    q31_t result = 0;
+    // Rounding offset to add for a right shift of 31
+    q63_t mult = 1 << 30;
+
+    if ((m1 < 0) ^ (m2 < 0))
+    {
+        mult = 1 - mult;
+    }
+    // Gets resolved as a SMLAL instruction
+    mult = mult + (q63_t)m1 * m2;
+
+    // Keep the upper bits; dividing by 2^31 is the doubling step as well.
+    // The divisor is a signed 64-bit constant so the division stays signed on every ABI.
+    result = (q31_t)(mult / (1LL << 31));
+
+    if ((m1 == m2) && (m1 == (int32_t)Q31_MIN))
+    {
+        result = Q31_MAX;
+    }
+    return result;
+}
+
+/**
+ * @brief Rounding divide by power of two.
+ * @param[in] dividend - Dividend
+ * @param[in] exponent - Divisor = power(2, exponent)
+ *                       Range: [0, 31]
+ * @return Rounded result of division. Midpoint is rounded away from zero.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
+{
+    q31_t result = 0;
+    // Build the mask in unsigned arithmetic so that exponent == 31 does not overflow a signed shift.
+    const q31_t remainder_mask = (q31_t)((1UL << exponent) - 1);
+    int32_t remainder = remainder_mask & dividend;
+
+    // Basic division
+    result = dividend >> exponent;
+
+    // Adjust 'result' for rounding (mid point away from zero)
+    q31_t threshold = remainder_mask >> 1;
+    if (result < 0)
+    {
+        threshold++;
+    }
+    if (remainder > threshold)
+    {
+        result++;
+    }
+
+    return result;
+}
+
+__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
+{
+    const q31_t scaled = arm_nn_sat_doubling_high_mult(val * (1 << LEFT_SHIFT(shift)), multiplier); // left-shift part, then multiply
+    return arm_nn_divide_by_power_of_two(scaled, RIGHT_SHIFT(shift)); // right-shift part, with rounding
+}
+
+/**
+ @brief Load four consecutive q7 elements as one q31 word and advance the input pointer by four.
+ @param[in,out] in_q7 Address of the read pointer; moved forward by four q7 elements.
+ @return The four bytes read, as a q31 value.
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7)
+{
+    const q7_t *src = *in_q7;
+    q31_t packed;
+    memcpy(&packed, src, 4);
+    *in_q7 = src + 4;
+    return packed;
+}
+
+
+
/**
* @brief defition to adding rouding offset
*/
diff --git a/src/omv/Makefile b/src/omv/Makefile
index 159d07a5..239fa50a 100644
--- a/src/omv/Makefile
+++ b/src/omv/Makefile
@@ -96,6 +96,50 @@ SRCS += $(addprefix imlib/, \
zbar.c \
)
+SRCS += $(addprefix modules/TinyEngine/, \
+ codegen/Source/genModel.c \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq.c \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq_bitmask.c \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq_bitmask.c \
+ src/kernels/fp_requantize_op/add_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch8_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch16_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch24_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch48_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq.c \
+ src/kernels/int_forward_op/avgpooling.c \
+ src/kernels/fp_requantize_op/convolve_s8_kernel3_inputch3_stride2_pad1_fpreq.c \
+ src/kernels/fp_requantize_op/mat_mul_kernels_fpreq.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask.c \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask_partialCH.c \
+ src/kernels/fp_backward_op/sum_4D_exclude_fp.c \
+ src/kernels/fp_backward_op/where_fp.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride1_inpad1_outpad0.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride2_inpad1_outpad1.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride1_inpad2_outpad0.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride2_inpad2_outpad1.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride1_inpad3_outpad0.c \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride2_inpad3_outpad1.c \
+ src/kernels/fp_backward_op/tte_exp_fp.c \
+ src/kernels/fp_backward_op/sub_fp.c \
+ src/kernels/fp_backward_op/mul_fp.c \
+ src/kernels/fp_backward_op/pointwise_conv_fp.c \
+ src/kernels/fp_backward_op/group_pointwise_conv_fp.c \
+ src/kernels/fp_backward_op/group_conv_fp_kernel4_stride1_pad0.c \
+ src/kernels/fp_backward_op/group_conv_fp_kernel8_stride1_pad0.c \
+ src/kernels/fp_backward_op/strided_slice_4Dto4D_fp.c \
+ src/kernels/fp_backward_op/sum_3D_fp.c \
+ src/kernels/fp_backward_op/nll_loss_fp.c \
+ src/kernels/fp_backward_op/log_softmax_fp.c \
+ )
+
SRCS += $(wildcard ports/$(PORT)/*.c)
OBJS = $(addprefix $(BUILD)/, $(SRCS:.c=.o))
diff --git a/src/omv/boards/OPENMV4/imlib_config.h b/src/omv/boards/OPENMV4/imlib_config.h
index fd395d87..262d527f 100644
--- a/src/omv/boards/OPENMV4/imlib_config.h
+++ b/src/omv/boards/OPENMV4/imlib_config.h
@@ -18,90 +18,90 @@
#define IMLIB_ENABLE_IMAGE_FILE_IO
// Enable LAB LUT
-#define IMLIB_ENABLE_LAB_LUT
+// #define IMLIB_ENABLE_LAB_LUT
// Enable YUV LUT
//#define IMLIB_ENABLE_YUV_LUT
// Enable mean pooling
-#define IMLIB_ENABLE_MEAN_POOLING
+// #define IMLIB_ENABLE_MEAN_POOLING
// Enable midpoint pooling
-#define IMLIB_ENABLE_MIDPOINT_POOLING
+// #define IMLIB_ENABLE_MIDPOINT_POOLING
// Enable binary ops
-#define IMLIB_ENABLE_BINARY_OPS
+// #define IMLIB_ENABLE_BINARY_OPS
// Enable math ops
-#define IMLIB_ENABLE_MATH_OPS
+// #define IMLIB_ENABLE_MATH_OPS
// Enable flood_fill()
-#define IMLIB_ENABLE_FLOOD_FILL
+// #define IMLIB_ENABLE_FLOOD_FILL
// Enable mean()
-#define IMLIB_ENABLE_MEAN
+// #define IMLIB_ENABLE_MEAN
// Enable median()
-#define IMLIB_ENABLE_MEDIAN
+// #define IMLIB_ENABLE_MEDIAN
// Enable mode()
-#define IMLIB_ENABLE_MODE
+// #define IMLIB_ENABLE_MODE
// Enable midpoint()
-#define IMLIB_ENABLE_MIDPOINT
+// #define IMLIB_ENABLE_MIDPOINT
// Enable morph()
-#define IMLIB_ENABLE_MORPH
+// #define IMLIB_ENABLE_MORPH
// Enable Gaussian
-#define IMLIB_ENABLE_GAUSSIAN
+// #define IMLIB_ENABLE_GAUSSIAN
// Enable Laplacian
-#define IMLIB_ENABLE_LAPLACIAN
+// #define IMLIB_ENABLE_LAPLACIAN
// Enable bilateral()
-#define IMLIB_ENABLE_BILATERAL
+// #define IMLIB_ENABLE_BILATERAL
// Enable cartoon()
// #define IMLIB_ENABLE_CARTOON
// Enable linpolar()
-#define IMLIB_ENABLE_LINPOLAR
+// #define IMLIB_ENABLE_LINPOLAR
// Enable logpolar()
-#define IMLIB_ENABLE_LOGPOLAR
+// #define IMLIB_ENABLE_LOGPOLAR
// Enable lens_corr()
-#define IMLIB_ENABLE_LENS_CORR
+// #define IMLIB_ENABLE_LENS_CORR
// Enable rotation_corr()
-#define IMLIB_ENABLE_ROTATION_CORR
+// #define IMLIB_ENABLE_ROTATION_CORR
// Enable phasecorrelate()
#if defined(IMLIB_ENABLE_ROTATION_CORR)
-#define IMLIB_ENABLE_FIND_DISPLACEMENT
+// #define IMLIB_ENABLE_FIND_DISPLACEMENT
#endif
// Enable get_similarity()
-#define IMLIB_ENABLE_GET_SIMILARITY
+// #define IMLIB_ENABLE_GET_SIMILARITY
// Enable find_lines()
-#define IMLIB_ENABLE_FIND_LINES
+// #define IMLIB_ENABLE_FIND_LINES
// Enable find_line_segments()
-#define IMLIB_ENABLE_FIND_LINE_SEGMENTS
+// #define IMLIB_ENABLE_FIND_LINE_SEGMENTS
// Enable find_circles()
-#define IMLIB_ENABLE_FIND_CIRCLES
+// #define IMLIB_ENABLE_FIND_CIRCLES
// Enable find_rects()
-#define IMLIB_ENABLE_FIND_RECTS
+// #define IMLIB_ENABLE_FIND_RECTS
// Enable find_qrcodes() (14 KB)
-#define IMLIB_ENABLE_QRCODES
+//#define IMLIB_ENABLE_QRCODES
// Enable find_apriltags() (64 KB)
-#define IMLIB_ENABLE_APRILTAGS
+//#define IMLIB_ENABLE_APRILTAGS
// Enable fine find_apriltags() - (8-way connectivity versus 4-way connectivity)
// #define IMLIB_ENABLE_FINE_APRILTAGS
@@ -110,10 +110,10 @@
// #define IMLIB_ENABLE_HIGH_RES_APRILTAGS
// Enable find_datamatrices() (26 KB)
-#define IMLIB_ENABLE_DATAMATRICES
+//#define IMLIB_ENABLE_DATAMATRICES
// Enable find_barcodes() (42 KB)
-#define IMLIB_ENABLE_BARCODES
+//#define IMLIB_ENABLE_BARCODES
// Enable CMSIS NN
// #if !defined(CUBEAI)
@@ -122,26 +122,26 @@
// Enable Tensor Flow
#if !defined(CUBEAI)
-#define IMLIB_ENABLE_TF
+//#define IMLIB_ENABLE_TF
#endif
// Enable FAST (20+ KBs).
// #define IMLIB_ENABLE_FAST
// Enable find_template()
-#define IMLIB_FIND_TEMPLATE
+// #define IMLIB_FIND_TEMPLATE
// Enable find_lbp()
-#define IMLIB_ENABLE_FIND_LBP
+// #define IMLIB_ENABLE_FIND_LBP
// Enable find_keypoints()
-#define IMLIB_ENABLE_FIND_KEYPOINTS
+// #define IMLIB_ENABLE_FIND_KEYPOINTS
// Enable load, save and match descriptor
-#define IMLIB_ENABLE_DESCRIPTOR
+// #define IMLIB_ENABLE_DESCRIPTOR
// Enable find_hog()
-#define IMLIB_ENABLE_HOG
+// #define IMLIB_ENABLE_HOG
// Enable selective_search()
// #define IMLIB_ENABLE_SELECTIVE_SEARCH
diff --git a/src/omv/boards/OPENMV4/omv_boardconfig.h b/src/omv/boards/OPENMV4/omv_boardconfig.h
index 412de472..f7da2c03 100644
--- a/src/omv/boards/OPENMV4/omv_boardconfig.h
+++ b/src/omv/boards/OPENMV4/omv_boardconfig.h
@@ -150,16 +150,18 @@
// The maximum available fb_alloc memory = FB_ALLOC_SIZE + FB_SIZE - (w*h*bpp).
#define OMV_FFS_MEMORY DTCM // Flash filesystem cache memory
#define OMV_MAIN_MEMORY SRAM1 // data, bss and heap memory
+#define OMV_MAIN_MEMORY2 SRAM5 // extra memory region (SRAM5) added by this patch
#define OMV_STACK_MEMORY ITCM // stack memory
#define OMV_DMA_MEMORY SRAM2 // DMA buffers memory.
#define OMV_FB_MEMORY AXI_SRAM // Framebuffer, fb_alloc
#define OMV_JPEG_MEMORY SRAM3 // JPEG buffer memory.
#define OMV_VOSPI_MEMORY SRAM4 // VoSPI buffer memory.
-#define OMV_FB_SIZE (400K) // FB memory: header + VGA/GS image
-#define OMV_FB_ALLOC_SIZE (100K) // minimum fb alloc size
+#define OMV_FB_SIZE (100K) // FB memory (default: 400K): header + VGA/GS image
+#define OMV_FB_ALLOC_SIZE (50K) // minimum fb alloc size (default: 100K)
#define OMV_STACK_SIZE (64K)
-#define OMV_HEAP_SIZE (236K)
+#define OMV_HEAP_SIZE (136K)
+// #define OMV_HEAP_SIZE (236K)
#define OMV_LINE_BUF_SIZE (3 * 1024) // Image line buffer round(640 * 2BPP * 2 buffers).
#define OMV_MSC_BUF_SIZE (2K) // USB MSC bot data
@@ -175,21 +177,27 @@
#define OMV_DTCM_LENGTH 128K
#define OMV_ITCM_ORIGIN 0x00000000
#define OMV_ITCM_LENGTH 64K
-#define OMV_SRAM1_ORIGIN 0x30000000
-#define OMV_SRAM1_LENGTH 248K
+// #define OMV_SRAM1_ORIGIN 0x30000000
+// #define OMV_SRAM1_LENGTH 248K
+#define OMV_SRAM1_ORIGIN 0x24000000
+#define OMV_SRAM1_LENGTH 512K
#define OMV_SRAM2_ORIGIN 0x3003E000 // 8KB of SRAM1
#define OMV_SRAM2_LENGTH 8K
#define OMV_SRAM3_ORIGIN 0x30040000
#define OMV_SRAM3_LENGTH 32K
#define OMV_SRAM4_ORIGIN 0x38000000
#define OMV_SRAM4_LENGTH 64K
-#define OMV_AXI_SRAM_ORIGIN 0x24000000
-#define OMV_AXI_SRAM_LENGTH 512K
+#define OMV_AXI_SRAM_ORIGIN 0x30000000
+#define OMV_AXI_SRAM_LENGTH 248K
+// #define OMV_AXI_SRAM_ORIGIN 0x24000000
+// #define OMV_AXI_SRAM_LENGTH 512K
+
// Domain 1 DMA buffers region.
#define OMV_DMA_MEMORY_D1 AXI_SRAM
#define OMV_DMA_MEMORY_D1_SIZE (8*1024) // Reserved memory for DMA buffers
-#define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(500*1024))
+#define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(400*1024)) // NOTE(review): the 400K offset lies beyond OMV_AXI_SRAM_LENGTH (248K) - confirm this address is valid
+// #define OMV_DMA_REGION_D1_BASE (OMV_AXI_SRAM_ORIGIN+(500*1024))
#define OMV_DMA_REGION_D1_SIZE MPU_REGION_SIZE_8KB
// Domain 2 DMA buffers region.
diff --git a/src/omv/modules/examplemodule.c b/src/omv/modules/examplemodule.c
index 37e2b4f4..52d1bda2 100644
--- a/src/omv/modules/examplemodule.c
+++ b/src/omv/modules/examplemodule.c
@@ -1,17 +1,88 @@
// Include MicroPython API.
#include "py/runtime.h"
+#include "genNN.h"
+#include "detectionUtility.h"
+#include <stdio.h>
+#include "py_image.h"
-// This is the function which will be called from Python as cexample.add_ints(a, b).
-STATIC mp_obj_t example_add_ints(mp_obj_t a_obj, mp_obj_t b_obj) {
- // Extract the ints from the micropython input objects.
- int a = mp_obj_get_int(a_obj);
- int b = mp_obj_get_int(b_obj);
+#define TEST_SIZE (1 * 1024)
+#define TN_MAX(A,B) ((A) > (B) ? (A) : (B))
+#define TN_MIN(A,B) ((A) < (B) ? (A) : (B))
+
+// for fc only
+#define ORIGIN_H 128
+#define ORIGIN_W 128
+#define IMAGE_H 128
+#define IMAGE_W 128
- // Calculate the addition and convert to MicroPython object.
- return mp_obj_new_int(a + b);
+uint16_t color;
+float labels[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+// Called from Python as cexample.train_demo(img, command).
+//   img     - image object; read as RGB565 and must be at least ORIGIN_W x ORIGIN_H pixels.
+//   command - class label (0 .. number of labels - 1) to train on, or a negative value to run inference.
+// Returns 0 on success, or -1 (after printing a message) when the label or the image size is rejected.
+STATIC mp_obj_t example_train_demo_fn(mp_obj_t a, mp_obj_t b) {
+    image_t *img = py_image_cobj(a);
+    int command = mp_obj_get_int(b);
+    const int num_labels = (int) (sizeof(labels) / sizeof(labels[0]));
+    // Reject labels outside labels[] and frames smaller than the window accessed below.
+    if (command >= num_labels || img->w < ORIGIN_W || img->h < ORIGIN_H){
+        printf("invalid label %d or image smaller than %dx%d\n", command, ORIGIN_W, ORIGIN_H);
+        return mp_obj_new_int(-1);
+    }
+    // Preprocessing: expand each RGB565 pixel to 8-bit channels and store (channel - 128) in the input buffer.
+    signed char *input = getInput();
+    for (int j = 0; j < IMAGE_H; j++){
+        for (int i = 0; i < IMAGE_W; i++){
+            // NOTE(review): pixel (x=i, y=j) lands at j + IMAGE_W * i, i.e. transposed - confirm this is intended.
+            int index = j + IMAGE_W * i;
+            if (i >= ORIGIN_W || j >= ORIGIN_H){
+                input[index * 3] = (int8_t) 0;
+                input[index * 3 + 1] = (int8_t) 0;
+                input[index * 3 + 2] = (int8_t) 0;
+                continue;
+            }
+            uint16_t pixel = IMAGE_GET_RGB565_PIXEL(img, i + MAX((ORIGIN_W-IMAGE_W)/2,0),
+                                                    j + MAX((ORIGIN_H-IMAGE_H)/2,0));
+            int red = ((pixel & 0xF800) >> 11) * 8;
+            int green = ((pixel & 0x07E0) >> 5) * 4;
+            int blue = ((pixel & 0x001F) >> 0) * 8;
+            input[index * 3] = (int8_t) (red - 128);
+            input[index * 3 + 1] = (int8_t) (green - 128);
+            input[index * 3 + 2] = (int8_t) (blue - 128);
+        }
+    }
+    if (command >= 0){
+        // One-hot target: clear every slot so a label set by an earlier call cannot linger.
+        for (int k = 0; k < num_labels; k++) labels[k] = 0;
+        labels[command] = 1;
+        invoke(labels);
+        printf("train class %d\n", command);
+    }
+    else{
+        invoke_inf();
+        // NOTE(review): the two scores are compared as unsigned bytes - confirm getOutput() does not hold signed int8.
+        uint8_t *output = (uint8_t*)getOutput();
+        if(output[0] > output[1]){
+            printf("infer class 0\n");
+            color = 63488; // 0xF800: red in RGB565
+        }
+        else{
+            printf("infer class 1\n");
+            color = 2016; // 0x07E0: green in RGB565
+        }
+        // Paint a 5x5 marker at (3, 3) in the colour of the predicted class.
+        for (int i = 0; i < 5; i++){
+            for (int j = 0; j < 5; j++){
+                IMAGE_PUT_RGB565_PIXEL(img, i + 3, j + 3, color);
+            }
+        }
+    }
+    return mp_obj_new_int(0);
}
+
// Define a Python reference to the function above.
-STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_add_ints_obj, example_add_ints);
+STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_train_demo, example_train_demo_fn);
// Define all properties of the module.
// Table entries are key/value pairs of the attribute name (a string)
@@ -20,7 +84,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(example_add_ints_obj, example_add_ints);
// optimized to word-sized integers by the build system (interned strings).
STATIC const mp_rom_map_elem_t example_module_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_cexample) },
- { MP_ROM_QSTR(MP_QSTR_add_ints), MP_ROM_PTR(&example_add_ints_obj) },
+ { MP_ROM_QSTR(MP_QSTR_train_demo), MP_ROM_PTR(&example_train_demo) },
};
STATIC MP_DEFINE_CONST_DICT(example_module_globals, example_module_globals_table);
@@ -33,4 +97,4 @@ const mp_obj_module_t example_user_cmodule = {
// Register the module to make it available in Python.
// Note: This module is disabled, set the thrid argument to 1 to enable it, or
// use a macro like MODULE_CEXAMPLE_ENABLED to conditionally enable this module.
-MP_REGISTER_MODULE(MP_QSTR_cexample, example_user_cmodule, 0);
+MP_REGISTER_MODULE(MP_QSTR_cexample, example_user_cmodule, 1);
diff --git a/src/omv/ports/stm32/omv_portconfig.mk b/src/omv/ports/stm32/omv_portconfig.mk
index 200ffb7d..b3049e25 100644
--- a/src/omv/ports/stm32/omv_portconfig.mk
+++ b/src/omv/ports/stm32/omv_portconfig.mk
@@ -4,7 +4,7 @@ STARTUP ?= st/startup_$(shell echo $(MCU) | tr '[:upper:]' '[:lower:]')
LDSCRIPT ?= stm32fxxx
# Compiler Flags
-CFLAGS += -std=gnu99 -Wall -Werror -Warray-bounds -mthumb -nostartfiles -fdata-sections -ffunction-sections
+CFLAGS += -std=gnu99 -Wall -Warray-bounds -mthumb -nostartfiles -fdata-sections -ffunction-sections
CFLAGS += -fno-inline-small-functions -D$(MCU) -D$(CFLAGS_MCU) -D$(ARM_MATH) -DARM_NN_TRUNCATE\
-fsingle-precision-constant -Wdouble-promotion -mcpu=$(CPU) -mtune=$(CPU) -mfpu=$(FPU) -mfloat-abi=hard
CFLAGS += -D__FPU_PRESENT=1 -D__VFP_FP__ -DUSE_USB_FS -DUSE_DEVICE_MODE -DUSE_USB_OTG_ID=0 -DHSE_VALUE=$(OMV_HSE_VALUE)\
@@ -34,6 +34,10 @@ OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/alloc/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/common/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/imlib/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/
+OMV_CFLAGS += -I$(TOP_DIR)/hal/cmsis/include/
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine/include
+OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/modules/TinyEngine/codegen/Include
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/sensors/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/ports/$(PORT)/
OMV_CFLAGS += -I$(TOP_DIR)/$(OMV_DIR)/ports/$(PORT)/modules/
@@ -213,6 +217,50 @@ FIRM_OBJ += $(addprefix $(BUILD)/$(OMV_DIR)/imlib/, \
zbar.o \
)
+FIRM_OBJ += $(addprefix $(BUILD)/$(OMV_DIR)/modules/TinyEngine/, \
+ codegen/Source/genModel.o \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq.o \
+ codegen/Source/depthwise_kernel3x3_stride1_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel3x3_stride2_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel5x5_stride1_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel7x7_stride1_inplace_CHW_fpreq_bitmask.o \
+ codegen/Source/depthwise_kernel7x7_stride2_inplace_CHW_fpreq_bitmask.o \
+ src/kernels/fp_requantize_op/add_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch8_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch16_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch24_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_ch48_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq.o \
+ src/kernels/int_forward_op/avgpooling.o \
+ src/kernels/fp_requantize_op/convolve_s8_kernel3_inputch3_stride2_pad1_fpreq.o \
+ src/kernels/fp_requantize_op/mat_mul_kernels_fpreq.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask.o \
+ src/kernels/fp_requantize_op/convolve_1x1_s8_fpreq_mask_partialCH.o \
+ src/kernels/fp_backward_op/sum_4D_exclude_fp.o \
+ src/kernels/fp_backward_op/where_fp.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride1_inpad1_outpad0.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel3_stride2_inpad1_outpad1.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride1_inpad2_outpad0.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel5_stride2_inpad2_outpad1.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride1_inpad3_outpad0.o \
+ src/kernels/fp_backward_op/transpose_depthwise_conv_fp_kernel7_stride2_inpad3_outpad1.o \
+ src/kernels/fp_backward_op/tte_exp_fp.o \
+ src/kernels/fp_backward_op/sub_fp.o \
+ src/kernels/fp_backward_op/mul_fp.o \
+ src/kernels/fp_backward_op/pointwise_conv_fp.o \
+ src/kernels/fp_backward_op/group_pointwise_conv_fp.o \
+ src/kernels/fp_backward_op/group_conv_fp_kernel4_stride1_pad0.o \
+ src/kernels/fp_backward_op/group_conv_fp_kernel8_stride1_pad0.o \
+ src/kernels/fp_backward_op/strided_slice_4Dto4D_fp.o \
+ src/kernels/fp_backward_op/sum_3D_fp.o \
+ src/kernels/fp_backward_op/nll_loss_fp.o \
+ src/kernels/fp_backward_op/log_softmax_fp.o \
+ )
+
FIRM_OBJ += $(wildcard $(BUILD)/$(OMV_DIR)/ports/$(PORT)/*.o)
FIRM_OBJ += $(wildcard $(BUILD)/$(MICROPY_DIR)/modules/*.o)
FIRM_OBJ += $(wildcard $(BUILD)/$(MICROPY_DIR)/ports/$(PORT)/modules/*.o)
@@ -625,7 +673,7 @@ endif
# This target generates the main/app firmware image located at 0x08010000
$(FIRMWARE): FIRMWARE_OBJS
$(CPP) -P -E -I$(OMV_BOARD_CONFIG_DIR) $(OMV_DIR)/ports/$(PORT)/$(LDSCRIPT).ld.S > $(BUILD)/$(LDSCRIPT).lds
- $(CC) $(LDFLAGS) $(FIRM_OBJ) -o $(FW_DIR)/$(FIRMWARE).elf $(LIBS) -lgcc
+ $(CC) $(LDFLAGS) $(FIRM_OBJ) -o $(FW_DIR)/$(FIRMWARE).elf $(LIBS) -lgcc -lm
$(OBJCOPY) -Obinary -R .big_const* $(FW_DIR)/$(FIRMWARE).elf $(FW_DIR)/$(FIRMWARE).bin
$(PYTHON) $(MKDFU) -D $(DFU_DEVICE) -b $(MAIN_APP_ADDR):$(FW_DIR)/$(FIRMWARE).bin $(FW_DIR)/$(FIRMWARE).dfu
@@ -633,7 +681,7 @@ ifeq ($(OMV_ENABLE_BL), 1)
# This target generates the bootloader.
$(BOOTLOADER): FIRMWARE_OBJS BOOTLOADER_OBJS
$(CPP) -P -E -I$(OMV_BOARD_CONFIG_DIR) $(BOOTLDR_DIR)/stm32fxxx.ld.S > $(BUILD)/$(BOOTLDR_DIR)/stm32fxxx.lds
- $(CC) $(BL_LDFLAGS) $(BOOT_OBJ) -o $(FW_DIR)/$(BOOTLOADER).elf -lgcc
+ $(CC) $(BL_LDFLAGS) $(BOOT_OBJ) -o $(FW_DIR)/$(BOOTLOADER).elf -lgcc -lm
$(OBJCOPY) -Obinary $(FW_DIR)/$(BOOTLOADER).elf $(FW_DIR)/$(BOOTLOADER).bin
$(PYTHON) $(MKDFU) -D $(DFU_DEVICE) -b 0x08000000:$(FW_DIR)/$(BOOTLOADER).bin $(FW_DIR)/$(BOOTLOADER).dfu
endif