-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmemlib.c
99 lines (80 loc) · 2.47 KB
/
memlib.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include "memlib.h"
#include <string.h>
#include <stdbool.h>
#define MALLOC_ALIGNMENT (8) // not referenced by the code visible in this part of the file
// Largest element pitch (in bytes) the masked same-stride copy accepts; it also
// sizes the per-byte write mask used by that copy.
#define OPT_STRIDED_MEMCPY_MAX_STRIDE (128) // should be multiple of target vlen
#ifdef __TINYC__
// TinyCC has no __builtin_unreachable(); turn the optimizer hints into no-ops.
#define __builtin_unreachable() /**/
#endif
/*
 * Copy numEl elements of elSize bytes each from src to dest, where both
 * buffers place consecutive elements `stride` bytes apart.
 *
 * Only the first elSize bytes of every stride-sized slot are written; the
 * padding bytes of dest (offsets elSize .. stride-1 of each slot) are left
 * untouched and the matching bytes of src are never read.
 *
 * Preconditions, expressed as optimizer hints and NOT checked at run time
 * (violating them is undefined behaviour):
 *   - 0 < elSize <= stride <= OPT_STRIDED_MEMCPY_MAX_STRIDE
 *     (the mask below has exactly OPT_STRIDED_MEMCPY_MAX_STRIDE entries)
 *   - dest does not start inside [src, src + numEl * stride)
 */
static void strided_memcpy_same_stride(char* dest,
                                       const char *src,
                                       size_t numEl,
                                       size_t elSize,
                                       size_t stride) {
    if (stride > OPT_STRIDED_MEMCPY_MAX_STRIDE) {
        __builtin_unreachable();
    }
    // no overlap; a dest that begins exactly one past the source span is
    // adjacent, not overlapping, hence the strict '<'
    if (dest >= src && dest < src + numEl * stride) {
        __builtin_unreachable();
    }
    if (elSize > stride) {
        __builtin_unreachable();
    }
    if (elSize == 0) {
        __builtin_unreachable();
    }

    // Per-byte write mask for ONE stride-sized slot: payload bytes are true,
    // padding bytes are false.
    bool mask[OPT_STRIDED_MEMCPY_MAX_STRIDE];
    for (size_t i = 0; i < OPT_STRIDED_MEMCPY_MAX_STRIDE; i++) {
        mask[i] = i < elSize;
    }

    // The mask must be looked up by the offset inside the current slot.
    // Indexing it by the absolute byte position modulo the mask size would
    // only be correct when stride == OPT_STRIDED_MEMCPY_MAX_STRIDE and would
    // skip every element but the first of each 128-byte window otherwise.
    for (size_t el = 0; el < numEl; el++) {
        char *slot_out = dest + el * stride;
        const char *slot_in = src + el * stride;
        for (size_t off = 0; off < stride; off++) {
            if (mask[off]) {
                slot_out[off] = slot_in[off];
            }
        }
    }
}
/*
 * Copy numEl elements of elSize bytes each from src to dest.
 *
 * Element i is read from byte offset i * src_stride of src and written to
 * byte offset i * dest_stride of dest. Bytes of dest between elements are
 * not written by the per-element and masked paths.
 *
 * Preconditions, expressed as optimizer hints and NOT checked at run time
 * (violating them is undefined behaviour):
 *   - elSize > 0, elSize <= dest_stride, elSize <= src_stride
 *   - dest does not start inside [src, src + numEl * src_stride)
 */
void strided_memcpy(void* dest, size_t dest_stride,
                    const void *src, size_t src_stride,
                    size_t numEl,
                    size_t elSize) {
    // Byte-typed views of the buffers: subscripting or doing arithmetic on
    // void pointers is a GNU extension, not standard C.
    char *out = dest;
    const char *in = src;

    // no overlap
    if (out >= in && out < in + numEl * src_stride) {
        __builtin_unreachable();
    }
    if (elSize > dest_stride) {
        __builtin_unreachable();
    }
    if (elSize > src_stride) {
        __builtin_unreachable();
    }
    if (elSize == 0) {
        __builtin_unreachable();
    }

    if (dest_stride == src_stride && dest_stride == elSize) {
        // Both sides are densely packed: one contiguous copy.
        memcpy(out, in, numEl * elSize);
    }
    else if (dest_stride == src_stride && dest_stride <= OPT_STRIDED_MEMCPY_MAX_STRIDE) {
        // Same pitch on both sides and small enough for the masked helper.
        // The bound is on the stride itself (not on stride - elSize) because
        // the helper requires stride <= OPT_STRIDED_MEMCPY_MAX_STRIDE.
        strided_memcpy_same_stride(out, in, numEl, elSize, dest_stride);
    }
    else {
        // Different pitches, or a pitch too large for the masked helper
        // (sparse data): copy element by element.
        // TODO: implement new variant with scatter & gather
        for (size_t i = 0; i < numEl; i ++) {
            memcpy(out + dest_stride * i, in + src_stride * i, elSize);
        }
    }
}