From 3b783c950b72f64a3e56479f074c7ff3f6e3325f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Kl=C3=B6tzl?= Date: Wed, 27 Dec 2023 15:18:59 +0100 Subject: [PATCH] src: add a macro to create a FMV resolver --- src/dna4_count_mismatches_rc_x86.c | 69 ++++------------------- src/dna4_fill_random_x86.c | 58 +------------------ src/dna4_revcomp_x86.c | 59 +------------------- src/dna_internal.h | 89 ++++++++++++++++++++++++++++++ src/dnax_count_mismatches_x86.c | 67 +++------------------- src/dnax_extract_dna4_x86.c | 67 ++++------------------ 6 files changed, 123 insertions(+), 286 deletions(-) diff --git a/src/dna4_count_mismatches_rc_x86.c b/src/dna4_count_mismatches_rc_x86.c index 7324286..a321e9a 100644 --- a/src/dna4_count_mismatches_rc_x86.c +++ b/src/dna4_count_mismatches_rc_x86.c @@ -1,6 +1,6 @@ /** * SPDX-License-Identifier: MIT - * Copyright 2021 - 2022 (C) Fabian Klötzl + * Copyright 2021 - 2023 (C) Fabian Klötzl */ #include "config.h" @@ -27,61 +27,12 @@ dna4_count_mismatches_rc_select(void) } } -#if CAN_IFUNC && __has_attribute(ifunc) - -DNA_PUBLIC -size_t -dna4_count_mismatches_rc(const char *begin, const char *end, const char *other) - __attribute__((ifunc("dna4_count_mismatches_rc_select"))); - -#elif defined(__APPLE__) && 0 - -void * -dna4_count_mismatches_rc_macho(void) __asm__("_dna4_count_mismatches_rc"); - -DNA_PUBLIC -void * -dna4_count_mismatches_rc_macho(void) -{ - __asm__(".symbol_resolver _dna4_count_mismatches_rc"); - return (void *)dna4_count_mismatches_rc_select(); -} - -#else - -// If ifunc is unavailable (for instance on macOS or hurd) we have to implement -// the functionality ourselves. Using a function pointer is faster than a -// boolean variable. 
- -size_t -dna4_count_mismatches_rc_callonce( - const char *begin, const char *end, const char *other); - -static dna4_count_mismatches_rc_fn *dna4_count_mismatches_rc_fnptr = - dna4_count_mismatches_rc_callonce; - -DNA_LOCAL -size_t -dna4_count_mismatches_rc_callonce( - const char *begin, const char *end, const char *other) -{ - dna4_count_mismatches_rc_fnptr = dna4_count_mismatches_rc_select(); - return dna4_count_mismatches_rc_fnptr(begin, end, other); -} - -DNA_PUBLIC -size_t -dna4_count_mismatches_rc(const char *begin, const char *end, const char *other) -{ - return dna4_count_mismatches_rc_fnptr(begin, end, other); -} - -DNA_LOCAL -DNA_CONSTRUCTOR -void -dna4_count_mismatches_rc_init(void) -{ - dna4_count_mismatches_rc_fnptr = dna4_count_mismatches_rc_select(); -} - -#endif +RESOLVER( + size_t, + dna4_count_mismatches_rc, + const char *, + begin, + const char *, + end, + const char *, + other) diff --git a/src/dna4_fill_random_x86.c b/src/dna4_fill_random_x86.c index 09ec845..006b107 100644 --- a/src/dna4_fill_random_x86.c +++ b/src/dna4_fill_random_x86.c @@ -1,6 +1,6 @@ /** * SPDX-License-Identifier: MIT - * Copyright 2022 (C) Fabian Klötzl + * Copyright 2022 - 2023 (C) Fabian Klötzl */ #include "config.h" @@ -27,58 +27,4 @@ dna4_fill_random_select(void) } } -#if CAN_IFUNC && __has_attribute(ifunc) - -DNA_PUBLIC -void -dna4_fill_random(char *dest, char *end, uint32_t seed) - __attribute__((ifunc("dna4_fill_random_select"))); - -#elif defined(__APPLE__) && 0 - -void * -dna4_fill_random_macho(void) __asm__("_dna4_fill_random"); - -DNA_PUBLIC -void * -dna4_fill_random_macho(void) -{ - __asm__(".symbol_resolver _dna4_fill_random"); - return (void *)dna4_fill_random_select(); -} - -#else - -// If ifunc is unavailable (for instance on macOS or hurd) we have to implement -// the functionality ourselves. Using a function pointer is faster than a -// boolean variable. 
- -void -dna4_fill_random_callonce(char *dest, char *end, uint32_t seed); - -static dna4_fill_random_fn *dna4_fill_random_fnptr = dna4_fill_random_callonce; - -DNA_LOCAL -void -dna4_fill_random_callonce(char *dest, char *end, uint32_t seed) -{ - dna4_fill_random_fnptr = dna4_fill_random_select(); - dna4_fill_random_fnptr(dest, end, seed); -} - -DNA_PUBLIC -void -dna4_fill_random(char *dest, char *end, uint32_t seed) -{ - dna4_fill_random_fnptr(dest, end, seed); -} - -DNA_LOCAL -DNA_CONSTRUCTOR -void -dna4_fill_random_init(void) -{ - dna4_fill_random_fnptr = dna4_fill_random_select(); -} - -#endif +RESOLVER_VOID(void, dna4_fill_random, char *, dest, char *, end, uint32_t, seed) diff --git a/src/dna4_revcomp_x86.c b/src/dna4_revcomp_x86.c index 4822145..c9f2350 100644 --- a/src/dna4_revcomp_x86.c +++ b/src/dna4_revcomp_x86.c @@ -1,6 +1,6 @@ /** * SPDX-License-Identifier: MIT - * Copyright 2018 - 2022 (C) Fabian Klötzl + * Copyright 2018 - 2023 (C) Fabian Klötzl */ #include "config.h" @@ -27,58 +27,5 @@ dna4_revcomp_select(void) } } -#if CAN_IFUNC && __has_attribute(ifunc) - -DNA_PUBLIC -char * -dna4_revcomp(const char *begin, const char *end, char *dest) - __attribute__((ifunc("dna4_revcomp_select"))); - -#elif defined(__APPLE__) && 0 - -void * -dna4_revcomp_macho(void) __asm__("_dna4_revcomp"); - -DNA_PUBLIC -void * -dna4_revcomp_macho(void) -{ - __asm__(".symbol_resolver _dna4_revcomp"); - return (void *)dna4_revcomp_select(); -} - -#else - -// If ifunc is unavailable (for instance on macOS or hurd) we have to implement -// the functionality ourselves. Using a function pointer is faster than a -// boolean variable. 
- -char * -dna4_revcomp_callonce(const char *begin, const char *end, char *dest); - -static dna4_revcomp_fn *dna4_revcomp_fnptr = dna4_revcomp_callonce; - -DNA_LOCAL -char * -dna4_revcomp_callonce(const char *begin, const char *end, char *dest) -{ - dna4_revcomp_fnptr = dna4_revcomp_select(); - return dna4_revcomp_fnptr(begin, end, dest); -} - -DNA_PUBLIC -char * -dna4_revcomp(const char *begin, const char *end, char *dest) -{ - return dna4_revcomp_fnptr(begin, end, dest); -} - -DNA_LOCAL -DNA_CONSTRUCTOR -void -dna4_revcomp_init(void) -{ - dna4_revcomp_fnptr = dna4_revcomp_select(); -} - -#endif +RESOLVER( + char *, dna4_revcomp, const char *, begin, const char *, end, char *, dest) diff --git a/src/dna_internal.h b/src/dna_internal.h index cb0e682..2e82ae3 100644 --- a/src/dna_internal.h +++ b/src/dna_internal.h @@ -81,6 +81,95 @@ dna4_fill_random_fn dna4_fill_random_avx2; dna4_fill_random_fn dna4_fill_random_sse42; dna4_fill_random_fn dna4_fill_random_generic; +// Macros that help with defining a runtime resolver for function multi +// versioning. + +#define STRINGIFY(s) #s +#define GET_1(_1, ...) _1 +#define GET_2(_1, _2, ...) _2 +#define GET_3(_1, _2, _3, ...) _3 +#define GET_4(_1, _2, _3, _4, ...) _4 +#define GET_5(_1, _2, _3, _4, _5, ...) _5 +#define GET_6(_1, _2, _3, _4, _5, _6) _6 + +#define ARG_1_TYPE(...) GET_1(__VA_ARGS__) +#define ARG_1_NAME(...) GET_2(__VA_ARGS__) +#define ARG_2_TYPE(...) GET_3(__VA_ARGS__) +#define ARG_2_NAME(...) GET_4(__VA_ARGS__) +#define ARG_3_TYPE(...) GET_5(__VA_ARGS__) +#define ARG_3_NAME(...) GET_6(__VA_ARGS__) + +#define ARG_1(...) ARG_1_TYPE(__VA_ARGS__) ARG_1_NAME(__VA_ARGS__) +#define ARG_2(...) ARG_2_TYPE(__VA_ARGS__) ARG_2_NAME(__VA_ARGS__) +#define ARG_3(...) ARG_3_TYPE(__VA_ARGS__) ARG_3_NAME(__VA_ARGS__) + +#if CAN_IFUNC && __has_attribute(ifunc) + +// On Linux there is the special "ifunc" syntax we can use to pick the optimal +// implementation at runtime. + +#define RESOLVER(RETTYPE, NAME, ...) 
\ + DNA_PUBLIC \ + RETTYPE NAME(ARG_1(__VA_ARGS__), ARG_2(__VA_ARGS__), ARG_3(__VA_ARGS__)) \ + __attribute__((ifunc(STRINGIFY(NAME) "_select"))); + +#define RESOLVER_VOID(RETTYPE, NAME, ...) \ + RESOLVER(RETTYPE, NAME, __VA_ARGS__) + +#else + +// If ifunc is unavailable (for instance on macOS or hurd) we have to implement +// the functionality ourselves. To this end we define a function pointer that +// points to the optimal implementation. Said pointer is initialised to a +// "XXX_callonce" function which, when called, picks the best implementation, +// sets the pointer, and finally forwards the arguments to the actual function. +// This overhead on the first function call can be avoided if the system +// supports __attribute__((constructor)). Then the optimal implementation will +// be picked when the program is first loaded. + +#define RESOLVER_META(STATEMENT, RETTYPE, NAME, ...) \ + \ + RETTYPE NAME##_callonce( \ + ARG_1(__VA_ARGS__), ARG_2(__VA_ARGS__), ARG_3(__VA_ARGS__)); \ + \ + static NAME##_fn *NAME##_fnptr = NAME##_callonce; \ + \ + DNA_LOCAL \ + RETTYPE NAME##_callonce( \ + ARG_1(__VA_ARGS__), ARG_2(__VA_ARGS__), ARG_3(__VA_ARGS__)) \ + { \ + NAME##_fnptr = NAME##_select(); \ + STATEMENT NAME##_fnptr( \ + ARG_1_NAME(__VA_ARGS__), ARG_2_NAME(__VA_ARGS__), \ + ARG_3_NAME(__VA_ARGS__)); \ + } \ + \ + DNA_PUBLIC \ + RETTYPE NAME(ARG_1(__VA_ARGS__), ARG_2(__VA_ARGS__), ARG_3(__VA_ARGS__)) \ + { \ + STATEMENT NAME##_fnptr( \ + ARG_1_NAME(__VA_ARGS__), ARG_2_NAME(__VA_ARGS__), \ + ARG_3_NAME(__VA_ARGS__)); \ + } \ + \ + DNA_LOCAL \ + DNA_CONSTRUCTOR \ + void NAME##_init(void) \ + { \ + NAME##_fnptr = NAME##_select(); \ + } + +// In C it isn't allowed to have a return statement with a void expression in a +// void function. Thus, we need an extra overload that removes the return +// statement from the above code. In C++ this wouldn't be a problem. + +#define RESOLVER_VOID(RETTYPE, NAME, ...) 
\ + RESOLVER_META((void), RETTYPE, NAME, __VA_ARGS__) +#define RESOLVER(RETTYPE, NAME, ...) \ + RESOLVER_META(return, RETTYPE, NAME, __VA_ARGS__) + +#endif + #ifdef __cplusplus } #endif diff --git a/src/dnax_count_mismatches_x86.c b/src/dnax_count_mismatches_x86.c index d5af134..8848406 100644 --- a/src/dnax_count_mismatches_x86.c +++ b/src/dnax_count_mismatches_x86.c @@ -30,61 +30,12 @@ dnax_count_mismatches_select(void) } } -#if CAN_IFUNC && __has_attribute(ifunc) - -DNA_PUBLIC -size_t -dnax_count_mismatches(const char *begin, const char *end, const char *other) - __attribute__((ifunc("dnax_count_mismatches_select"))); - -#elif defined(__APPLE__) && 0 - -void * -dnax_count_mismatches_macho(void) __asm__("_dnax_count_mismatches"); - -DNA_PUBLIC -void * -dnax_count_mismatches_macho(void) -{ - __asm__(".symbol_resolver _dnax_count_mismatches"); - return (void *)dnax_count_mismatches_select(); -} - -#else - -// If ifunc is unavailable (for instance on macOS or hurd) we have to implement -// the functionality ourselves. Using a function pointer is faster than a -// boolean variable. 
- -size_t -dnax_count_mismatches_callonce( - const char *begin, const char *end, const char *other); - -static dnax_count_mismatches_fn *dnax_count_mismatches_fnptr = - dnax_count_mismatches_callonce; - -DNA_LOCAL -size_t -dnax_count_mismatches_callonce( - const char *begin, const char *end, const char *other) -{ - dnax_count_mismatches_fnptr = dnax_count_mismatches_select(); - return dnax_count_mismatches_fnptr(begin, end, other); -} - -DNA_PUBLIC -size_t -dnax_count_mismatches(const char *begin, const char *end, const char *other) -{ - return dnax_count_mismatches_fnptr(begin, end, other); -} - -DNA_LOCAL -DNA_CONSTRUCTOR -void -dnax_count_mismatches_init(void) -{ - dnax_count_mismatches_fnptr = dnax_count_mismatches_select(); -} - -#endif +RESOLVER( + size_t, + dnax_count_mismatches, + const char *, + begin, + const char *, + end, + const char *, + other) diff --git a/src/dnax_extract_dna4_x86.c b/src/dnax_extract_dna4_x86.c index c9ba045..2d65339 100644 --- a/src/dnax_extract_dna4_x86.c +++ b/src/dnax_extract_dna4_x86.c @@ -1,6 +1,6 @@ /** * SPDX-License-Identifier: MIT - * Copyright 2018 - 2022 (C) Fabian Klötzl + * Copyright 2018 - 2023 (C) Fabian Klötzl */ #include "config.h" @@ -25,59 +25,12 @@ dnax_extract_dna4_select(void) } } -#if CAN_IFUNC && __has_attribute(ifunc) - -DNA_PUBLIC -char * -dnax_extract_dna4(const char *begin, const char *end, char *dest) - __attribute__((ifunc("dnax_extract_dna4_select"))); - -#elif defined(__APPLE__) && 0 - -void * -dnax_extract_dna4_macho(void) __asm__("_dnax_extract_dna4"); - -DNA_PUBLIC -void * -dnax_extract_dna4_macho(void) -{ - __asm__(".symbol_resolver _dnax_extract_dna4"); - return (void *)dnax_extract_dna4_select(); -} - -#else - -// If ifunc is unavailable (for instance on macOS or hurd) we have to implement -// the functionality ourselves. Using a function pointer is faster than a -// boolean variable. 
- -char * -dnax_extract_dna4_callonce(const char *begin, const char *end, char *dest); - -static dnax_extract_dna4_fn *dnax_extract_dna4_fnptr = - dnax_extract_dna4_callonce; - -DNA_LOCAL -char * -dnax_extract_dna4_callonce(const char *begin, const char *end, char *dest) -{ - dnax_extract_dna4_fnptr = dnax_extract_dna4_select(); - return dnax_extract_dna4_fnptr(begin, end, dest); -} - -DNA_PUBLIC -char * -dnax_extract_dna4(const char *begin, const char *end, char *dest) -{ - return dnax_extract_dna4_fnptr(begin, end, dest); -} - -DNA_LOCAL -DNA_CONSTRUCTOR -void -dnax_extract_dna4_init(void) -{ - dnax_extract_dna4_fnptr = dnax_extract_dna4_select(); -} - -#endif +RESOLVER( + char *, + dnax_extract_dna4, + const char *, + begin, + const char *, + end, + char *, + dest)