Skip to content

Commit

Permalink
Merge pull request #4597 from martin-frbg/issue4581
Browse files Browse the repository at this point in the history
Add support for the ARM Cortex-A76 cpu
  • Loading branch information
martin-frbg authored Apr 3, 2024
2 parents 5865973 + 584e876 commit a87713f
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 3 deletions.
7 changes: 7 additions & 0 deletions Makefile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif
endif

# Compiler flags for CORE = CORTEXA76: target the armv8.2-a architecture and
# tune for cortex-a76 in the C compiler options.
ifeq ($(CORE), CORTEXA76)
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
# The same flags are added for Fortran unless the Fortran compiler is NAG
# (presumably NAG does not accept these GCC-style options -- TODO confirm).
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
endif
endif

ifeq ($(CORE), FT2000)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **Cortex A57**: Optimized Level-3 and Level-2 functions
- **Cortex A72**: same as A57 (different cpu specifications)
- **Cortex A73**: same as A57 (different cpu specifications)
- **Cortex A76**: same as A57 (different cpu specifications)
- **Falkor**: same as A57 (different cpu specifications)
- **ThunderX**: Optimized some Level-1 functions
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2
Expand Down
1 change: 1 addition & 0 deletions TargetList.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ CORTEXA53
CORTEXA57
CORTEXA72
CORTEXA73
CORTEXA76
CORTEXA510
CORTEXA710
CORTEXX1
Expand Down
2 changes: 1 addition & 1 deletion cmake/prebuild.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ endif ()
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73")
elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73" OR "${TCORE}" STREQUAL "CORTEXA76")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t49152\n"
"#define L1_CODE_LINESIZE\t64\n"
Expand Down
10 changes: 8 additions & 2 deletions cpuid_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ size_t length64=sizeof(value64);
#define CPU_CORTEXA57 3
#define CPU_CORTEXA72 4
#define CPU_CORTEXA73 5
#define CPU_CORTEXA76 23
#define CPU_NEOVERSEN1 11
#define CPU_NEOVERSEV1 16
#define CPU_NEOVERSEN2 17
Expand Down Expand Up @@ -89,7 +90,8 @@ static char *cpuname[] = {
"CORTEXX2",
"CORTEXA510",
"CORTEXA710",
"FT2000"
"FT2000",
"CORTEXA76"
};

static char *cpuname_lower[] = {
Expand All @@ -115,7 +117,8 @@ static char *cpuname_lower[] = {
"cortexx2",
"cortexa510",
"cortexa710",
"ft2000"
"ft2000",
"cortexa76"
};

int get_feature(char *search)
Expand Down Expand Up @@ -210,6 +213,8 @@ int detect(void)
return CPU_CORTEXX2;
else if (strstr(cpu_part, "0xd4e")) //X3
return CPU_CORTEXX2;
else if (strstr(cpu_part, "0xd0b"))
return CPU_CORTEXA76;
}
// Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
Expand Down Expand Up @@ -391,6 +396,7 @@ void get_cpuconfig(void)
break;

case CPU_NEOVERSEV1:
case CPU_CORTEXA76:
printf("#define %s\n", cpuname[d]);
printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n");
Expand Down
15 changes: 15 additions & 0 deletions getarch.c
Original file line number Diff line number Diff line change
Expand Up @@ -1331,6 +1331,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "CORTEXA73"
#endif

#ifdef FORCE_CORTEXA76
/* Target description used when FORCE_CORTEXA76 is defined: sets FORCE and the
 * architecture, sub-architecture, sub-directory, library and core names, plus
 * the ARCHCONFIG string of -D flags (cache/TLB parameters and feature macros). */
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA76"
#define SUBDIRNAME "arm64"
/* NOTE(review): L1_CODE_SIZE is 49152 here, while the CPU_CORTEXA76 case in
 * cpuid_arm64.c get_cpuconfig() prints L1_CODE_SIZE 65536 -- confirm which
 * value is intended so forced and detected builds agree. */
#define ARCHCONFIG "-DCORTEXA76 " \
"-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa76"
#define CORENAME "CORTEXA76"
#endif

#ifdef FORCE_CORTEXX1
#define FORCE
#define ARCHITECTURE "ARM64"
Expand Down
3 changes: 3 additions & 0 deletions kernel/arm64/KERNEL.CORTEXA76
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# CORTEXA76 defines no kernel settings of its own; it includes the CORTEXA57
# kernel configuration unchanged.
include $(KERNELDIR)/KERNEL.CORTEXA57


35 changes: 35 additions & 0 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -3351,6 +3351,41 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 2048

#elif defined(CORTEXA76)

/* Default GEMM parameters selected when CORTEXA76 is defined. */

/* Unroll factors (M and N) for single-precision real GEMM. */
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4

/* Unroll factors for double-precision real GEMM. */
#define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 4

/* Unroll factors for single-precision complex GEMM. */
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4

/* Unroll factors for double-precision complex GEMM. */
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4

/* SWITCH_RATIO is 8 when XDOUBLE or DOUBLE is defined, 16 otherwise. */
#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#else
#define SWITCH_RATIO 16
#endif

/* Default P/Q/R values per precision (presumably the GEMM blocking sizes --
 * TODO confirm against the level-3 driver that consumes them). */
#define SGEMM_DEFAULT_P 256
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 64

#define SGEMM_DEFAULT_Q 512
#define DGEMM_DEFAULT_Q 256
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 256

#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#elif defined(CORTEXA53) || defined(CORTEXA55)

#define SGEMM_DEFAULT_UNROLL_M 8
Expand Down

0 comments on commit a87713f

Please sign in to comment.