From 9f0fb6e66222f34e5cb4d4c78bceb60c1c38bc6f Mon Sep 17 00:00:00 2001 From: Sebastien Fabbro Date: Wed, 24 Jul 2013 09:37:16 -0700 Subject: [PATCH 01/13] Respect user's LDFLAGS --- Makefile.generic | 5 ----- Makefile.power | 11 ----------- Makefile.sparc | 1 - Makefile.x86 | 3 --- Makefile.x86_64 | 13 ------------- ctest/Makefile | 2 +- exports/Makefile | 20 ++++++++++---------- test/Makefile | 2 +- 8 files changed, 12 insertions(+), 45 deletions(-) diff --git a/Makefile.generic b/Makefile.generic index 770aaf850d..a5e50b1f0e 100644 --- a/Makefile.generic +++ b/Makefile.generic @@ -1,6 +1 @@ COPT = -Wall -O2 # -DGEMMTEST -ifdef BINARY64 -else -# LDFLAGS = -m elf32ppc -LDFLAGS = -m elf_i386 -endif diff --git a/Makefile.power b/Makefile.power index 35eb2cb7b1..c6d6aeb504 100644 --- a/Makefile.power +++ b/Makefile.power @@ -17,13 +17,7 @@ endif endif ifdef BINARY64 -ifeq ($(OSNAME), Linux) -LDFLAGS = -m elf64ppc -endif -ifeq ($(OSNAME), Darwin) -LDFLAGS = -arch ppc64 -endif ifeq ($(OSNAME), AIX) CCOMMON_OPT += -mpowerpc64 -maix64 @@ -34,17 +28,12 @@ ifeq ($(COMPILER_F77), xlf) FCOMMON_OPT += -q64 endif ARFLAGS = -X 64 -LDFLAGS = -b64 ASFLAGS = -a64 endif else -ifeq ($(OSNAME), Linux) -LDFLAGS = -m elf32ppc -endif ifeq ($(OSNAME), AIX) CCOMMON_OPT += -Wa,-a32 ARFLAGS = -X 32 -LDFLAGS = -b32 ASFLAGS = -a32 endif endif diff --git a/Makefile.sparc b/Makefile.sparc index c2b878e73d..c58c77e1a9 100644 --- a/Makefile.sparc +++ b/Makefile.sparc @@ -10,7 +10,6 @@ endif ifeq ($(COMPILER_F77), f90) FCOMMON_OPT += -xarch=v9 endif -LDFLAGS = -64 else CCOMMON_OPT += -mcpu=v9 diff --git a/Makefile.x86 b/Makefile.x86 index 94ca7c4a7f..cd7cc9f90f 100644 --- a/Makefile.x86 +++ b/Makefile.x86 @@ -1,8 +1,5 @@ # COMPILER_PREFIX = mingw32- -ifeq ($(OSNAME), Linux) -LDFLAGS = -melf_i386 -endif ifeq ($(OSNAME), Interix) ARFLAGS = -m x86 diff --git a/Makefile.x86_64 b/Makefile.x86_64 index b939e5459b..c8d4b237bc 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -2,25 +2,12 @@ ifeq 
($(OSNAME), SunOS) ifdef BINARY64 -LDFLAGS = -64 ifeq ($(F_COMPILER), SUN) FCOMMON_OPT += -m64 endif endif endif -ifeq ($(OSNAME), FreeBSD) -LDFLAGS = -m elf_x86_64_fbsd -endif - -ifeq ($(OSNAME), Linux) -LDFLAGS = -m elf_x86_64 -endif - -ifeq ($(OSNAME), Darwin) -LDFLAGS = -endif - ifeq ($(OSNAME), Interix) ARFLAGS = -m x64 endif diff --git a/ctest/Makefile b/ctest/Makefile index b1295640f3..0991168958 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -77,7 +77,7 @@ endif clean :: rm -f x* -FLDFLAGS = $(FFLAGS:-fPIC=) +FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS) CEXTRALIB = # Single real diff --git a/exports/Makefile b/exports/Makefile index 6502d5d01f..1e1837a11e 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -89,7 +89,7 @@ else endif libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def - $(CC) $(CFLAGS) libgoto2_shared.def -shared -o $(@F) \ + $(CC) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) @@ -116,14 +116,14 @@ ifeq ($(OSNAME), Linux) so : ../$(LIBSONAME) ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c - $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--retain-symbols-file=linux.def -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) ifneq ($(C_COMPILER), LSB) - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. + $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. else #Use FC on LSB - $(FC) $(FFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. + $(FC) $(FFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. 
endif rm -f linktest @@ -135,10 +135,10 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD)) so : ../$(LIBSONAME) ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c - $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--retain-symbols-file=linux.def $(FEXTRALIB) $(EXTRALIB) - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. + $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. rm -f linktest endif @@ -148,15 +148,15 @@ ifeq ($(OSNAME), OSF1) so : ../$(LIBSONAME) ../$(LIBSONAME) : - $(CC) -shared -o ../$(LIBSONAME) ../$(LIBNAME) + $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME) endif ifeq ($(OSNAME), SunOS) so : ../$(LIBSONAME) - $(CC) $(CFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB) - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. + $(CC) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. rm -f linktest endif @@ -199,7 +199,7 @@ symbol.S : gensymbol perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S test : linktest.c - $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. + $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. rm -f linktest linktest.c : gensymbol ../Makefile.system ../getarch.c diff --git a/test/Makefile b/test/Makefile index 2df499b118..0bc06e85f5 100644 --- a/test/Makefile +++ b/test/Makefile @@ -88,7 +88,7 @@ else endif endif -FLDFLAGS = $(FFLAGS:-fPIC=) +FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS) CEXTRALIB = From 4471c77905f8dae39c6b5569a724f1368293c646 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Fri, 26 Jul 2013 23:43:54 +0800 Subject: [PATCH 02/13] Fixed #261. 
Use strncmp instead of a comparing trick. --- driver/others/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/others/init.c b/driver/others/init.c index 4efc2816a3..5da71cec1e 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #define MAX_NODES 16 #define MAX_CPUS 256 @@ -315,7 +316,7 @@ static int numa_check(void) { } while ((dir = readdir(dp)) != NULL) { - if (*(unsigned int *) dir -> d_name == 0x065646f6eU) { + if (strncmp(dir->d_name, "node", 4)==0) { node = atoi(&dir -> d_name[4]); From 23186d9f218d767984fdbad747c3eaf44ac5a0ef Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sat, 27 Jul 2013 22:37:57 +0800 Subject: [PATCH 03/13] Fixed the FMA3 detection bug. --- cpuid_x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index 98af9d0417..9e850a2aa9 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -229,8 +229,8 @@ int get_cputype(int gettype){ if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; #ifndef NO_AVX if (support_avx()) feature |= HAVE_AVX; + if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; #endif - if ((ecx & (1 << 20)) != 0) feature |= HAVE_FMA3; if (have_excpuid() >= 0x01) { cpuid(0x80000001, &eax, &ebx, &ecx, &edx); From 1e1250b7031bcb975366f8dc7485402909e9972e Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sat, 27 Jul 2013 23:01:36 +0800 Subject: [PATCH 04/13] Fixed #260. Fixed generating 32-bit shared library on previous commit. 
--- exports/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/exports/Makefile b/exports/Makefile index 1e1837a11e..9fd93dd93e 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -89,7 +89,7 @@ else endif libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def - $(CC) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \ + $(CC) $(CFLAGS) $(LDFLAGS) libgoto2_shared.def -shared -o $(@F) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) @@ -116,7 +116,7 @@ ifeq ($(OSNAME), Linux) so : ../$(LIBSONAME) ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c - $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--retain-symbols-file=linux.def -Wl,-soname,$(LIBPREFIX).so.$(MAJOR_VERSION) $(EXTRALIB) ifneq ($(C_COMPILER), LSB) @@ -135,7 +135,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD)) so : ../$(LIBSONAME) ../$(LIBSONAME) : ../$(LIBNAME) linux.def linktest.c - $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive \ -Wl,--retain-symbols-file=linux.def $(FEXTRALIB) $(EXTRALIB) $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. @@ -148,15 +148,15 @@ ifeq ($(OSNAME), OSF1) so : ../$(LIBSONAME) ../$(LIBSONAME) : - $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME) + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) ../$(LIBNAME) endif ifeq ($(OSNAME), SunOS) so : ../$(LIBSONAME) - $(CC) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ + $(CC) $(CFLAGS) $(LDFLAGS) -shared -o ../$(LIBSONAME) \ -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(EXTRALIB) - $(CC) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. 
+ $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) $(FEXTRALIB) && echo OK. rm -f linktest endif From a2930664f42d5c7635bb83b3c7f3c988c5849c78 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sun, 28 Jul 2013 00:09:40 +0800 Subject: [PATCH 05/13] Refs #262. Added executable stack markings. --- CONTRIBUTORS.md | 4 ++++ common_alpha.h | 10 +++++++++- common_ia64.h | 9 ++++++++- common_mips64.h | 9 ++++++++- common_sparc.h | 11 ++++++++++- common_x86.h | 4 +++- common_x86_64.h | 5 ++++- 7 files changed, 46 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 4a13bcc2cf..59df776090 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -79,5 +79,9 @@ In chronological order: * [2013-07-11] create openblas_get_parallel to retrieve information which parallelization model is used by OpenBLAS. +* Sébastien Fabbro + * [2013-07-24] Modify makefile to respect user's LDFLAGS + * [2013-07-24] Add stack markings for GNU as arch-independent for assembler files + * [Your name or handle] <[email or website]> * [Date] [Brief summary of your changes] diff --git a/common_alpha.h b/common_alpha.h index cf794739ca..3b46c74ce5 100644 --- a/common_alpha.h +++ b/common_alpha.h @@ -150,9 +150,17 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ #define PROFCODE .prologue 0 #endif +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ .end REALNAME; \ - .ident VERSION + .ident VERSION; \ + GNUSTACK + #endif #ifdef DOUBLE diff --git a/common_ia64.h b/common_ia64.h index 81939cc1bd..3530e0b1d6 100644 --- a/common_ia64.h +++ b/common_ia64.h @@ -379,8 +379,15 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ #define PROFCODE #endif +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ - .endp REALNAME + 
.endp REALNAME ; \ + GNUSTACK #define START_ADDRESS 0x20000fc800000000UL diff --git a/common_mips64.h b/common_mips64.h index 1bee694078..c08fb2c759 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -235,10 +235,17 @@ REALNAME: ;\ .set noreorder ;\ .set nomacro +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ .set macro ;\ .set reorder ;\ - .end REALNAME + .end REALNAME ;\ + GNUSTACK #define PROFCODE #endif diff --git a/common_sparc.h b/common_sparc.h index 35d8bdb5f1..cfd27f7683 100644 --- a/common_sparc.h +++ b/common_sparc.h @@ -199,8 +199,17 @@ static __inline int blas_quickdivide(blasint x, blasint y){ .type REALNAME, #function; \ .proc 07; \ REALNAME:; + +#if defined(__linux__) && defined(__ELF__) +#define GNUSTACK .section .note.GNU-stack,"",%progbits +#else +#define GNUSTACK +#endif + #define EPILOGUE \ - .size REALNAME, .-REALNAME + .size REALNAME, .-REALNAME; \ + GNUSTACK + #endif #endif diff --git a/common_x86.h b/common_x86.h index 5f56839f87..48517d900c 100644 --- a/common_x86.h +++ b/common_x86.h @@ -301,7 +301,9 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ #define PROFCODE #endif -#define EPILOGUE .size REALNAME, .-REALNAME +#define EPILOGUE \ + .size REALNAME, .-REALNAME; \ + .section .note.GNU-stack,"",%progbits #endif diff --git a/common_x86_64.h b/common_x86_64.h index 8f9f736807..188903848b 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -372,7 +372,10 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ #define PROFCODE #endif -#define EPILOGUE .size REALNAME, .-REALNAME +#define EPILOGUE \ + .size REALNAME, .-REALNAME; \ + .section .note.GNU-stack,"",%progbits + #endif From 5b504d6c234681344e7eeb0c518c974c6773f656 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sun, 28 Jul 2013 17:39:24 +0800 Subject: [PATCH 06/13] Refs #263. 
Rollback bulldozer and piledriver kernels to barcelona kernels. --- Makefile.system | 6 ++++-- cpuid.h | 8 ++++---- driver/others/dynamic.c | 8 +++++--- getarch.c | 6 +++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/Makefile.system b/Makefile.system index b1f9ba514f..cb0cac3e05 100644 --- a/Makefile.system +++ b/Makefile.system @@ -324,14 +324,16 @@ ifeq ($(ARCH), x86) DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif ifeq ($(ARCH), x86_64) DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO ifneq ($(NO_AVX), 1) -DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER +DYNAMIC_CORE += SANDYBRIDGE +#BULLDOZER PILEDRIVER endif endif diff --git a/cpuid.h b/cpuid.h index 2cbbd45390..4311ce95e6 100644 --- a/cpuid.h +++ b/cpuid.h @@ -105,8 +105,8 @@ #define CORE_NANO 19 #define CORE_SANDYBRIDGE 20 #define CORE_BOBCAT 21 -#define CORE_BULLDOZER 22 -#define CORE_PILEDRIVER 23 +#define CORE_BULLDOZER CORE_BARCELONA +#define CORE_PILEDRIVER CORE_BARCELONA #define CORE_HASWELL CORE_SANDYBRIDGE #define HAVE_SSE (1 << 0) @@ -198,8 +198,8 @@ typedef struct { #define CPUTYPE_NANO 43 #define CPUTYPE_SANDYBRIDGE 44 #define CPUTYPE_BOBCAT 45 -#define CPUTYPE_BULLDOZER 46 -#define CPUTYPE_PILEDRIVER 47 +#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA +#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA // this define is because BLAS doesn't have haswell specific optimizations yet #define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 197cc2b2de..bf60efb190 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA; extern gotoblas_t gotoblas_BOBCAT; #ifndef NO_AVX extern 
gotoblas_t gotoblas_SANDYBRIDGE; -extern gotoblas_t gotoblas_BULLDOZER; -extern gotoblas_t gotoblas_PILEDRIVER; +//extern gotoblas_t gotoblas_BULLDOZER; +//extern gotoblas_t gotoblas_PILEDRIVER; #else //Use NEHALEM kernels for sandy bridge #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM +#endif + #define gotoblas_BULLDOZER gotoblas_BARCELONA #define gotoblas_PILEDRIVER gotoblas_BARCELONA -#endif + //Use sandy bridge kernels for haswell. #define gotoblas_HASWELL gotoblas_SANDYBRIDGE diff --git a/getarch.c b/getarch.c index 3ffda62446..ec9bb75a84 100644 --- a/getarch.c +++ b/getarch.c @@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "OPTERON" #endif -#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) +#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER) #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "BOBCAT" #endif -#if defined (FORCE_BULLDOZER) +#if 0 #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" @@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CORENAME "BULLDOZER" #endif -#if defined (FORCE_PILEDRIVER) +#if 0 #define FORCE #define FORCE_INTEL #define ARCHITECTURE "X86" From bd2da90e13673a579c2683a56fc38b0978ab0f54 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 29 Jul 2013 15:42:00 +0800 Subject: [PATCH 07/13] Fixed typo in getarch_2nd.c. 
--- getarch_2nd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getarch_2nd.c b/getarch_2nd.c index fc800cfacd..0b140bba4c 100644 --- a/getarch_2nd.c +++ b/getarch_2nd.c @@ -8,7 +8,7 @@ int main(int argc, char **argv) { - if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) { + if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) { printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); From 534c5ec919393259fd13c896f7552edf03538e1a Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Fri, 26 Jul 2013 23:43:54 +0800 Subject: [PATCH 08/13] Fixed #261. Use strncmp instead of a comparing trick. --- driver/others/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/others/init.c b/driver/others/init.c index 4efc2816a3..5da71cec1e 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -83,6 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #define MAX_NODES 16 #define MAX_CPUS 256 @@ -315,7 +316,7 @@ static int numa_check(void) { } while ((dir = readdir(dp)) != NULL) { - if (*(unsigned int *) dir -> d_name == 0x065646f6eU) { + if (strncmp(dir->d_name, "node", 4)==0) { node = atoi(&dir -> d_name[4]); From 749f45ffc84d48750e7f41cea8f02476a3304183 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sat, 27 Jul 2013 22:37:57 +0800 Subject: [PATCH 09/13] Fixed the FMA3 detection bug. 
--- cpuid_x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index 98af9d0417..9e850a2aa9 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -229,8 +229,8 @@ int get_cputype(int gettype){ if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; #ifndef NO_AVX if (support_avx()) feature |= HAVE_AVX; + if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; #endif - if ((ecx & (1 << 20)) != 0) feature |= HAVE_FMA3; if (have_excpuid() >= 0x01) { cpuid(0x80000001, &eax, &ebx, &ecx, &edx); From b5c2ac4fd6b0b7e782af0332f4ca6e0feec48300 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 29 Jul 2013 23:21:10 +0800 Subject: [PATCH 10/13] Fixed #264 the memory leak bug in dtrtri_U. --- interface/trtri.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/interface/trtri.c b/interface/trtri.c index 0564bc1830..71680e87d7 100644 --- a/interface/trtri.c +++ b/interface/trtri.c @@ -138,6 +138,9 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In // call dtrtri from lapack for a walk around. if(uplo==0){ dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info); +#ifndef PPC440 + blas_memory_free(buffer); +#endif return 0; } #endif From a07cc3957102f400ec15e893f05073113795d404 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 31 Jul 2013 14:41:39 +0800 Subject: [PATCH 11/13] Refs #266. Fixed the compiling bug with Open64 5.0. 
--- Makefile.system | 1 + exports/Makefile | 20 ++++++----- exports/gensymbol | 69 ++++++++++++++++++++++++++++++++++-- f_check | 21 ++++++++++- ftest3.f | 6 ++++ interface/trtri.c | 4 +-- lapack/trtri/dtrtri_lapack.f | 2 +- 7 files changed, 108 insertions(+), 15 deletions(-) create mode 100644 ftest3.f diff --git a/Makefile.system b/Makefile.system index cb0cac3e05..727b089606 100644 --- a/Makefile.system +++ b/Makefile.system @@ -897,6 +897,7 @@ export CC export FC export BU export FU +export NEED2UNDERSCORES export USE_THREAD export NUM_THREADS export NUM_CORES diff --git a/exports/Makefile b/exports/Makefile index 6502d5d01f..7ef2830db4 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -18,6 +18,10 @@ ifndef NO_LAPACKE NO_LAPACKE = 0 endif +ifndef NEED2UNDERSCORES +NEED2UNDERSCORES=0 +endif + ifeq ($(OSNAME), WINNT) ifeq ($(F_COMPILER), GFORTRAN) EXTRALIB += -lgfortran @@ -94,13 +98,13 @@ libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def -Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) libopenblas.def : gensymbol - perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) libgoto2_shared.def : gensymbol - perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) libgoto_hpl.def : gensymbol - perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) $(LIBDYNNAME) : ../$(LIBNAME) osx.def $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) @@ 
-187,23 +191,23 @@ static : ../$(LIBNAME) rm -f goto.$(SUFFIX) linux.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) osx.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) aix.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) + perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) symbol.S : gensymbol - perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S + perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S test : linktest.c $(CC) $(CFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. 
rm -f linktest linktest.c : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c + perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c clean :: @rm -f *.def *.dylib __.SYMDEF* diff --git a/exports/gensymbol b/exports/gensymbol index 0a9729a6c0..7076412918 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -114,8 +114,8 @@ # ALLAUX -- Auxiliary routines called from all precisions # already provided by @blasobjs: xerbla, lsame - ilaenv, ieeeck, lsamen, xerbla_array, iparmq, - ilaprec, ilatrans, ilauplo, iladiag, chla_transtype, + ilaenv, ieeeck, lsamen, iparmq, + ilaprec, ilatrans, ilauplo, iladiag, ilaver, slamch, slamc3, # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. @@ -2672,12 +2672,25 @@ #LAPACKE_zlagsy_work, ); +#These function may need 2 underscores. +@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,); + if ($ARGV[5] == 1) { #NO_LAPACK=1 @underscore_objs = (@blasobjs, @misc_underscore_objs); } elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || -d "../lapack-3.4.2" || -d "../lapack-netlib") { - @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); + + if ($ARGV[7] == 0){ + # NEED2UNDERSCORES=0 + # Don't need 2 underscores + @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs); + }else{ + # Need 2 underscores + @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); + @need_2underscore_objs = (@lapack_embeded_underscore_objs); + }; + } else { @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); } @@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){ print $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs 
(@no_underscore_objs) { print $objs, "\n"; @@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){ print "_", $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print "_", $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print "_", $objs, "\n"; @@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){ print $objs, $bu, "\n"; } + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "\n"; + } + # if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "\n"; @@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){ print "\t$uppercase=$objs", "_ \@", $count, "\n"; $count ++; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t$objs=$objs","__ \@", $count, "\n"; + $count ++; + print "\t",$objs, "__=$objs","__ \@", $count, "\n"; + $count ++; + print "\t$uppercase=$objs", "__ \@", $count, "\n"; + $count ++; + } #for misc_common_objs foreach $objs (@misc_common_objs) { @@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){ print "\t$uppercase\_ = $objs","_\n"; $count ++; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t$objs=$objs","__ \@", $count, "\n"; + $count ++; + print "\t",$objs, "__=$objs","__ \@", $count, "\n"; + $count ++; + print "\t$uppercase=$objs", "__ \@", $count, "\n"; + $count ++; + } + exit(0); } @@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){ print "_", $uppercase, "_:\n"; print "\tjmp\t_", $objs, "_\n"; } + + foreach $objs (@need_2underscore_objs) { + $uppercase = $objs; + $uppercase =~ tr/[a-z]/[A-Z]/; + print "\t.align 16\n"; + print "\t.globl _", $uppercase, "__\n"; + print "_", $uppercase, "__:\n"; + print "\tjmp\t_", $objs, "__\n"; + } + exit(0); } @@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){ foreach $objs (@underscore_objs) { print $objs, $bu, "();\n" if $objs ne "xerbla"; } + + foreach $objs (@need_2underscore_objs) { + print $objs, $bu, $bu, "();\n"; + } + # 
if ($ARGV[4] == 0) { foreach $objs (@no_underscore_objs) { print $objs, "();\n"; diff --git a/f_check b/f_check index d7c0b23283..86f1fa689b 100644 --- a/f_check +++ b/f_check @@ -114,6 +114,12 @@ if ($compiler eq "") { $vendor = IBM; $openmp = "-openmp"; } + + # for embeded underscore name, e.g. zho_ge, it may append 2 underscores. + $data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; + if ($data =~ /zho_ge__/) { + $need2bu = 1; + } } if ($vendor eq "") { @@ -245,6 +251,8 @@ if ($link ne "") { $link =~ s/\-rpath\s+/\-rpath\@/g; + $link =~ s/\-rpath-link\s+/\-rpath-link\@/g; + @flags = split(/[\s\,\n]/, $link); # remove leading and trailing quotes from each flag. @flags = map {s/^['"]|['"]$//g; $_} @flags; @@ -265,7 +273,15 @@ if ($link ne "") { $linker_L .= "-Wl,". $flags . " "; } - if ($flags =~ /^\-rpath/) { + if ($flags =~ /^\-rpath\@/) { + $flags =~ s/\@/\,/g; + if ($vendor eq "PGI") { + $flags =~ s/lib$/libso/; + } + $linker_L .= "-Wl,". $flags . 
" " ; + } + + if ($flags =~ /^\-rpath-link\@/) { $flags =~ s/\@/\,/g; if ($vendor eq "PGI") { $flags =~ s/lib$/libso/; @@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1; print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; +print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne ""; + +print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne ""; if (($linker_l ne "") || ($linker_a ne "")) { print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; diff --git a/ftest3.f b/ftest3.f new file mode 100644 index 0000000000..8f2cd332f5 --- /dev/null +++ b/ftest3.f @@ -0,0 +1,6 @@ + double complex function zho_ge() + + zho_ge = (0.0d0,0.0d0) + + return + end diff --git a/interface/trtri.c b/interface/trtri.c index 71680e87d7..007dbd7faf 100644 --- a/interface/trtri.c +++ b/interface/trtri.c @@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT * }; #endif -extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); +extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ @@ -137,7 +137,7 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In // double trtri_U single thread error // call dtrtri from lapack for a walk around. 
if(uplo==0){ - dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info); + BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info); #ifndef PPC440 blas_memory_free(buffer); #endif diff --git a/lapack/trtri/dtrtri_lapack.f b/lapack/trtri/dtrtri_lapack.f index 31a880f764..8e9a081705 100644 --- a/lapack/trtri/dtrtri_lapack.f +++ b/lapack/trtri/dtrtri_lapack.f @@ -107,7 +107,7 @@ *> \ingroup doubleOTHERcomputational * * ===================================================================== - SUBROUTINE DTRTRI_LAPACK( UPLO, DIAG, N, A, LDA, INFO ) + SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) * * -- LAPACK computational routine (version 3.4.0) -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- From ae521ecc3ea48f38af59af2cb424e745eb248477 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 31 Jul 2013 14:49:16 +0800 Subject: [PATCH 12/13] OpenBLAS 0.2.8 rc1. --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index fb377c3775..a1a88cb98b 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.7 +VERSION = 0.2.8-rc1 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library From b736aa811089fddf586188447f4e25c77df3b103 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 1 Aug 2013 23:52:43 +0800 Subject: [PATCH 13/13] Update the doc for 0.2.8 version. --- Changelog.txt | 12 ++++++++++++ Makefile.rule | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index 574f462265..dd186b683b 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,16 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.2.8 +01-Aug-2013 +common: + * Support Open64 5.0. (#266) + * Add executable stack markings. 
(#262, Thank Sébastien Fabbro) + * Respect user's LDFLAGS (Thank Sébastien Fabbro) + +x86/x86-64: + * Rollback bulldozer and piledriver kernels to barcelona kernels (#263) + We will fix the computational error bug in bulldozer and piledriver kernels. + ==================================================================== Version 0.2.7 20-Jul-2013 diff --git a/Makefile.rule b/Makefile.rule index a1a88cb98b..a92eb500ac 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.8-rc1 +VERSION = 0.2.8 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library