Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
xianyi committed Mar 24, 2015
2 parents 51ce5ef + a3491e1 commit d0c51c4
Show file tree
Hide file tree
Showing 137 changed files with 1,947 additions and 444 deletions.
6 changes: 6 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,11 @@ In chronological order:
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
ARMv8 support.

* Dan Kortschak
* [2015-01-07] Added test for drotmg bug #484.

* Ton van den Heuvel <https://github.com/ton>
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().

* [Your name or handle] <[email or website]>
* [Date] [Brief summary of your changes]
20 changes: 20 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.14
24-Mar-2015
common:
* Improve OpenBLASConfig.cmake. (#474, #475. Thanks, xantares.)
* Improve ger and gemv for small matrices by stack allocation.
e.g. make MAX_STACK_ALLOC=2048 (#482. Thanks, Jerome Robert.)
* Introduce openblas_get_num_threads and openblas_get_num_procs.
(#497. Thanks, Erik Schnetter.)
* Add ATLAS-style ?geadd function. (#509. Thanks, Martin Köhler.)
* Fix c/zsyr bug with negative incx. (#492.)
* Fix race condition during shutdown causing a crash in
gotoblas_set_affinity(). (#508. Thanks, Ton van den Heuvel.)

x86/x86-64:
* Support AMD Steamroller.

ARM:
* Add Cortex-A9 and Cortex-A15 targets.

====================================================================
Version 0.2.13
3-Dec-2014
Expand Down
8 changes: 4 additions & 4 deletions GotoBLAS_05LargePage.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

If you want to allocate 64 large pages,

$shell> echo 0 > /pros/sys/vm/nr_hugepages # need to be reset
$shell> echo 65 > /pros/sys/vm/nr_hugepages # add 1 extra page
$shell> echo 3355443200 > /pros/sys/kernel/shmmax # just large number
$shell> echo 3355443200 > /pros/sys/kernel/shmall
$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number
$shell> echo 3355443200 > /proc/sys/kernel/shmall

You may also add a few lines to the /etc/security/limits.conf file.

Expand Down
5 changes: 5 additions & 0 deletions Makefile.arm
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# ifeq logical or
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
endif

ifeq ($(CORE), ARMV7)
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
Expand Down
15 changes: 8 additions & 7 deletions Makefile.install
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(PREFIX)/cmake
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake

.PHONY : install
Expand Down Expand Up @@ -46,11 +46,11 @@ ifndef NO_CBLAS
endif

ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
endif

#for install static library
Expand Down Expand Up @@ -95,7 +95,8 @@ endif
endif
#Generating OpenBLASConfig.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
ifndef NO_SHARED
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
Expand Down
15 changes: 14 additions & 1 deletion Makefile.rule
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#

# This library's version
VERSION = 0.2.13
VERSION = 0.2.14

# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
Expand Down Expand Up @@ -159,6 +159,19 @@ COMMON_PROF = -pg
# Build Debug version
# DEBUG = 1

# Improve GEMV and GER for small matrices by stack allocation.
# For details, https://github.com/xianyi/OpenBLAS/pull/482
#
# MAX_STACK_ALLOC=2048

# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoid conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
# For details, https://github.com/xianyi/OpenBLAS/pull/459
#
# SYMBOLPREFIX=
# SYMBOLSUFFIX=

#
# End of user configuration
#
18 changes: 17 additions & 1 deletion Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ endif
ifeq ($(TARGET), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif


Expand All @@ -85,6 +88,9 @@ endif
ifeq ($(TARGET_CORE), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif


Expand Down Expand Up @@ -305,6 +311,10 @@ ifdef SANITY_CHECK
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
endif

ifdef MAX_STACK_ALLOC
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
endif

#
# Architecture dependent settings
#
Expand Down Expand Up @@ -354,6 +364,12 @@ endif


ifeq ($(USE_OPENMP), 1)

#check
ifeq ($(USE_THREAD), 0)
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
endif

# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
CCOMMON_OPT += -fopenmp
Expand Down Expand Up @@ -392,7 +408,7 @@ endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Please read GotoBLAS_01Readme.txt
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks, Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.

#### MIPS64:
- **ICT Loongson 3A**: Optimized Level-3 BLAS and part of Level-1 and Level-2.
Expand Down
6 changes: 6 additions & 0 deletions TargetList.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ ISTANBUL
BOBCAT
BULLDOZER
PILEDRIVER
STEAMROLLER

c)VIA CPU:
SSE_GENERIC
Expand Down Expand Up @@ -62,6 +63,11 @@ SPARC
SPARCV7

6.ARM CPU:
CORTEXA15
CORTEXA9
ARMV7
ARMV6
ARMV5

7.ARM 64-bit CPU:
ARMV8
9 changes: 7 additions & 2 deletions benchmark/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@ include $(TOPDIR)/Makefile.system
#LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm

# ACML custom
ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib
LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm
#ACML=/opt/pb/acml-5-3-1-gfortran-64bit/gfortran64_fma4_mp/lib
#LIBACML = -fopenmp $(ACML)/libacml_mp.a -lgfortran -lm

# ACML 6.1 custom
ACML=/home/werner/project/acml6.1/gfortran64_mp/lib
LIBACML = -fopenmp $(ACML)/libacml_mp.so -lgfortran -lm


# Atlas Ubuntu
#ATLAS=/usr/lib/atlas-base
Expand Down
4 changes: 2 additions & 2 deletions benchmark/axpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ static void *huge_malloc(BLASLONG size){

#endif

int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
Expand Down Expand Up @@ -198,4 +198,4 @@ int MAIN__(int argc, char *argv[]){
return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
4 changes: 2 additions & 2 deletions benchmark/cholesky.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ static __inline double getmflops(int ratio, int m, double secs){
}


int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

#ifndef COMPLEX
char *trans[] = {"T", "N"};
Expand Down Expand Up @@ -273,4 +273,4 @@ int MAIN__(int argc, char *argv[]){
return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
4 changes: 2 additions & 2 deletions benchmark/dot.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){

#endif

int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

FLOAT *x, *y;
FLOAT result;
Expand Down Expand Up @@ -192,4 +192,4 @@ int MAIN__(int argc, char *argv[]){
return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
4 changes: 2 additions & 2 deletions benchmark/geev.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ static void *huge_malloc(BLASLONG size){

#endif

int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
FLOAT wkopt[4];
Expand Down Expand Up @@ -257,4 +257,4 @@ int MAIN__(int argc, char *argv[]){
return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
25 changes: 19 additions & 6 deletions benchmark/gemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,15 @@ static void *huge_malloc(BLASLONG size){

#endif

int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char trans='N';
blasint m, i, j;
blasint m, n, i, j;
int loops = 1;
int has_param_n=0;
int l;
char *p;

Expand Down Expand Up @@ -162,6 +163,11 @@ int MAIN__(int argc, char *argv[]){
if ( p != NULL )
loops = atoi(p);

if ((p = getenv("OPENBLAS_PARAM_N"))) {
n = atoi(p);
has_param_n=1;
}


#ifdef linux
srandom(getpid());
Expand All @@ -174,7 +180,14 @@ int MAIN__(int argc, char *argv[]){

timeg=0;

fprintf(stderr, " %6d : ", (int)m);
if ( has_param_n == 1 && n <= m )
n=n;
else
n=m;



fprintf(stderr, " %6dx%d : ", (int)m, (int)n);

for (l=0; l<loops; l++)
{
Expand All @@ -189,7 +202,7 @@ int MAIN__(int argc, char *argv[]){

gettimeofday( &start, (struct timezone *)0);

GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );

gettimeofday( &stop, (struct timezone *)0);

Expand All @@ -202,11 +215,11 @@ int MAIN__(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6);

}

return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
4 changes: 2 additions & 2 deletions benchmark/gemm3m.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){

#endif

int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
Expand Down Expand Up @@ -209,4 +209,4 @@ int MAIN__(int argc, char *argv[]){
return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
4 changes: 2 additions & 2 deletions benchmark/gemv.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){

#endif

int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
Expand Down Expand Up @@ -266,4 +266,4 @@ int MAIN__(int argc, char *argv[]){
return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
4 changes: 2 additions & 2 deletions benchmark/ger.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){

#endif

int MAIN__(int argc, char *argv[]){
int main(int argc, char *argv[]){

FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
Expand Down Expand Up @@ -214,5 +214,5 @@ int MAIN__(int argc, char *argv[]){
return 0;
}

void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

Loading

0 comments on commit d0c51c4

Please sign in to comment.