Skip to content

Commit

Permalink
Merge branch 'release-0.2.16'
Browse files Browse the repository at this point in the history
  • Loading branch information
xianyi committed Mar 15, 2016
2 parents 53e849f + 8c0fb12 commit fced574
Show file tree
Hide file tree
Showing 3,456 changed files with 203,651 additions and 30,385 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,4 @@ test/zblat2
test/zblat3
build
build.*
*.swp
7 changes: 6 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ before_install:
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi

script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
script:
- set -e
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi

# whitelist
branches:
Expand Down
22 changes: 16 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4)
project(OpenBLAS)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 14)
set(OpenBLAS_PATCH_VERSION 16)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

enable_language(ASM)
Expand Down Expand Up @@ -54,10 +54,6 @@ if (NOT DYNAMIC_ARCH)
list(APPEND BLASDIRS kernel)
endif ()

if (DEFINED UTEST_CHECK)
set(SANITY_CHECK 1)
endif ()

if (DEFINED SANITY_CHECK)
list(APPEND BLASDIRS reference)
endif ()
Expand Down Expand Up @@ -110,6 +106,10 @@ if (${NO_STATIC} AND ${NO_SHARED})
message(FATAL_ERROR "Neither static nor shared are enabled.")
endif ()

#Set default output directory
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )

# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
Expand Down Expand Up @@ -139,6 +139,17 @@ add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET

include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")

# Set output for libopenblas
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
endforeach()

enable_testing()
add_subdirectory(utest)

if(NOT MSVC)
#only build shared library for MSVC
Expand All @@ -152,7 +163,6 @@ target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
endif()

#build test and ctest
enable_testing()
add_subdirectory(test)
if(NOT NO_CBLAS)
add_subdirectory(ctest)
Expand Down
17 changes: 17 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ In chronological order:
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
ARMv8 support.

* Jerome Robert <[email protected]>
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
* [2015-12-28] Allow forcing the number of parallel make jobs
* [2015-12-28] Fix detection of AMD E2-3200
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)

* Dan Kortschak
* [2015-01-07] Added test for drotmg bug #484.

Expand All @@ -130,5 +141,11 @@ In chronological order:
* Martin Koehler <https://github.com/grisuthedragon/>
* [2015-09-07] Improved imatcopy

* Ashwin Sekhar T K <https://github.com/ashwinyes/>
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
* [2015-11-20] lapack-test fixes for Cortex-A57
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57

* [Your name or handle] <[email or website]>
* [Date] [Brief summary of your changes]
53 changes: 53 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,57 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.16
15-Mar-2016
common:
* Avoid potential getenv segfault. (#716)
* Import LAPACK svn bugfix #142-#147,#150-#155

x86/x86_64:
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
* Fix bug with scipy linalg test.

ARM:
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)

POWER:
* Optimize D and Z BLAS3 functions for Power8.

====================================================================
Version 0.2.16.rc1
23-Feb-2016
common:
* Upgrade LAPACK to 3.6.0 version.
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
LAPACK deprecated functions.
* Add MAKE_NB_JOBS option in Makefile.
Force number of make jobs. This is particularly
useful when using distcc. (#735. Thanks, Jerome Robert.)
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
* Let openblas_get_num_threads return the number of active threads.
(#760. Thanks, Jerome Robert)
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
* Update scipy benchmark script. (#745. Thanks, John Kirkham)

x86/x86_64:
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
* Detect Intel Avoton.
* Detect AMD Trinity, Richland, E2-3200.
* Fix gemv performance bug on Mac OSX Intel Haswell.
* Fix some bugs with CMake and Visual Studio

ARM:
* Support and optimize Cortex-A57 AArch64.
(#686. Thanks, Ashwin Sekhar T K)
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
* Update ARMV6 kernels.

POWER:
* Fix detection of POWER architecture
(#684. Thanks, Sebastien Villemot)

====================================================================
Version 0.2.15
27-Oct-2015
Expand Down
37 changes: 24 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1)
BLASDIRS += kernel
endif

ifdef UTEST_CHECK
SANITY_CHECK = 1
endif

ifdef SANITY_CHECK
BLASDIRS += reference
endif
Expand Down Expand Up @@ -85,22 +81,22 @@ endif

shared :
ifndef NO_SHARED
ifeq ($(OSNAME), Linux)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
@$(MAKE) -C exports so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), FreeBSD)
@$(MAKE) -C exports so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), NetBSD)
@$(MAKE) -C exports so
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@$(MAKE) -C exports dyn
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
endif
ifeq ($(OSNAME), WINNT)
@$(MAKE) -C exports dll
Expand All @@ -117,10 +113,8 @@ ifndef CROSS
touch $(LIBNAME)
ifndef NO_FBLAS
$(MAKE) -C test all
ifdef UTEST_CHECK
$(MAKE) -C utest all
endif
endif
ifndef NO_CBLAS
$(MAKE) -C ctest all
endif
Expand Down Expand Up @@ -249,16 +243,23 @@ ifndef NOFORTRAN
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(FC), gfortran)
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
ifdef SMP
ifeq ($(OSNAME), WINNT)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif
Expand Down Expand Up @@ -288,8 +289,18 @@ endif
lapack-test :
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
endif

lapack-runtest:
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )


blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
Expand Down
6 changes: 2 additions & 4 deletions Makefile.arm
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ endif

ifeq ($(CORE), ARMV7)
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
else
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
Expand All @@ -29,5 +29,3 @@ ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -marm -march=armv5
FCOMMON_OPT += -marm -march=armv5
endif


4 changes: 4 additions & 0 deletions Makefile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ CCOMMON_OPT += -march=armv8-a
FCOMMON_OPT += -march=armv8-a
endif

ifeq ($(CORE), CORTEXA57)
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
endif

12 changes: 6 additions & 6 deletions Makefile.install
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ install : lib.grd
#for inc
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
Expand All @@ -48,10 +48,10 @@ endif

ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
endif

#for install static library
Expand All @@ -64,7 +64,7 @@ endif
#for install shared library
ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), Linux)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
Expand Down
20 changes: 12 additions & 8 deletions Makefile.rule
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#

# This library's version
VERSION = 0.2.15
VERSION = 0.2.16

# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
Expand Down Expand Up @@ -79,6 +79,9 @@ VERSION = 0.2.15
# If you don't need LAPACKE (C Interface to LAPACK), please uncomment the following line.
# NO_LAPACKE = 1

# Build LAPACK Deprecated functions since LAPACK 3.6.0
# BUILD_LAPACK_DEPRECATED = 1

# If you want to use legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1

Expand Down Expand Up @@ -108,6 +111,10 @@ NO_AFFINITY = 1
# Don't use parallel make.
# NO_PARALLEL_MAKE = 1

# Force number of make jobs. The default is the number of logical CPUs of the host.
# This is particularly useful when using distcc
# MAKE_NB_JOBS = 2

# If you would like to know minute performance report of GotoBLAS.
# FUNCTION_PROFILE = 1

Expand Down Expand Up @@ -138,10 +145,6 @@ NO_AFFINITY = 1
# slow (Not implemented yet).
# SANITY_CHECK = 1

# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
# SANITY_CHECK to compare the result with reference BLAS.
# UTEST_CHECK = 1

# The installation directory.
# PREFIX = /opt/OpenBLAS

Expand All @@ -159,10 +162,11 @@ COMMON_PROF = -pg
# Build Debug version
# DEBUG = 1

# Improve GEMV and GER for small matrices by stack allocation.
# For details, https://github.com/xianyi/OpenBLAS/pull/482
# Set maximum stack allocation.
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
#
MAX_STACK_ALLOC=2048
# MAX_STACK_ALLOC = 0

# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoid conflicts with other BLAS libraries, especially when using
Expand Down
Loading

0 comments on commit fced574

Please sign in to comment.