From 5f150180d1ad74b1b6b4bf0d2ec4814645d902ec Mon Sep 17 00:00:00 2001 From: Michael O'Farrell Date: Wed, 22 Jul 2015 16:22:57 -0700 Subject: [PATCH] Added benchmarking library. --- benchmark/.gitignore | 46 + benchmark/.travis-setup.sh | 26 + benchmark/.travis.yml | 41 + benchmark/.ycm_extra_conf.py | 115 +++ benchmark/AUTHORS | 26 + benchmark/CMakeLists.txt | 107 +++ benchmark/CONTRIBUTING.md | 58 ++ benchmark/CONTRIBUTORS | 42 + benchmark/LICENSE | 202 ++++ benchmark/README.md | 282 ++++++ benchmark/appveyor.yml | 55 ++ benchmark/cmake/AddCXXCompilerFlag.cmake | 37 + benchmark/cmake/CXXFeatureCheck.cmake | 39 + benchmark/cmake/GetGitVersion.cmake | 45 + benchmark/cmake/gnu_posix_regex.cpp | 12 + benchmark/cmake/posix_regex.cpp | 12 + benchmark/cmake/std_regex.cpp | 10 + benchmark/cmake/steady_clock.cpp | 7 + benchmark/cmake/thread_safety_attributes.cpp | 4 + benchmark/include/benchmark/benchmark.h | 21 + benchmark/include/benchmark/benchmark_api.h | 603 ++++++++++++ benchmark/include/benchmark/macros.h | 44 + benchmark/include/benchmark/reporter.h | 122 +++ benchmark/mingw.py | 320 +++++++ benchmark/src/CMakeLists.txt | 51 ++ benchmark/src/arraysize.h | 36 + benchmark/src/benchmark.cc | 918 +++++++++++++++++++ benchmark/src/check.h | 57 ++ benchmark/src/colorprint.cc | 115 +++ benchmark/src/colorprint.h | 19 + benchmark/src/commandlineflags.cc | 219 +++++ benchmark/src/commandlineflags.h | 76 ++ benchmark/src/console_reporter.cc | 115 +++ benchmark/src/csv_reporter.cc | 105 +++ benchmark/src/cycleclock.h | 134 +++ benchmark/src/internal_macros.h | 40 + benchmark/src/json_reporter.cc | 165 ++++ benchmark/src/log.cc | 40 + benchmark/src/log.h | 28 + benchmark/src/mutex.h | 142 +++ benchmark/src/re.h | 60 ++ benchmark/src/re_posix.cc | 59 ++ benchmark/src/re_std.cc | 44 + benchmark/src/reporter.cc | 86 ++ benchmark/src/sleep.cc | 50 + benchmark/src/sleep.h | 17 + benchmark/src/stat.h | 307 +++++++ benchmark/src/string_util.cc | 166 ++++ benchmark/src/string_util.h 
| 43 + benchmark/src/sysinfo.cc | 413 +++++++++ benchmark/src/sysinfo.h | 12 + benchmark/src/walltime.cc | 236 +++++ benchmark/src/walltime.h | 17 + benchmark/test/CMakeLists.txt | 89 ++ benchmark/test/basic_test.cc | 104 +++ benchmark/test/benchmark_test.cc | 154 ++++ benchmark/test/cxx03_test.cc | 31 + benchmark/test/filter_test.cc | 85 ++ benchmark/test/fixture_test.cc | 42 + benchmark/test/options_test.cc | 18 + 60 files changed, 6569 insertions(+) create mode 100644 benchmark/.gitignore create mode 100644 benchmark/.travis-setup.sh create mode 100644 benchmark/.travis.yml create mode 100644 benchmark/.ycm_extra_conf.py create mode 100644 benchmark/AUTHORS create mode 100644 benchmark/CMakeLists.txt create mode 100644 benchmark/CONTRIBUTING.md create mode 100644 benchmark/CONTRIBUTORS create mode 100644 benchmark/LICENSE create mode 100644 benchmark/README.md create mode 100644 benchmark/appveyor.yml create mode 100644 benchmark/cmake/AddCXXCompilerFlag.cmake create mode 100644 benchmark/cmake/CXXFeatureCheck.cmake create mode 100644 benchmark/cmake/GetGitVersion.cmake create mode 100644 benchmark/cmake/gnu_posix_regex.cpp create mode 100644 benchmark/cmake/posix_regex.cpp create mode 100644 benchmark/cmake/std_regex.cpp create mode 100644 benchmark/cmake/steady_clock.cpp create mode 100644 benchmark/cmake/thread_safety_attributes.cpp create mode 100644 benchmark/include/benchmark/benchmark.h create mode 100644 benchmark/include/benchmark/benchmark_api.h create mode 100644 benchmark/include/benchmark/macros.h create mode 100644 benchmark/include/benchmark/reporter.h create mode 100644 benchmark/mingw.py create mode 100644 benchmark/src/CMakeLists.txt create mode 100644 benchmark/src/arraysize.h create mode 100644 benchmark/src/benchmark.cc create mode 100644 benchmark/src/check.h create mode 100644 benchmark/src/colorprint.cc create mode 100644 benchmark/src/colorprint.h create mode 100644 benchmark/src/commandlineflags.cc create mode 100644 
benchmark/src/commandlineflags.h create mode 100644 benchmark/src/console_reporter.cc create mode 100644 benchmark/src/csv_reporter.cc create mode 100644 benchmark/src/cycleclock.h create mode 100644 benchmark/src/internal_macros.h create mode 100644 benchmark/src/json_reporter.cc create mode 100644 benchmark/src/log.cc create mode 100644 benchmark/src/log.h create mode 100644 benchmark/src/mutex.h create mode 100644 benchmark/src/re.h create mode 100644 benchmark/src/re_posix.cc create mode 100644 benchmark/src/re_std.cc create mode 100644 benchmark/src/reporter.cc create mode 100644 benchmark/src/sleep.cc create mode 100644 benchmark/src/sleep.h create mode 100644 benchmark/src/stat.h create mode 100644 benchmark/src/string_util.cc create mode 100644 benchmark/src/string_util.h create mode 100644 benchmark/src/sysinfo.cc create mode 100644 benchmark/src/sysinfo.h create mode 100644 benchmark/src/walltime.cc create mode 100644 benchmark/src/walltime.h create mode 100644 benchmark/test/CMakeLists.txt create mode 100644 benchmark/test/basic_test.cc create mode 100644 benchmark/test/benchmark_test.cc create mode 100644 benchmark/test/cxx03_test.cc create mode 100644 benchmark/test/filter_test.cc create mode 100644 benchmark/test/fixture_test.cc create mode 100644 benchmark/test/options_test.cc diff --git a/benchmark/.gitignore b/benchmark/.gitignore new file mode 100644 index 00000000..3c1b4f21 --- /dev/null +++ b/benchmark/.gitignore @@ -0,0 +1,46 @@ +*.a +*.so +*.so.?* +*.dll +*.exe +*.dylib +*.cmake +!/cmake/*.cmake +*~ +*.pyc +__pycache__ + +# lcov +*.lcov +/lcov + +# cmake files. +/Testing +CMakeCache.txt +CMakeFiles/ +cmake_install.cmake + +# makefiles. +Makefile + +# in-source build. +bin/ +lib/ +/test/*_test + +# exuberant ctags. +tags + +# YouCompleteMe configuration. +.ycm_extra_conf.pyc + +# ninja generated files. +.ninja_deps +.ninja_log +build.ninja +install_manifest.txt +rules.ninja + +# out-of-source build top-level folders. 
+build/ +_build/ diff --git a/benchmark/.travis-setup.sh b/benchmark/.travis-setup.sh new file mode 100644 index 00000000..c900fa93 --- /dev/null +++ b/benchmark/.travis-setup.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +# Before install + +sudo add-apt-repository -y ppa:kalakris/cmake +if [ "$STD" = "c++11" ]; then + sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + if [ "$CXX" = "clang++" ]; then + wget -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add - + sudo add-apt-repository -y "deb http://llvm.org/apt/precise/ llvm-toolchain-precise-3.6 main" + fi +fi +sudo apt-get update -qq + +# Install +sudo apt-get install -qq cmake +if [ "$STD" = "c++11" ] && [ "$CXX" = "g++" ]; then + sudo apt-get install -qq gcc-4.8 g++-4.8 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 90 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 90 +elif [ "$CXX" = "clang++" ]; then + sudo apt-get install -qq clang-3.6 + sudo update-alternatives --install /usr/local/bin/clang clang /usr/bin/clang-3.6 90 + sudo update-alternatives --install /usr/local/bin/clang++ clang++ /usr/bin/clang++-3.6 90 + export PATH=/usr/local/bin:$PATH +fi diff --git a/benchmark/.travis.yml b/benchmark/.travis.yml new file mode 100644 index 00000000..8b138ce1 --- /dev/null +++ b/benchmark/.travis.yml @@ -0,0 +1,41 @@ +language: cpp + +# NOTE: The COMPILER variable is unused. It simply makes the display on +# travis-ci.org more readable. 
+matrix: + include: + - compiler: gcc + env: COMPILER=g++-4.6 STD=c++0x BUILD_TYPE=Coverage + - compiler: gcc + env: COMPILER=g++-4.6 STD=c++0x BUILD_TYPE=Debug + - compiler: gcc + env: COMPILER=g++-4.6 STD=c++0x BUILD_TYPE=Release + - compiler: gcc + env: COMPILER=g++-4.8 STD=c++11 BUILD_TYPE=Debug + - compiler: gcc + env: COMPILER=g++-4.8 STD=c++11 BUILD_TYPE=Release + - compiler: clang + env: COMPILER=clang++-3.6 STD=c++11 BUILD_TYPE=Debug + - compiler: clang + env: COMPILER=clang++-3.6 STD=c++11 BUILD_TYPE=Release + +before_script: + - source .travis-setup.sh + - mkdir build && cd build + +install: + - if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then + PATH=~/.local/bin:${PATH}; + pip install --user --upgrade pip; + pip install --user cpp-coveralls; + fi + +script: + - cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="-std=${STD}" + - make + - make CTEST_OUTPUT_ON_FAILURE=1 test + +after_success: + - if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then + coveralls --include src --include include --gcov-options '\-lp' --root .. --build-root .; + fi diff --git a/benchmark/.ycm_extra_conf.py b/benchmark/.ycm_extra_conf.py new file mode 100644 index 00000000..86194357 --- /dev/null +++ b/benchmark/.ycm_extra_conf.py @@ -0,0 +1,115 @@ +import os +import ycm_core + +# These are the compilation flags that will be used in case there's no +# compilation database set (by default, one is not set). +# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. +flags = [ +'-Wall', +'-Werror', +'-pendantic-errors', +'-std=c++0x', +'-fno-strict-aliasing', +'-O3', +'-DNDEBUG', +# ...and the same thing goes for the magic -x option which specifies the +# language that the files to be compiled are written in. This is mostly +# relevant for c++ headers. +# For a C project, you would set this to 'c' instead of 'c++'. 
+'-x', 'c++', +'-I', 'include', +'-isystem', '/usr/include', +'-isystem', '/usr/local/include', +] + + +# Set this to the absolute path to the folder (NOT the file!) containing the +# compile_commands.json file to use that instead of 'flags'. See here for +# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html +# +# Most projects will NOT need to set this to anything; you can just change the +# 'flags' list of compilation flags. Notice that YCM itself uses that approach. +compilation_database_folder = '' + +if os.path.exists( compilation_database_folder ): + database = ycm_core.CompilationDatabase( compilation_database_folder ) +else: + database = None + +SOURCE_EXTENSIONS = [ '.cc' ] + +def DirectoryOfThisScript(): + return os.path.dirname( os.path.abspath( __file__ ) ) + + +def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): + if not working_directory: + return list( flags ) + new_flags = [] + make_next_absolute = False + path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] + for flag in flags: + new_flag = flag + + if make_next_absolute: + make_next_absolute = False + if not flag.startswith( '/' ): + new_flag = os.path.join( working_directory, flag ) + + for path_flag in path_flags: + if flag == path_flag: + make_next_absolute = True + break + + if flag.startswith( path_flag ): + path = flag[ len( path_flag ): ] + new_flag = path_flag + os.path.join( working_directory, path ) + break + + if new_flag: + new_flags.append( new_flag ) + return new_flags + + +def IsHeaderFile( filename ): + extension = os.path.splitext( filename )[ 1 ] + return extension in [ '.h', '.hxx', '.hpp', '.hh' ] + + +def GetCompilationInfoForFile( filename ): + # The compilation_commands.json file generated by CMake does not have entries + # for header files. So we do our best by asking the db for flags for a + # corresponding source file, if any. If one exists, the flags for that file + # should be good enough. 
+ if IsHeaderFile( filename ): + basename = os.path.splitext( filename )[ 0 ] + for extension in SOURCE_EXTENSIONS: + replacement_file = basename + extension + if os.path.exists( replacement_file ): + compilation_info = database.GetCompilationInfoForFile( + replacement_file ) + if compilation_info.compiler_flags_: + return compilation_info + return None + return database.GetCompilationInfoForFile( filename ) + + +def FlagsForFile( filename, **kwargs ): + if database: + # Bear in mind that compilation_info.compiler_flags_ does NOT return a + # python list, but a "list-like" StringVec object + compilation_info = GetCompilationInfoForFile( filename ) + if not compilation_info: + return None + + final_flags = MakeRelativePathsInFlagsAbsolute( + compilation_info.compiler_flags_, + compilation_info.compiler_working_dir_ ) + else: + relative_to = DirectoryOfThisScript() + final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) + + return { + 'flags': final_flags, + 'do_cache': True + } diff --git a/benchmark/AUTHORS b/benchmark/AUTHORS new file mode 100644 index 00000000..3d9d1df4 --- /dev/null +++ b/benchmark/AUTHORS @@ -0,0 +1,26 @@ +# This is the official list of benchmark authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. +# +# Names should be added to this file as: +# Name or Organization +# The email address is not required for organizations. +# +# Please keep the list sorted. + +Arne Beer +Christopher Seymour +David Coeurjolly +Dominic Hamon +Eugene Zhuk +Evgeny Safronov +Felix Homann +Google Inc. 
+JianXiong Zhou +Kaito Udagawa +Lei Xu +Matt Clarkson +Oleksandr Sochka +Paul Redmond +Shuo Chen +Yusuke Suzuki diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt new file mode 100644 index 00000000..911f3096 --- /dev/null +++ b/benchmark/CMakeLists.txt @@ -0,0 +1,107 @@ +cmake_minimum_required (VERSION 2.8.11) +project (benchmark) + +option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) +option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF) +# Make sure we can import our CMake functions +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +# Read the git tags to determine the project version +include(GetGitVersion) +get_git_version(GIT_VERSION) + +# Tell the user what versions we are using +string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION}) +message("-- Version: ${VERSION}") + +# The version of the libraries +set(GENERIC_LIB_VERSION ${VERSION}) +string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) + +# Import our CMake modules +include(CheckCXXCompilerFlag) +include(AddCXXCompilerFlag) +include(CXXFeatureCheck) + +# Try and enable C++11. Don't use C++14 because it doesn't work in some +# configurations.
+add_cxx_compiler_flag(-std=c++11) +if (NOT HAVE_CXX_FLAG_STD_CXX11) + add_cxx_compiler_flag(-std=c++0x) +endif() + +# Turn compiler warnings up to 11 +add_cxx_compiler_flag(-Wall) +add_cxx_compiler_flag(-Wextra) +add_cxx_compiler_flag(-Wshadow) +add_cxx_compiler_flag(-Werror RELEASE) +add_cxx_compiler_flag(-pedantic) +add_cxx_compiler_flag(-pedantic-errors) +add_cxx_compiler_flag(-Wshorten-64-to-32) +add_cxx_compiler_flag(-Wfloat-equal) +add_cxx_compiler_flag(-Wzero-as-null-pointer-constant) +add_cxx_compiler_flag(-fstrict-aliasing) +if (HAVE_CXX_FLAG_FSTRICT_ALIASING) + add_cxx_compiler_flag(-Wstrict-aliasing) +endif() +add_cxx_compiler_flag(-Wthread-safety) +if (HAVE_WTHREAD_SAFETY) + add_definitions(-DHAVE_WTHREAD_SAFETY) + cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) +endif() + +# Link time optimisation +if (BENCHMARK_ENABLE_LTO) + add_cxx_compiler_flag(-flto) + if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + find_program(GCC_AR gcc-ar) + if (GCC_AR) + set(CMAKE_AR ${GCC_AR}) + endif() + find_program(GCC_RANLIB gcc-ranlib) + if (GCC_RANLIB) + set(CMAKE_RANLIB ${GCC_RANLIB}) + endif() + endif() +endif() + +# Coverage build type +set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING + "Flags used by the C++ compiler during coverage builds." + FORCE) +set(CMAKE_EXE_LINKER_FLAGS_COVERAGE + "${CMAKE_EXE_LINKER_FLAGS_DEBUG}" CACHE STRING + "Flags used for linking binaries during coverage builds." + FORCE) +set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE + "${CMAKE_SHARED_LINKER_FLAGS_DEBUG}" CACHE STRING + "Flags used by the shared libraries linker during coverage builds." + FORCE) +mark_as_advanced( + CMAKE_CXX_FLAGS_COVERAGE + CMAKE_EXE_LINKER_FLAGS_COVERAGE + CMAKE_SHARED_LINKER_FLAGS_COVERAGE) +set(CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING + "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel Coverage." 
+ FORCE) +add_cxx_compiler_flag(--coverage COVERAGE) + +# C++ feature checks +cxx_feature_check(STD_REGEX) +cxx_feature_check(GNU_POSIX_REGEX) +cxx_feature_check(POSIX_REGEX) +cxx_feature_check(STEADY_CLOCK) + +# Ensure we have pthreads +find_package(Threads REQUIRED) + +# Set up directories +include_directories(${PROJECT_SOURCE_DIR}/include) + +# Build the targets +add_subdirectory(src) + +if (BENCHMARK_ENABLE_TESTING) + enable_testing() + add_subdirectory(test) +endif() diff --git a/benchmark/CONTRIBUTING.md b/benchmark/CONTRIBUTING.md new file mode 100644 index 00000000..43de4c9d --- /dev/null +++ b/benchmark/CONTRIBUTING.md @@ -0,0 +1,58 @@ +# How to contribute # + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + + +## Contributor License Agreement ## + +Contributions to any Google project must be accompanied by a Contributor +License Agreement. This is not a copyright **assignment**, it simply gives +Google permission to use and redistribute your contributions as part of the +project. + + * If you are an individual writing original source code and you're sure you + own the intellectual property, then you'll need to sign an [individual + CLA][]. + + * If you work for a company that wants to allow you to contribute your work, + then you'll need to sign a [corporate CLA][]. + +You generally only need to submit a CLA once, so if you've already submitted +one (even if it was for a different project), you probably don't need to do it +again. + +[individual CLA]: https://developers.google.com/open-source/cla/individual +[corporate CLA]: https://developers.google.com/open-source/cla/corporate + +Once your CLA is submitted (or if you already submitted one for +another Google project), make a commit adding yourself to the +[AUTHORS][] and [CONTRIBUTORS][] files. This commit can be part +of your first [pull request][].
+ +[AUTHORS]: AUTHORS +[CONTRIBUTORS]: CONTRIBUTORS + + +## Submitting a patch ## + + 1. It's generally best to start by opening a new issue describing the bug or + feature you're intending to fix. Even if you think it's relatively minor, + it's helpful to know what people are working on. Mention in the initial + issue that you are planning to work on that bug or feature so that it can + be assigned to you. + + 1. Follow the normal process of [forking][] the project, and set up a new + branch to work in. It's important that each group of changes be done in + separate branches in order to ensure that a pull request only includes the + commits related to that bug or feature. + + 1. Do your best to have [well-formed commit messages][] for each change. + This provides consistency throughout the project, and ensures that commit + messages are able to be formatted properly by various git tools. + + 1. Finally, push the commits to your fork and submit a [pull request][]. + +[forking]: https://help.github.com/articles/fork-a-repo +[well-formed commit messages]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html +[pull request]: https://help.github.com/articles/creating-a-pull-request diff --git a/benchmark/CONTRIBUTORS b/benchmark/CONTRIBUTORS new file mode 100644 index 00000000..42d713bc --- /dev/null +++ b/benchmark/CONTRIBUTORS @@ -0,0 +1,42 @@ +# People who have agreed to one of the CLAs and can contribute patches. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright.
+# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# https://developers.google.com/open-source/cla/individual +# https://developers.google.com/open-source/cla/corporate +# +# The agreement for individuals can be filled out on the web. +# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. +# +# Names should be added to this file as: +# Name +# +# Please keep the list sorted. + +Arne Beer +Chris Kennelly +Christopher Seymour +David Coeurjolly +Dominic Hamon +Eugene Zhuk +Evgeny Safronov +Felix Homann +JianXiong Zhou +Kaito Udagawa +Lei Xu +Matt Clarkson +Oleksandr Sochka +Pascal Leroy +Paul Redmond +Pierre Phaneuf +Shuo Chen +Yusuke Suzuki diff --git a/benchmark/LICENSE b/benchmark/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/benchmark/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 00000000..b3427aba --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,282 @@ +benchmark +========= +[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) +[![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master) +[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) + +A library to support the benchmarking of functions, similar to unit-tests. 
+ +Discussion group: https://groups.google.com/d/forum/benchmark-discuss + +Example usage +------------- +Define a function that executes the code to be measured a +specified number of times: + +```c++ +static void BM_StringCreation(benchmark::State& state) { + while (state.KeepRunning()) + std::string empty_string; +} +// Register the function as a benchmark +BENCHMARK(BM_StringCreation); + +// Define another benchmark +static void BM_StringCopy(benchmark::State& state) { + std::string x = "hello"; + while (state.KeepRunning()) + std::string copy(x); +} +BENCHMARK(BM_StringCopy); + +BENCHMARK_MAIN(); +``` + +Sometimes a family of microbenchmarks can be implemented with +just one routine that takes an extra argument to specify which +one of the family of benchmarks to run. For example, the following +code defines a family of microbenchmarks for measuring the speed +of `memcpy()` calls of different lengths: + +```c++ +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range_x()]; char* dst = new char[state.range_x()]; + memset(src, 'x', state.range_x()); + while (state.KeepRunning()) + memcpy(dst, src, state.range_x()); + state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(state.range_x())); + delete[] src; + delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); +``` + +The preceding code is quite repetitive, and can be replaced with the +following short-hand. The following invocation will pick a few +appropriate arguments in the specified range and will generate a +microbenchmark for each such argument. + +```c++ +BENCHMARK(BM_memcpy)->Range(8, 8<<10); +``` + +You might have a microbenchmark that depends on two inputs. For +example, the following code defines a family of microbenchmarks for +measuring the speed of set insertion.
+ +```c++ +static void BM_SetInsert(benchmark::State& state) { + while (state.KeepRunning()) { + state.PauseTiming(); + std::set<int> data = ConstructRandomSet(state.range_x()); + state.ResumeTiming(); + for (int j = 0; j < state.range_y(); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->ArgPair(1<<10, 1) + ->ArgPair(1<<10, 8) + ->ArgPair(1<<10, 64) + ->ArgPair(1<<10, 512) + ->ArgPair(8<<10, 1) + ->ArgPair(8<<10, 8) + ->ArgPair(8<<10, 64) + ->ArgPair(8<<10, 512); +``` + +The preceding code is quite repetitive, and can be replaced with +the following short-hand. The following macro will pick a few +appropriate arguments in the product of the two specified ranges +and will generate a microbenchmark for each such pair. + +```c++ +BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512); +``` + +For more complex patterns of inputs, passing a custom function +to Apply allows programmatic specification of an +arbitrary set of arguments to run the microbenchmark on. +The following example enumerates a dense range on one parameter, +and a sparse range on the second. + +```c++ +static void CustomArguments( + benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b->ArgPair(i, j); + // Nothing to return: Apply() takes a void(Benchmark*) callback. +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); +``` + +Templated microbenchmarks work the same way: +Produce then consume 'size' messages 'iters' times +Measures throughput in the absence of multiprogramming. 
+ +```c++ +template <class Q> int BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + while (state.KeepRunning()) { + for (int i = state.range_x(); i--; ) + q.push(v); + for (int e = state.range_x(); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast<int64_t>(state.iterations())*state.range_x()); +} +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10); +``` + +Three macros are provided for adding benchmark templates. + +```c++ +#if __cplusplus >= 201103L // C++11 and greater. +#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters. +#else // C++ < C++11 +#define BENCHMARK_TEMPLATE(func, arg1) +#endif +#define BENCHMARK_TEMPLATE1(func, arg1) +#define BENCHMARK_TEMPLATE2(func, arg1, arg2) +``` + +In a multithreaded test, it is guaranteed that none of the threads will start +until all have called KeepRunning, and all will have finished before KeepRunning +returns false. As such, any global setup or teardown you want to do can be +wrapped in a check against the thread index: + +```c++ +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index == 0) { + // Setup code here. + } + while (state.KeepRunning()) { + // Run the test as normal. + } + if (state.thread_index == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(2); +``` +To prevent a value or expression from being optimized away by the compiler +the `benchmark::DoNotOptimize(...)` function can be used. 
+ +```c++ +static void BM_test(benchmark::State& state) { + while (state.KeepRunning()) { + int x = 0; + for (int i=0; i < 64; ++i) { + benchmark::DoNotOptimize(x += i); + } + } +} +``` + +Benchmark Fixtures +------------------ +Fixture tests are created by +first defining a type that derives from ::benchmark::Fixture and then +creating/registering the tests using the following macros: + +* `BENCHMARK_F(ClassName, Method)` +* `BENCHMARK_DEFINE_F(ClassName, Method)` +* `BENCHMARK_REGISTER_F(ClassName, Method)` + +For Example: + +```c++ +class MyFixture : public benchmark::Fixture {}; + +BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { + while (st.KeepRunning()) { + ... + } +} + +BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) { + while (st.KeepRunning()) { + ... + } +} +/* BarTest is NOT registered */ +BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); +/* BarTest is now registered */ +``` + +Output Formats +-------------- +The library supports multiple output formats. Use the +`--benchmark_format=` flag to set the format type. `tabular` is +the default format. + +The Tabular format is intended to be a human readable format. By default +the format generates color output. Context is output on stderr and the +tabular data on stdout. Example tabular output looks like: +``` +Benchmark Time(ns) CPU(ns) Iterations +---------------------------------------------------------------------- +BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s +BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s +BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s +``` + +The JSON format outputs human readable json split into two top level attributes. +The `context` attribute contains information about the run in general, including +information about the CPU and the date. +The `benchmarks` attribute contains a list of every benchmark run. 
Example json +output looks like: +``` +{ + "context": { + "date": "2015/03/17-18:40:25", + "num_cpus": 40, + "mhz_per_cpu": 2801, + "cpu_scaling_enabled": false, + "build_type": "debug" + }, + "benchmarks": [ + { + "name": "BM_SetInsert/1024/1", + "iterations": 94877, + "real_time": 29275, + "cpu_time": 29836, + "bytes_per_second": 134066, + "items_per_second": 33516 + }, + { + "name": "BM_SetInsert/1024/8", + "iterations": 21609, + "real_time": 32317, + "cpu_time": 32429, + "bytes_per_second": 986770, + "items_per_second": 246693 + }, + { + "name": "BM_SetInsert/1024/10", + "iterations": 21393, + "real_time": 32724, + "cpu_time": 33355, + "bytes_per_second": 1199226, + "items_per_second": 299807 + } + ] +} +``` + +The CSV format outputs comma-separated values. The `context` is output on stderr +and the CSV itself on stdout. Example CSV output looks like: +``` +name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label +"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942, +"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115, +"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, +``` + +Linking against the library +--------------------------- +When using gcc, it is necessary to link against pthread to avoid runtime exceptions. This is due to how gcc implements std::thread. See [issue #67](https://github.com/google/benchmark/issues/67) for more details. 
diff --git a/benchmark/appveyor.yml b/benchmark/appveyor.yml new file mode 100644 index 00000000..5368a4ac --- /dev/null +++ b/benchmark/appveyor.yml @@ -0,0 +1,55 @@ +version: '{build}' + +configuration: + - Static Debug + - Static Release +# - Shared Debug +# - Shared Release + +platform: + - x86 + - x64 + +environment: + matrix: + - compiler: gcc-4.9.2-posix +# - compiler: gcc-4.8.4-posix +# - compiler: msvc-12-seh + +install: + # derive some extra information + - for /f "tokens=1-2" %%a in ("%configuration%") do (@set "linkage=%%a") + - for /f "tokens=1-2" %%a in ("%configuration%") do (@set "variant=%%b") + - if "%linkage%"=="Shared" (set shared=YES) else (set shared=NO) + - for /f "tokens=1-3 delims=-" %%a in ("%compiler%") do (@set "compiler_name=%%a") + - for /f "tokens=1-3 delims=-" %%a in ("%compiler%") do (@set "compiler_version=%%b") + - for /f "tokens=1-3 delims=-" %%a in ("%compiler%") do (@set "compiler_threading=%%c") + - if "%platform%"=="x64" (set arch=x86_64) + - if "%platform%"=="x86" (set arch=i686) + # download the specific version of MinGW + - if "%compiler_name%"=="gcc" (for /f %%a in ('python mingw.py --quiet --version "%compiler_version%" --arch "%arch%" --threading "%compiler_threading%" --location "C:\mingw-builds"') do @set "compiler_path=%%a") + +before_build: + # Set up mingw commands + - if "%compiler_name%"=="gcc" (set "generator=MinGW Makefiles") + - if "%compiler_name%"=="gcc" (set "build=mingw32-make -j4") + - if "%compiler_name%"=="gcc" (set "test=mingw32-make CTEST_OUTPUT_ON_FAILURE=1 test") + # msvc specific commands + # TODO :) + # add the compiler path if needed + - if not "%compiler_path%"=="" (set "PATH=%PATH%;%compiler_path%") + # git bash conflicts with MinGW makefiles + - if "%generator%"=="MinGW Makefiles" (set "PATH=%PATH:C:\Program Files (x86)\Git\bin=%") + +build_script: + - cmake -G "%generator%" "-DCMAKE_BUILD_TYPE=%variant%" "-DBUILD_SHARED_LIBS=%shared%" + - cmd /c "%build%" + +test_script: + - cmd /c "%test%" + 
+matrix: + fast_finish: true + +cache: + - C:\mingw-builds diff --git a/benchmark/cmake/AddCXXCompilerFlag.cmake b/benchmark/cmake/AddCXXCompilerFlag.cmake new file mode 100644 index 00000000..870f11ae --- /dev/null +++ b/benchmark/cmake/AddCXXCompilerFlag.cmake @@ -0,0 +1,37 @@ +# - Adds a compiler flag if it is supported by the compiler +# +# This function checks that the supplied compiler flag is supported and then +# adds it to the corresponding compiler flags +# +# add_cxx_compiler_flag( []) +# +# - Example +# +# include(AddCXXCompilerFlag) +# add_cxx_compiler_flag(-Wall) +# add_cxx_compiler_flag(-no-strict-aliasing RELEASE) +# Requires CMake 2.6+ + +if(__add_cxx_compiler_flag) + return() +endif() +set(__add_cxx_compiler_flag INCLUDED) + +include(CheckCXXCompilerFlag) + +function(add_cxx_compiler_flag FLAG) + string(TOUPPER "HAVE_CXX_FLAG_${FLAG}" SANITIZED_FLAG) + string(REPLACE "+" "X" SANITIZED_FLAG ${SANITIZED_FLAG}) + string(REGEX REPLACE "[^A-Za-z_0-9]" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) + string(REGEX REPLACE "_+" "_" SANITIZED_FLAG ${SANITIZED_FLAG}) + set(CMAKE_REQUIRED_FLAGS "${FLAG}") + check_cxx_compiler_flag("" ${SANITIZED_FLAG}) + if(${SANITIZED_FLAG}) + set(VARIANT ${ARGV1}) + if(ARGV1) + string(TOUPPER "_${VARIANT}" VARIANT) + endif() + set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) + endif() +endfunction() + diff --git a/benchmark/cmake/CXXFeatureCheck.cmake b/benchmark/cmake/CXXFeatureCheck.cmake new file mode 100644 index 00000000..23ee8ac6 --- /dev/null +++ b/benchmark/cmake/CXXFeatureCheck.cmake @@ -0,0 +1,39 @@ +# - Compile and run code to check for C++ features +# +# This functions compiles a source file under the `cmake` folder +# and adds the corresponding `HAVE_[FILENAME]` flag to the CMake +# environment +# +# cxx_feature_check( []) +# +# - Example +# +# include(CXXFeatureCheck) +# cxx_feature_check(STD_REGEX) +# Requires CMake 2.6+ + +if(__cxx_feature_check) + return() +endif() 
+set(__cxx_feature_check INCLUDED) + +function(cxx_feature_check FILE) + string(TOLOWER ${FILE} FILE) + string(TOUPPER ${FILE} VAR) + string(TOUPPER "HAVE_${VAR}" FEATURE) + message("-- Performing Test ${FEATURE}") + try_run(RUN_${FEATURE} COMPILE_${FEATURE} + ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp) + if(RUN_${FEATURE} EQUAL 0) + message("-- Performing Test ${FEATURE} -- success") + set(HAVE_${VAR} 1 PARENT_SCOPE) + add_definitions(-DHAVE_${VAR}) + else() + if(NOT COMPILE_${FEATURE}) + message("-- Performing Test ${FEATURE} -- failed to compile") + else() + message("-- Performing Test ${FEATURE} -- compiled but failed to run") + endif() + endif() +endfunction() + diff --git a/benchmark/cmake/GetGitVersion.cmake b/benchmark/cmake/GetGitVersion.cmake new file mode 100644 index 00000000..e017fa30 --- /dev/null +++ b/benchmark/cmake/GetGitVersion.cmake @@ -0,0 +1,45 @@ +# - Returns a version string from Git tags +# +# This function inspects the annotated git tags for the project and returns a string +# into a CMake variable +# +# get_git_version() +# +# - Example +# +# include(GetGitVersion) +# get_git_version(GIT_VERSION) +# +# Requires CMake 2.6+ + +if(__get_git_version) + return() +endif() +set(__get_git_version INCLUDED) + +function(get_git_version var) + execute_process(COMMAND git describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 + RESULT_VARIABLE status + OUTPUT_VARIABLE GIT_VERSION + ERROR_QUIET) + if(${status}) + set(GIT_VERSION "v0.0.0") + else() + string(STRIP ${GIT_VERSION} GIT_VERSION) + string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION}) + endif() + + # Work out if the repository is dirty + execute_process(COMMAND git update-index -q --refresh + OUTPUT_QUIET + ERROR_QUIET) + execute_process(COMMAND git diff-index --name-only HEAD -- + OUTPUT_VARIABLE GIT_DIFF_INDEX + ERROR_QUIET) + string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) + if (${GIT_DIRTY}) + set(GIT_VERSION "${GIT_VERSION}-dirty") + endif() + 
message("-- git Version: ${GIT_VERSION}") + set(${var} ${GIT_VERSION} PARENT_SCOPE) +endfunction() diff --git a/benchmark/cmake/gnu_posix_regex.cpp b/benchmark/cmake/gnu_posix_regex.cpp new file mode 100644 index 00000000..b5b91cda --- /dev/null +++ b/benchmark/cmake/gnu_posix_regex.cpp @@ -0,0 +1,12 @@ +#include +#include +int main() { + std::string str = "test0159"; + regex_t re; + int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + return ec; + } + return regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; +} + diff --git a/benchmark/cmake/posix_regex.cpp b/benchmark/cmake/posix_regex.cpp new file mode 100644 index 00000000..a31af804 --- /dev/null +++ b/benchmark/cmake/posix_regex.cpp @@ -0,0 +1,12 @@ +#include +#include +int main() { + std::string str = "test0159"; + regex_t re; + int ec = regcomp(&re, "^[a-z]+[0-9]+$", REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + return ec; + } + return regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; +} + diff --git a/benchmark/cmake/std_regex.cpp b/benchmark/cmake/std_regex.cpp new file mode 100644 index 00000000..696f2a26 --- /dev/null +++ b/benchmark/cmake/std_regex.cpp @@ -0,0 +1,10 @@ +#include +#include +int main() { + const std::string str = "test0159"; + std::regex re; + re = std::regex("^[a-z]+[0-9]+$", + std::regex_constants::extended | std::regex_constants::nosubs); + return std::regex_search(str, re) ? 
0 : -1; +} + diff --git a/benchmark/cmake/steady_clock.cpp b/benchmark/cmake/steady_clock.cpp new file mode 100644 index 00000000..66d50d17 --- /dev/null +++ b/benchmark/cmake/steady_clock.cpp @@ -0,0 +1,7 @@ +#include + +int main() { + typedef std::chrono::steady_clock Clock; + Clock::time_point tp = Clock::now(); + ((void)tp); +} diff --git a/benchmark/cmake/thread_safety_attributes.cpp b/benchmark/cmake/thread_safety_attributes.cpp new file mode 100644 index 00000000..46161bab --- /dev/null +++ b/benchmark/cmake/thread_safety_attributes.cpp @@ -0,0 +1,4 @@ +#define HAVE_THREAD_SAFETY_ATTRIBUTES +#include "../src/mutex.h" + +int main() {} diff --git a/benchmark/include/benchmark/benchmark.h b/benchmark/include/benchmark/benchmark.h new file mode 100644 index 00000000..18aa9e63 --- /dev/null +++ b/benchmark/include/benchmark/benchmark.h @@ -0,0 +1,21 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef BENCHMARK_BENCHMARK_H_ +#define BENCHMARK_BENCHMARK_H_ + +#include "macros.h" +#include "benchmark_api.h" +#include "reporter.h" + +#endif // BENCHMARK_BENCHMARK_H_ diff --git a/benchmark/include/benchmark/benchmark_api.h b/benchmark/include/benchmark/benchmark_api.h new file mode 100644 index 00000000..ee97de28 --- /dev/null +++ b/benchmark/include/benchmark/benchmark_api.h @@ -0,0 +1,603 @@ +// Support for registering benchmarks for functions. 
+ +/* Example usage: +// Define a function that executes the code to be measured a +// specified number of times: +static void BM_StringCreation(benchmark::State& state) { + while (state.KeepRunning()) + std::string empty_string; +} + +// Register the function as a benchmark +BENCHMARK(BM_StringCreation); + +// Define another benchmark +static void BM_StringCopy(benchmark::State& state) { + std::string x = "hello"; + while (state.KeepRunning()) + std::string copy(x); +} +BENCHMARK(BM_StringCopy); + +// Augment the main() program to invoke benchmarks if specified +// via the --benchmark_filter command line flag. E.g., +// my_unittest --benchmark_filter=all +// my_unittest --benchmark_filter=BM_StringCreation +// my_unittest --benchmark_filter=String +// my_unittest --benchmark_filter='Copy|Creation' +int main(int argc, const char** argv) { + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + return 0; +} + +// Sometimes a family of microbenchmarks can be implemented with +// just one routine that takes an extra argument to specify which +// one of the family of benchmarks to run. For example, the following +// code defines a family of microbenchmarks for measuring the speed +// of memcpy() calls of different lengths: + +static void BM_memcpy(benchmark::State& state) { + char* src = new char[state.range_x()]; char* dst = new char[state.range_x()]; + memset(src, 'x', state.range_x()); + while (state.KeepRunning()) + memcpy(dst, src, state.range_x()); + state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(state.range_x())); + delete[] src; delete[] dst; +} +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); + +// The preceding code is quite repetitive, and can be replaced with the +// following short-hand. The following invocation will pick a few +// appropriate arguments in the specified range and will generate a +// microbenchmark for each such argument. 
+BENCHMARK(BM_memcpy)->Range(8, 8<<10); + +// You might have a microbenchmark that depends on two inputs. For +// example, the following code defines a family of microbenchmarks for +// measuring the speed of set insertion. +static void BM_SetInsert(benchmark::State& state) { + while (state.KeepRunning()) { + state.PauseTiming(); + set data = ConstructRandomSet(state.range_x()); + state.ResumeTiming(); + for (int j = 0; j < state.range_y(); ++j) + data.insert(RandomNumber()); + } +} +BENCHMARK(BM_SetInsert) + ->ArgPair(1<<10, 1) + ->ArgPair(1<<10, 8) + ->ArgPair(1<<10, 64) + ->ArgPair(1<<10, 512) + ->ArgPair(8<<10, 1) + ->ArgPair(8<<10, 8) + ->ArgPair(8<<10, 64) + ->ArgPair(8<<10, 512); + +// The preceding code is quite repetitive, and can be replaced with +// the following short-hand. The following macro will pick a few +// appropriate arguments in the product of the two specified ranges +// and will generate a microbenchmark for each such pair. +BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512); + +// For more complex patterns of inputs, passing a custom function +// to Apply allows programmatic specification of an +// arbitrary set of arguments to run the microbenchmark on. +// The following example enumerates a dense range on +// one parameter, and a sparse range on the second. +static benchmark::internal::Benchmark* CustomArguments( + benchmark::internal::Benchmark* b) { + for (int i = 0; i <= 10; ++i) + for (int j = 32; j <= 1024*1024; j *= 8) + b = b->ArgPair(i, j); + return b; +} +BENCHMARK(BM_SetInsert)->Apply(CustomArguments); + +// Templated microbenchmarks work the same way: +// Produce then consume 'size' messages 'iters' times +// Measures throughput in the absence of multiprogramming. 
+template int BM_Sequential(benchmark::State& state) { + Q q; + typename Q::value_type v; + while (state.KeepRunning()) { + for (int i = state.range_x(); i--; ) + q.push(v); + for (int e = state.range_x(); e--; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast(state.iterations())*state.range_x()); +} +BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10); + +Use `Benchmark::MinTime(double t)` to set the minimum time used to run the +benchmark. This option overrides the `benchmark_min_time` flag. + +void BM_test(benchmark::State& state) { + ... body ... +} +BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds. + +In a multithreaded test, it is guaranteed that none of the threads will start +until all have called KeepRunning, and all will have finished before KeepRunning +returns false. As such, any global setup or teardown you want to do can be +wrapped in a check against the thread index: + +static void BM_MultiThreaded(benchmark::State& state) { + if (state.thread_index == 0) { + // Setup code here. + } + while (state.KeepRunning()) { + // Run the test as normal. + } + if (state.thread_index == 0) { + // Teardown code here. + } +} +BENCHMARK(BM_MultiThreaded)->Threads(4); +*/ + +#ifndef BENCHMARK_BENCHMARK_API_H_ +#define BENCHMARK_BENCHMARK_API_H_ + +#include +#include +#include + +#include "macros.h" + +namespace benchmark { +class BenchmarkReporter; + +void Initialize(int* argc, const char** argv); + +// Otherwise, run all benchmarks specified by the --benchmark_filter flag, +// and exit after running the benchmarks. +void RunSpecifiedBenchmarks(); +void RunSpecifiedBenchmarks(BenchmarkReporter* reporter); + +// If this routine is called, peak memory allocation past this point in the +// benchmark is reported at the end of the benchmark report line. (It is +// computed by running the benchmark once with a single iteration and a memory +// tracer.) 
+// TODO(dominic) +// void MemoryUsage(); + +namespace internal { +class Benchmark; +class BenchmarkImp; +class BenchmarkFamilies; + +template struct Voider { + typedef void type; +}; + +template +struct EnableIfString {}; + +template +struct EnableIfString::type> { + typedef int type; +}; + +void UseCharPointer(char const volatile*); + +// Take ownership of the pointer and register the benchmark. Return the +// registered benchmark. +Benchmark* RegisterBenchmarkInternal(Benchmark*); + +} // end namespace internal + + +// The DoNotOptimize(...) function can be used to prevent a value or +// expression from being optimized away by the compiler. This function is +// intended to add little to no overhead. +// See: http://stackoverflow.com/questions/28287064 +#if defined(__clang__) && defined(__GNUC__) +// TODO(ericwf): Clang has a bug where it tries to always use a register +// even if value must be stored in memory. This causes codegen to fail. +// To work around this we remove the "r" modifier so the operand is always +// loaded into memory. +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : "+m" (const_cast(value))); +} +#elif defined(__GNUC__) +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : "+rm" (const_cast(value))); +} +#else +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + internal::UseCharPointer(&reinterpret_cast(value)); +} +#endif + + +// State is passed to a running Benchmark and contains state for the +// benchmark to use. +class State { +public: + State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i); + + // Returns true iff the benchmark should continue through another iteration. + // NOTE: A benchmark may not return from the test until KeepRunning() has + // returned false. 
+ bool KeepRunning() { + if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { + ResumeTiming(); + started_ = true; + } + bool const res = total_iterations_++ < max_iterations; + if (BENCHMARK_BUILTIN_EXPECT(!res, false)) { + assert(started_); + PauseTiming(); + // Total iterations now is one greater than max iterations. Fix this. + total_iterations_ = max_iterations; + } + return res; + } + + // REQUIRES: timer is running + // Stop the benchmark timer. If not called, the timer will be + // automatically stopped after KeepRunning() returns false for the first time. + // + // For threaded benchmarks the PauseTiming() function acts + // like a barrier. I.e., the ith call by a particular thread to this + // function will block until all threads have made their ith call. + // The timer will stop when the last thread has called this function. + // + // NOTE: PauseTiming()/ResumeTiming() are relatively + // heavyweight, and so their use should generally be avoided + // within each benchmark iteration, if possible. + void PauseTiming(); + + // REQUIRES: timer is not running + // Start the benchmark timer. The timer is NOT running on entrance to the + // benchmark function. It begins running after the first call to KeepRunning() + // + // For threaded benchmarks the ResumeTiming() function acts + // like a barrier. I.e., the ith call by a particular thread to this + // function will block until all threads have made their ith call. + // The timer will start when the last thread has called this function. + // + // NOTE: PauseTiming()/ResumeTiming() are relatively + // heavyweight, and so their use should generally be avoided + // within each benchmark iteration, if possible. + void ResumeTiming(); + + // Set the number of bytes processed by the current benchmark + // execution. This routine is typically called once at the end of a + // throughput oriented benchmark. 
If this routine is called with a + // value > 0, the report is printed in MB/sec instead of nanoseconds + // per iteration. + // + // REQUIRES: a benchmark has exited its KeepRunning loop. + BENCHMARK_ALWAYS_INLINE + void SetBytesProcessed(size_t bytes) { + bytes_processed_ = bytes; + } + + BENCHMARK_ALWAYS_INLINE + size_t bytes_processed() const { + return bytes_processed_; + } + + // If this routine is called with items > 0, then an items/s + // label is printed on the benchmark report line for the currently + // executing benchmark. It is typically called at the end of a processing + // benchmark where a processing items/second output is desired. + // + // REQUIRES: a benchmark has exited its KeepRunning loop. + BENCHMARK_ALWAYS_INLINE + void SetItemsProcessed(size_t items) { + items_processed_ = items; + } + + BENCHMARK_ALWAYS_INLINE + size_t items_processed() const { + return items_processed_; + } + + // If this routine is called, the specified label is printed at the + // end of the benchmark report line for the currently executing + // benchmark. Example: + // static void BM_Compress(int iters) { + // ... + // double compression = input_size / output_size; + // benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression)); + // } + // Produces output that looks like: + // BM_Compress 50 50 14115038 compress:27.3% + // + // REQUIRES: a benchmark has exited its KeepRunning loop. + void SetLabel(const char* label); + + // Allow the use of std::string without actually including <string>. + // This function does not participate in overload resolution unless StringType + // has the nested typename `basic_string`. This typename should be provided + // as an injected class name in the case of std::string. + template + void SetLabel(StringType const & str, + typename internal::EnableIfString::type = 1) { + this->SetLabel(str.c_str()); + } + + // Range arguments for this run. CHECKs if the argument has been set. 
+ BENCHMARK_ALWAYS_INLINE + int range_x() const { + assert(has_range_x_); + ((void)has_range_x_); // Prevent unused warning. + return range_x_; + } + + BENCHMARK_ALWAYS_INLINE + int range_y() const { + assert(has_range_y_); + ((void)has_range_y_); // Prevent unused warning. + return range_y_; + } + + BENCHMARK_ALWAYS_INLINE + size_t iterations() const { return total_iterations_; } + +private: + bool started_; + size_t total_iterations_; + + bool has_range_x_; + int range_x_; + + bool has_range_y_; + int range_y_; + + size_t bytes_processed_; + size_t items_processed_; + +public: + const int thread_index; + const size_t max_iterations; + +private: + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State); +}; + +namespace internal { + +typedef void(Function)(State&); + +// ------------------------------------------------------ +// Benchmark registration object. The BENCHMARK() macro expands +// into an internal::Benchmark* object. Various methods can +// be called on this object to change the properties of the benchmark. +// Each method returns "this" so that multiple method calls can be +// chained into one expression. +class Benchmark { +public: + virtual ~Benchmark(); + + // Note: the following methods all return "this" so that multiple + // method calls can be chained together in one expression. + + // Run this benchmark once with "x" as the extra argument passed + // to the function. + // REQUIRES: The function passed to the constructor must accept an arg1. + Benchmark* Arg(int x); + + // Run this benchmark once for a number of values picked from the + // range [start..limit]. (start and limit are always picked.) + // REQUIRES: The function passed to the constructor must accept an arg1. + Benchmark* Range(int start, int limit); + + // Run this benchmark once for every value in the range [start..limit] + // REQUIRES: The function passed to the constructor must accept an arg1. 
+ Benchmark* DenseRange(int start, int limit); + + // Run this benchmark once with "x,y" as the extra arguments passed + // to the function. + // REQUIRES: The function passed to the constructor must accept arg1,arg2. + Benchmark* ArgPair(int x, int y); + + // Pick a set of values A from the range [lo1..hi1] and a set + // of values B from the range [lo2..hi2]. Run the benchmark for + // every pair of values in the cartesian product of A and B + // (i.e., for all combinations of the values in A and B). + // REQUIRES: The function passed to the constructor must accept arg1,arg2. + Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2); + + // Pass this benchmark object to *func, which can customize + // the benchmark by calling various methods like Arg, ArgPair, + // Threads, etc. + Benchmark* Apply(void (*func)(Benchmark* benchmark)); + + // Set the minimum amount of time to use when running this benchmark. This + // option overrides the `benchmark_min_time` flag. + Benchmark* MinTime(double t); + + // If a particular benchmark is I/O bound, or if for some reason CPU + // timings are not representative, call this method. If called, the elapsed + // time will be used to control how many iterations are run, and in the + // printing of items/second or MB/seconds values. If not called, the cpu + // time used by the benchmark will be used. + Benchmark* UseRealTime(); + + // Support for running multiple copies of the same benchmark concurrently + // in multiple threads. This may be useful when measuring the scaling + // of some piece of code. + + // Run one instance of this benchmark concurrently in t threads. + Benchmark* Threads(int t); + + // Pick a set of values T from [min_threads,max_threads]. + // min_threads and max_threads are always included in T. Run this + // benchmark once for each value in T. The benchmark run for a + // particular value t consists of t threads running the benchmark + // function concurrently. 
For example, consider: + // BENCHMARK(Foo)->ThreadRange(1,16); + // This will run the following benchmarks: + // Foo in 1 thread + // Foo in 2 threads + // Foo in 4 threads + // Foo in 8 threads + // Foo in 16 threads + Benchmark* ThreadRange(int min_threads, int max_threads); + + // Equivalent to ThreadRange(NumCPUs(), NumCPUs()) + Benchmark* ThreadPerCpu(); + + virtual void Run(State& state) = 0; + + // Used inside the benchmark implementation + struct Instance; + +protected: + explicit Benchmark(const char* name); + Benchmark(Benchmark const&); + void SetName(const char* name); + +private: + friend class BenchmarkFamilies; + BenchmarkImp* imp_; + + Benchmark& operator=(Benchmark const&); +}; + +// The class used to hold all Benchmarks created from static function. +// (i.e. those created using the BENCHMARK(...) macros.) +class FunctionBenchmark : public Benchmark { +public: + FunctionBenchmark(const char* name, Function* func) + : Benchmark(name), func_(func) + {} + + virtual void Run(State& st); +private: + Function* func_; +}; + +} // end namespace internal + +// The base class for all fixture tests. +class Fixture: public internal::Benchmark { +public: + Fixture() : internal::Benchmark("") {} + + virtual void Run(State& st) { + this->SetUp(); + this->BenchmarkCase(st); + this->TearDown(); + } + + virtual void SetUp() {} + virtual void TearDown() {} + +protected: + virtual void BenchmarkCase(State&) = 0; +}; + +} // end namespace benchmark + + +// ------------------------------------------------------ +// Macro to register benchmarks + +// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1 +// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be +// empty. If X is empty the expression becomes (+1 == +0). 
+#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0) +#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__ +#else +#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__ +#endif + +// Helpers for generating unique variable names +#define BENCHMARK_PRIVATE_NAME(n) \ + BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) +#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) +#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c + +#define BENCHMARK_PRIVATE_DECLARE(n) \ + static ::benchmark::internal::Benchmark* \ + BENCHMARK_PRIVATE_NAME(n) BENCHMARK_UNUSED + +#define BENCHMARK(n) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n, n))) + +// Old-style macros +#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) +#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2)) +#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) +#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ + BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2)) + +// This will register a benchmark for a templatized function. For example: +// +// template <int arg> +// void BM_Foo(int iters); +// +// BENCHMARK_TEMPLATE(BM_Foo, 1); +// +// will register BM_Foo<1> as a benchmark. +// +// The function handed to FunctionBenchmark must be the instantiation n<a> +// (a bare template name is not a function), matching the variadic +// BENCHMARK_TEMPLATE form below. +#define BENCHMARK_TEMPLATE1(n, a) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>))) + +#define BENCHMARK_TEMPLATE2(n, a, b) \ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #n "<" #a "," #b ">", n<a, b>))) + +#if __cplusplus >= 201103L +#define BENCHMARK_TEMPLATE(n, ...) 
\ + BENCHMARK_PRIVATE_DECLARE(n) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>))) +#else +#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) +#endif + + +#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ +class BaseClass##_##Method##_Benchmark : public BaseClass { \ +public: \ + BaseClass##_##Method##_Benchmark() : BaseClass() { \ + this->SetName(#BaseClass "/" #Method);} \ +protected: \ + virtual void BenchmarkCase(::benchmark::State&); \ +}; + +#define BENCHMARK_DEFINE_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + void BaseClass##_##Method##_Benchmark::BenchmarkCase + +#define BENCHMARK_REGISTER_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark) + +#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ + BENCHMARK_PRIVATE_DECLARE(TestName) = \ + (::benchmark::internal::RegisterBenchmarkInternal(new TestName())) + +// This macro will define and register a benchmark within a fixture class. +#define BENCHMARK_F(BaseClass, Method) \ + BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ + BENCHMARK_REGISTER_F(BaseClass, Method); \ + void BaseClass##_##Method##_Benchmark::BenchmarkCase + + +// Helper macro to create a main routine in a test that runs the benchmarks +#define BENCHMARK_MAIN() \ + int main(int argc, const char** argv) { \ + ::benchmark::Initialize(&argc, argv); \ + ::benchmark::RunSpecifiedBenchmarks(); \ + } + +#endif // BENCHMARK_BENCHMARK_API_H_ diff --git a/benchmark/include/benchmark/macros.h b/benchmark/include/benchmark/macros.h new file mode 100644 index 00000000..5e75ed32 --- /dev/null +++ b/benchmark/include/benchmark/macros.h @@ -0,0 +1,44 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef BENCHMARK_MACROS_H_ +#define BENCHMARK_MACROS_H_ + +#if __cplusplus < 201103L +# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + TypeName& operator=(const TypeName&) +#else +# define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + TypeName& operator=(const TypeName&) = delete +#endif + +#if defined(__GNUC__) +# define BENCHMARK_UNUSED __attribute__((unused)) +# define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) && !defined(__clang__) +# define BENCHMARK_UNUSED +# define BENCHMARK_ALWAYS_INLINE __forceinline +#else +# define BENCHMARK_UNUSED +# define BENCHMARK_ALWAYS_INLINE +#endif + +#if defined(__GNUC__) +# define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) +#else +# define BENCHMARK_BUILTIN_EXPECT(x, y) x +#endif + +#endif // BENCHMARK_MACROS_H_ diff --git a/benchmark/include/benchmark/reporter.h b/benchmark/include/benchmark/reporter.h new file mode 100644 index 00000000..bb4ccdfe --- /dev/null +++ b/benchmark/include/benchmark/reporter.h @@ -0,0 +1,122 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef BENCHMARK_REPORTER_H_ +#define BENCHMARK_REPORTER_H_ + +#include +#include +#include + +#include "benchmark_api.h" // For forward declaration of BenchmarkReporter + +namespace benchmark { + +// Interface for custom benchmark result printers. +// By default, benchmark reports are printed to stdout. However an application +// can control the destination of the reports by calling +// RunSpecifiedBenchmarks and passing it a custom reporter object. +// The reporter object must implement the following interface. +class BenchmarkReporter { + public: + struct Context { + int num_cpus; + double mhz_per_cpu; + bool cpu_scaling_enabled; + + // The number of chars in the longest benchmark name. + size_t name_field_width; + }; + + struct Run { + Run() : + iterations(1), + real_accumulated_time(0), + cpu_accumulated_time(0), + bytes_per_second(0), + items_per_second(0), + max_heapbytes_used(0) {} + + std::string benchmark_name; + std::string report_label; // Empty if not set by benchmark. + size_t iterations; + double real_accumulated_time; + double cpu_accumulated_time; + + // Zero if not set by benchmark. + double bytes_per_second; + double items_per_second; + + // This is set to 0.0 if memory tracing is not enabled. + double max_heapbytes_used; + }; + + // Called once for every suite of benchmarks run. + // The parameter "context" contains information that the + // reporter may wish to use when generating its report, for example the + // platform under which the benchmarks are running. 
The benchmark run is + // never started if this function returns false, allowing the reporter + // to skip runs based on the context information. + virtual bool ReportContext(const Context& context) = 0; + + // Called once for each group of benchmark runs, gives information about + // cpu-time and heap memory usage during the benchmark run. + // Note that all the grouped benchmark runs should refer to the same + // benchmark, thus have the same name. + virtual void ReportRuns(const std::vector<Run>& report) = 0; + + // Called once and only once after every group of benchmarks is run and + // reported. + virtual void Finalize(); + + virtual ~BenchmarkReporter(); +protected: + static void ComputeStats(std::vector<Run> const& reports, Run* mean, Run* stddev); +}; + +// Simple reporter that outputs benchmark data to the console. This is the +// default reporter used by RunSpecifiedBenchmarks(). +class ConsoleReporter : public BenchmarkReporter { + public: + virtual bool ReportContext(const Context& context); + virtual void ReportRuns(const std::vector<Run>& reports); +protected: + virtual void PrintRunData(const Run& report); + + size_t name_field_width_; +}; + +class JSONReporter : public BenchmarkReporter { +public: + JSONReporter() : first_report_(true) {} + virtual bool ReportContext(const Context& context); + virtual void ReportRuns(const std::vector<Run>& reports); + virtual void Finalize(); + +private: + void PrintRunData(const Run& report); + + bool first_report_; +}; + +class CSVReporter : public BenchmarkReporter { +public: + virtual bool ReportContext(const Context& context); + virtual void ReportRuns(const std::vector<Run>& reports); + +private: + void PrintRunData(const Run& report); +}; + +} // end namespace benchmark +#endif // BENCHMARK_REPORTER_H_ diff --git a/benchmark/mingw.py b/benchmark/mingw.py new file mode 100644 index 00000000..706ad559 --- /dev/null +++ b/benchmark/mingw.py @@ -0,0 +1,320 @@ +#! 
/usr/bin/env python +# encoding: utf-8 + +import argparse +import errno +import logging +import os +import platform +import re +import sys +import subprocess +import tempfile + +try: + import winreg +except ImportError: + import _winreg as winreg +try: + import urllib.request as request +except ImportError: + import urllib as request +try: + import urllib.parse as parse +except ImportError: + import urlparse as parse + +class EmptyLogger(object): + ''' + Provides an implementation that performs no logging + ''' + def debug(self, *k, **kw): + pass + def info(self, *k, **kw): + pass + def warn(self, *k, **kw): + pass + def error(self, *k, **kw): + pass + def critical(self, *k, **kw): + pass + def setLevel(self, *k, **kw): + pass + +urls = ( + 'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20' + 'targetting%20Win32/Personal%20Builds/mingw-builds/installer/' + 'repository.txt', + 'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/' + 'repository.txt' +) +''' +A list of mingw-build repositories +''' + +def repository(urls = urls, log = EmptyLogger()): + ''' + Downloads and parse mingw-build repository files and parses them + ''' + log.info('getting mingw-builds repository') + versions = {} + re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files') + re_sub = r'http://downloads.sourceforge.net/project/\1' + for url in urls: + log.debug(' - requesting: %s', url) + socket = request.urlopen(url) + repo = socket.read() + if not isinstance(repo, str): + repo = repo.decode(); + socket.close() + for entry in repo.split('\n')[:-1]: + value = entry.split('|') + version = tuple([int(n) for n in value[0].strip().split('.')]) + version = versions.setdefault(version, {}) + arch = value[1].strip() + if arch == 'x32': + arch = 'i686' + elif arch == 'x64': + arch = 'x86_64' + arch = version.setdefault(arch, {}) + threading = arch.setdefault(value[2].strip(), {}) + exceptions = threading.setdefault(value[3].strip(), {}) + revision = 
exceptions.setdefault(int(value[4].strip()[3:]), + re_sourceforge.sub(re_sub, value[5].strip())) + return versions + +def find_in_path(file, path=None): + ''' + Attempts to find an executable in the path + ''' + if platform.system() == 'Windows': + file += '.exe' + if path is None: + path = os.environ.get('PATH', '') + if type(path) is type(''): + path = path.split(os.pathsep) + return list(filter(os.path.exists, + map(lambda dir, file=file: os.path.join(dir, file), path))) + +def find_7zip(log = EmptyLogger()): + ''' + Attempts to find 7zip for unpacking the mingw-build archives + ''' + log.info('finding 7zip') + path = find_in_path('7z') + if not path: + key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip') + path, _ = winreg.QueryValueEx(key, 'Path') + path = [os.path.join(path, '7z.exe')] + log.debug('found \'%s\'', path[0]) + return path[0] + +find_7zip() + +def unpack(archive, location, log = EmptyLogger()): + ''' + Unpacks a mingw-builds archive + ''' + sevenzip = find_7zip(log) + log.info('unpacking %s', os.path.basename(archive)) + cmd = [sevenzip, 'x', archive, '-o' + location, '-y'] + log.debug(' - %r', cmd) + with open(os.devnull, 'w') as devnull: + subprocess.check_call(cmd, stdout = devnull) + +def download(url, location, log = EmptyLogger()): + ''' + Downloads and unpacks a mingw-builds archive + ''' + log.info('downloading MinGW') + log.debug(' - url: %s', url) + log.debug(' - location: %s', location) + + re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*') + + stream = request.urlopen(url) + try: + content = stream.getheader('Content-Disposition') or '' + except AttributeError: + content = stream.headers.getheader('Content-Disposition') or '' + matches = re_content.match(content) + if matches: + filename = matches.group(2) + else: + parsed = parse.urlparse(stream.geturl()) + filename = os.path.basename(parsed.path) + + try: + os.makedirs(location) + except OSError as e: + if e.errno == errno.EEXIST and 
os.path.isdir(location): + pass + else: + raise + + archive = os.path.join(location, filename) + with open(archive, 'wb') as out: + while True: + buf = stream.read(1024) + if not buf: + break + out.write(buf) + unpack(archive, location, log = log) + os.remove(archive) + + possible = os.path.join(location, 'mingw64') + if not os.path.exists(possible): + possible = os.path.join(location, 'mingw32') + if not os.path.exists(possible): + raise ValueError('Failed to find unpacked MinGW: ' + possible) + return possible + +def root(location = None, arch = None, version = None, threading = None, + exceptions = None, revision = None, log = EmptyLogger()): + ''' + Returns the root folder of a specific version of the mingw-builds variant + of gcc. Will download the compiler if needed + ''' + + # Get the repository if we don't have all the information + if not (arch and version and threading and exceptions and revision): + versions = repository(log = log) + + # Determine some defaults + version = version or max(versions.keys()) + if not arch: + arch = platform.machine().lower() + if arch == 'x86': + arch = 'i686' + elif arch == 'amd64': + arch = 'x86_64' + if not threading: + keys = versions[version][arch].keys() + if 'posix' in keys: + threading = 'posix' + elif 'win32' in keys: + threading = 'win32' + else: + threading = keys[0] + if not exceptions: + keys = versions[version][arch][threading].keys() + if 'seh' in keys: + exceptions = 'seh' + elif 'sjlj' in keys: + exceptions = 'sjlj' + else: + exceptions = keys[0] + if revision == None: + revision = max(versions[version][arch][threading][exceptions].keys()) + if not location: + location = os.path.join(tempfile.gettempdir(), 'mingw-builds') + + # Get the download url + url = versions[version][arch][threading][exceptions][revision] + + # Tell the user whatzzup + log.info('finding MinGW %s', '.'.join(str(v) for v in version)) + log.debug(' - arch: %s', arch) + log.debug(' - threading: %s', threading) + log.debug(' - exceptions: 
%s', exceptions) + log.debug(' - revision: %s', revision) + log.debug(' - url: %s', url) + + # Store each specific revision differently + slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}' + slug = slug.format( + version = '.'.join(str(v) for v in version), + arch = arch, + threading = threading, + exceptions = exceptions, + revision = revision + ) + if arch == 'x86_64': + root_dir = os.path.join(location, slug, 'mingw64') + elif arch == 'i686': + root_dir = os.path.join(location, slug, 'mingw32') + else: + raise ValueError('Unknown MinGW arch: ' + arch) + + # Download if needed + if not os.path.exists(root_dir): + downloaded = download(url, os.path.join(location, slug), log = log) + if downloaded != root_dir: + raise ValueError('The location of mingw did not match\n%s\n%s' + % (downloaded, root_dir)) + + return root_dir + +def str2ver(string): + ''' + Converts a version string into a tuple + ''' + try: + version = tuple(int(v) for v in string.split('.')) + if len(version) is not 3: + raise ValueError() + except ValueError: + raise argparse.ArgumentTypeError( + 'please provide a three digit version string') + return version + +def main(): + ''' + Invoked when the script is run directly by the python interpreter + ''' + parser = argparse.ArgumentParser( + description = 'Downloads a specific version of MinGW', + formatter_class = argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('--location', + help = 'the location to download the compiler to', + default = os.path.join(tempfile.gettempdir(), 'mingw-builds')) + parser.add_argument('--arch', required = True, choices = ['i686', 'x86_64'], + help = 'the target MinGW architecture string') + parser.add_argument('--version', type = str2ver, + help = 'the version of GCC to download') + parser.add_argument('--threading', choices = ['posix', 'win32'], + help = 'the threading type of the compiler') + parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'], + help = 'the method to 
throw exceptions') + parser.add_argument('--revision', type=int, + help = 'the revision of the MinGW release') + group = parser.add_mutually_exclusive_group() + group.add_argument('-v', '--verbose', action='store_true', + help='increase the script output verbosity') + group.add_argument('-q', '--quiet', action='store_true', + help='only print errors and warning') + args = parser.parse_args() + + # Create the logger + logger = logging.getLogger('mingw') + handler = logging.StreamHandler() + formatter = logging.Formatter('%(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + if args.quiet: + logger.setLevel(logging.WARN) + if args.verbose: + logger.setLevel(logging.DEBUG) + + # Get MinGW + root_dir = root(location = args.location, arch = args.arch, + version = args.version, threading = args.threading, + exceptions = args.exceptions, revision = args.revision, + log = logger) + + sys.stdout.write('%s\n' % os.path.join(root_dir, 'bin')) + +if __name__ == '__main__': + try: + main() + except IOError as e: + sys.stderr.write('IO error: %s\n' % e) + sys.exit(1) + except OSError as e: + sys.stderr.write('OS error: %s\n' % e) + sys.exit(1) + except KeyboardInterrupt as e: + sys.stderr.write('Killed\n') + sys.exit(1) diff --git a/benchmark/src/CMakeLists.txt b/benchmark/src/CMakeLists.txt new file mode 100644 index 00000000..811d0755 --- /dev/null +++ b/benchmark/src/CMakeLists.txt @@ -0,0 +1,51 @@ +# Allow the source files to find headers in src/ +include_directories(${PROJECT_SOURCE_DIR}/src) + +# Define the source files +set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" + "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc" + "log.cc" "reporter.cc" "sleep.cc" "string_util.cc" + "sysinfo.cc" "walltime.cc") +# Determine the correct regular expression engine to use +if(HAVE_STD_REGEX) + set(RE_FILES "re_std.cc") +elseif(HAVE_GNU_POSIX_REGEX) + set(RE_FILES "re_posix.cc") 
+elseif(HAVE_POSIX_REGEX) + set(RE_FILES "re_posix.cc") +else() + message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") +endif() + +add_library(benchmark ${SOURCE_FILES} ${RE_FILES}) + + +set_target_properties(benchmark PROPERTIES + OUTPUT_NAME "benchmark" + VERSION ${GENERIC_LIB_VERSION} + SOVERSION ${GENERIC_LIB_SOVERSION} +) + +# Link threads. +target_link_libraries(benchmark ${CMAKE_THREAD_LIBS_INIT}) + +# We need extra libraries on Windows +if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") + target_link_libraries(benchmark Shlwapi) +endif() + +# Expose public API +target_include_directories(benchmark PUBLIC ${PROJECT_SOURCE_DIR}/include) + +# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable) +install( + TARGETS benchmark + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + COMPONENT library) + +install( + DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" + DESTINATION include + FILES_MATCHING PATTERN "*.*h") diff --git a/benchmark/src/arraysize.h b/benchmark/src/arraysize.h new file mode 100644 index 00000000..3a7c0c78 --- /dev/null +++ b/benchmark/src/arraysize.h @@ -0,0 +1,36 @@ +#ifndef BENCHMARK_ARRAYSIZE_H_ +#define BENCHMARK_ARRAYSIZE_H_ + +#include + +#include "internal_macros.h" + +namespace benchmark { +namespace internal { +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. +// + + +// This template function declaration is used in defining arraysize. +// Note that the function doesn't need an implementation, as we only +// use its type. +template +char (&ArraySizeHelper(T (&array)[N]))[N]; + +// That gcc wants both of these prototypes seems mysterious. VC, for +// its part, can't decide which to use (another mystery). 
Matching of +// template overloads: the final frontier. +#ifndef COMPILER_MSVC +template +char (&ArraySizeHelper(const T (&array)[N]))[N]; +#endif + +#define arraysize(array) (sizeof(::benchmark::internal::ArraySizeHelper(array))) + +} // end namespace internal +} // end namespace benchmark + +#endif // BENCHMARK_ARRAYSIZE_H_ diff --git a/benchmark/src/benchmark.cc b/benchmark/src/benchmark.cc new file mode 100644 index 00000000..56104aca --- /dev/null +++ b/benchmark/src/benchmark.cc @@ -0,0 +1,918 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/benchmark.h" +#include "internal_macros.h" + +#include +#ifndef OS_WINDOWS +#include +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "check.h" +#include "commandlineflags.h" +#include "log.h" +#include "mutex.h" +#include "re.h" +#include "stat.h" +#include "string_util.h" +#include "sysinfo.h" +#include "walltime.h" + +DEFINE_bool(benchmark_list_tests, false, + "Print a list of benchmarks. This option overrides all other " + "options."); + +DEFINE_string(benchmark_filter, ".", + "A regular expression that specifies the set of benchmarks " + "to execute. If this flag is empty, no benchmarks are run. 
" + "If this flag is the string \"all\", all benchmarks linked " + "into the process are run."); + +DEFINE_double(benchmark_min_time, 0.5, + "Minimum number of seconds we should run benchmark before " + "results are considered significant. For cpu-time based " + "tests, this is the lower bound on the total cpu time " + "used by all threads that make up the test. For real-time " + "based tests, this is the lower bound on the elapsed time " + "of the benchmark execution, regardless of number of " + "threads."); + +DEFINE_int32(benchmark_repetitions, 1, + "The number of runs of each benchmark. If greater than 1, the " + "mean and standard deviation of the runs will be reported."); + +DEFINE_string(benchmark_format, "tabular", + "The format to use for console output. Valid values are " + "'tabular', 'json', or 'csv'."); + +DEFINE_bool(color_print, true, "Enables colorized logging."); + +DEFINE_int32(v, 0, "The level of verbose logging to output"); + + +namespace benchmark { + +namespace internal { + +void UseCharPointer(char const volatile*) {} + +// NOTE: This is a dummy "mutex" type used to denote the actual mutex +// returned by GetBenchmarkLock(). This is only used to placate the thread +// safety warnings by giving the return of GetBenchmarkLock() a name. +struct CAPABILITY("mutex") BenchmarkLockType {}; +BenchmarkLockType BenchmarkLockVar; + +} // end namespace internal + +inline Mutex& RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) +GetBenchmarkLock() +{ + static Mutex lock; + return lock; +} + +namespace { + +bool IsZero(double n) { + return std::abs(n) < std::numeric_limits::epsilon(); +} + +// For non-dense Range, intermediate values are powers of kRangeMultiplier. 
+static const int kRangeMultiplier = 8; +static const int kMaxIterations = 1000000000; + +bool running_benchmark = false; + +// Global variable so that a benchmark can cause a little extra printing +std::string* GetReportLabel() { + static std::string label GUARDED_BY(GetBenchmarkLock()); + return &label; +} + +// TODO(ericwf): support MallocCounter. +//static benchmark::MallocCounter *benchmark_mc; + +struct ThreadStats { + ThreadStats() : bytes_processed(0), items_processed(0) {} + int64_t bytes_processed; + int64_t items_processed; +}; + +// Timer management class +class TimerManager { + public: + TimerManager(int num_threads, Notification* done) + : num_threads_(num_threads), + done_(done), + running_(false), + real_time_used_(0), + cpu_time_used_(0), + num_finalized_(0), + phase_number_(0), + entered_(0) { + } + + // Called by each thread + void StartTimer() EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = Barrier(ml); + if (last_thread) { + CHECK(!running_) << "Called StartTimer when timer is already running"; + running_ = true; + start_real_time_ = walltime::Now(); + start_cpu_time_ = MyCPUUsage() + ChildrenCPUUsage(); + } + } + if (last_thread) { + phase_condition_.notify_all(); + } + } + + // Called by each thread + void StopTimer() EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = Barrier(ml); + if (last_thread) { + CHECK(running_) << "Called StopTimer when timer is already stopped"; + InternalStop(); + } + } + if (last_thread) { + phase_condition_.notify_all(); + } + } + + // Called by each thread + void Finalize() EXCLUDES(lock_) { + MutexLock l(lock_); + num_finalized_++; + if (num_finalized_ == num_threads_) { + CHECK(!running_) << + "The timer should be stopped before the timer is finalized"; + done_->Notify(); + } + } + + // REQUIRES: timer is not running + double real_time_used() EXCLUDES(lock_) { + MutexLock l(lock_); + CHECK(!running_); + return real_time_used_; + } + 
+ // REQUIRES: timer is not running + double cpu_time_used() EXCLUDES(lock_) { + MutexLock l(lock_); + CHECK(!running_); + return cpu_time_used_; + } + + private: + Mutex lock_; + Condition phase_condition_; + int num_threads_; + Notification* done_; + + bool running_; // Is the timer running + double start_real_time_; // If running_ + double start_cpu_time_; // If running_ + + // Accumulated time so far (does not contain current slice if running_) + double real_time_used_; + double cpu_time_used_; + + // How many threads have called Finalize() + int num_finalized_; + + // State for barrier management + int phase_number_; + int entered_; // Number of threads that have entered this barrier + + void InternalStop() REQUIRES(lock_) { + CHECK(running_); + running_ = false; + real_time_used_ += walltime::Now() - start_real_time_; + cpu_time_used_ += ((MyCPUUsage() + ChildrenCPUUsage()) + - start_cpu_time_); + } + + // Enter the barrier and wait until all other threads have also + // entered the barrier. Returns iff this is the last thread to + // enter the barrier. + bool Barrier(MutexLock& ml) REQUIRES(lock_) { + CHECK_LT(entered_, num_threads_); + entered_++; + if (entered_ < num_threads_) { + // Wait for all threads to enter + int phase_number_cp = phase_number_; + auto cb = [this, phase_number_cp]() { + return this->phase_number_ > phase_number_cp; + }; + phase_condition_.wait(ml.native_handle(), cb); + return false; // I was not the last one + } else { + // Last thread has reached the barrier + phase_number_++; + entered_ = 0; + return true; + } + } +}; + +// TimerManager for current run. 
+static std::unique_ptr<TimerManager> timer_manager = nullptr; + +} // end namespace + +namespace internal { + +// Information kept per benchmark we may want to run +struct Benchmark::Instance { + std::string name; + Benchmark* benchmark; + bool has_arg1; + int arg1; + bool has_arg2; + int arg2; + bool use_real_time; + double min_time; + int threads; // Number of concurrent threads to use + bool multithreaded; // Is benchmark multi-threaded? +}; + +// Class for managing registered benchmarks. Note that each registered +// benchmark identifies a family of related benchmarks to run. +class BenchmarkFamilies { + public: + static BenchmarkFamilies* GetInstance(); + + // Registers a benchmark family and returns the index assigned to it. + size_t AddBenchmark(std::unique_ptr<Benchmark> family); + + // Extract the list of benchmark instances that match the specified + // regular expression. + bool FindBenchmarks(const std::string& re, + std::vector<Benchmark::Instance>* benchmarks); + private: + BenchmarkFamilies() {} + + std::vector<std::unique_ptr<Benchmark>> families_; + Mutex mutex_; +}; + + +class BenchmarkImp { +public: + explicit BenchmarkImp(const char* name); + ~BenchmarkImp(); + + void Arg(int x); + void Range(int start, int limit); + void DenseRange(int start, int limit); + void ArgPair(int x, int y); + void RangePair(int lo1, int hi1, int lo2, int hi2); + void MinTime(double t); + void UseRealTime(); + void Threads(int t); + void ThreadRange(int min_threads, int max_threads); + void ThreadPerCpu(); + void SetName(const char* name); + + static void AddRange(std::vector<int>* dst, int lo, int hi, int mult); + +private: + friend class BenchmarkFamilies; + + std::string name_; + int arg_count_; + std::vector< std::pair<int, int> > args_; // Args for all benchmark runs + double min_time_; + bool use_real_time_; + std::vector<int> thread_counts_; + + BenchmarkImp& operator=(BenchmarkImp const&); +}; + +BenchmarkFamilies* BenchmarkFamilies::GetInstance() { + static BenchmarkFamilies instance; + return &instance; +} + + +size_t 
BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) { + MutexLock l(mutex_); + size_t index = families_.size(); + families_.push_back(std::move(family)); + return index; +} + +bool BenchmarkFamilies::FindBenchmarks( + const std::string& spec, + std::vector<Benchmark::Instance>* benchmarks) { + // Make regular expression out of command-line flag + std::string error_msg; + Regex re; + if (!re.Init(spec, &error_msg)) { + std::cerr << "Could not compile benchmark re: " << error_msg << std::endl; + return false; + } + + // Special list of thread counts to use when none are specified + std::vector<int> one_thread; + one_thread.push_back(1); + + MutexLock l(mutex_); + for (std::unique_ptr<Benchmark>& bench_family : families_) { + // Family was deleted or benchmark doesn't match + if (!bench_family) continue; + BenchmarkImp* family = bench_family->imp_; + + if (family->arg_count_ == -1) { + family->arg_count_ = 0; + family->args_.emplace_back(-1, -1); + } + for (auto const& args : family->args_) { + const std::vector<int>* thread_counts = + (family->thread_counts_.empty() + ? 
&one_thread + : &family->thread_counts_); + for (int num_threads : *thread_counts) { + + Benchmark::Instance instance; + instance.name = family->name_; + instance.benchmark = bench_family.get(); + instance.has_arg1 = family->arg_count_ >= 1; + instance.arg1 = args.first; + instance.has_arg2 = family->arg_count_ == 2; + instance.arg2 = args.second; + instance.min_time = family->min_time_; + instance.use_real_time = family->use_real_time_; + instance.threads = num_threads; + instance.multithreaded = !(family->thread_counts_.empty()); + + // Add arguments to instance name + if (family->arg_count_ >= 1) { + AppendHumanReadable(instance.arg1, &instance.name); + } + if (family->arg_count_ >= 2) { + AppendHumanReadable(instance.arg2, &instance.name); + } + if (!IsZero(family->min_time_)) { + instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); + } + if (family->use_real_time_) { + instance.name += "/real_time"; + } + + // Add the number of threads used to the name + if (!family->thread_counts_.empty()) { + instance.name += StringPrintF("/threads:%d", instance.threads); + } + + if (re.Match(instance.name)) { + benchmarks->push_back(instance); + } + } + } + } + return true; +} + +BenchmarkImp::BenchmarkImp(const char* name) + : name_(name), arg_count_(-1), + min_time_(0.0), use_real_time_(false) { +} + +BenchmarkImp::~BenchmarkImp() { +} + +void BenchmarkImp::Arg(int x) { + CHECK(arg_count_ == -1 || arg_count_ == 1); + arg_count_ = 1; + args_.emplace_back(x, -1); +} + +void BenchmarkImp::Range(int start, int limit) { + CHECK(arg_count_ == -1 || arg_count_ == 1); + arg_count_ = 1; + // AddRange yields start, the powers of kRangeMultiplier in between, and limit + std::vector<int> arglist; + AddRange(&arglist, start, limit, kRangeMultiplier); + + for (int i : arglist) { + args_.emplace_back(i, -1); + } +} + +void BenchmarkImp::DenseRange(int start, int limit) { + CHECK(arg_count_ == -1 || arg_count_ == 1); + arg_count_ = 1; + CHECK_GE(start, 0); + CHECK_LE(start, limit); + for (int arg = start; arg <= limit; arg++) { + args_.emplace_back(arg, 
-1); + } +} + +void BenchmarkImp::ArgPair(int x, int y) { + CHECK(arg_count_ == -1 || arg_count_ == 2); + arg_count_ = 2; + args_.emplace_back(x, y); +} + +void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) { + CHECK(arg_count_ == -1 || arg_count_ == 2); + arg_count_ = 2; + std::vector arglist1, arglist2; + AddRange(&arglist1, lo1, hi1, kRangeMultiplier); + AddRange(&arglist2, lo2, hi2, kRangeMultiplier); + + for (int i : arglist1) { + for (int j : arglist2) { + args_.emplace_back(i, j); + } + } +} + +void BenchmarkImp::MinTime(double t) { + CHECK(t > 0.0); + min_time_ = t; +} + +void BenchmarkImp::UseRealTime() { + use_real_time_ = true; +} + +void BenchmarkImp::Threads(int t) { + CHECK_GT(t, 0); + thread_counts_.push_back(t); +} + +void BenchmarkImp::ThreadRange(int min_threads, int max_threads) { + CHECK_GT(min_threads, 0); + CHECK_GE(max_threads, min_threads); + + AddRange(&thread_counts_, min_threads, max_threads, 2); +} + +void BenchmarkImp::ThreadPerCpu() { + static int num_cpus = NumCPUs(); + thread_counts_.push_back(num_cpus); +} + +void BenchmarkImp::SetName(const char* name) { + name_ = name; +} + +void BenchmarkImp::AddRange(std::vector* dst, int lo, int hi, int mult) { + CHECK_GE(lo, 0); + CHECK_GE(hi, lo); + + // Add "lo" + dst->push_back(lo); + + static const int kint32max = std::numeric_limits::max(); + + // Now space out the benchmarks in multiples of "mult" + for (int32_t i = 1; i < kint32max/mult; i *= mult) { + if (i >= hi) break; + if (i > lo) { + dst->push_back(i); + } + } + // Add "hi" (if different from "lo") + if (hi != lo) { + dst->push_back(hi); + } +} + +Benchmark::Benchmark(const char* name) + : imp_(new BenchmarkImp(name)) +{ +} + +Benchmark::~Benchmark() { + delete imp_; +} + +Benchmark::Benchmark(Benchmark const& other) + : imp_(new BenchmarkImp(*other.imp_)) +{ +} + +Benchmark* Benchmark::Arg(int x) { + imp_->Arg(x); + return this; +} + +Benchmark* Benchmark::Range(int start, int limit) { + imp_->Range(start, limit); + 
return this; +} + +Benchmark* Benchmark::DenseRange(int start, int limit) { + imp_->DenseRange(start, limit); + return this; +} + +Benchmark* Benchmark::ArgPair(int x, int y) { + imp_->ArgPair(x, y); + return this; +} + +Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) { + imp_->RangePair(lo1, hi1, lo2, hi2); + return this; +} + +Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { + custom_arguments(this); + return this; +} + +Benchmark* Benchmark::MinTime(double t) { + imp_->MinTime(t); + return this; +} + +Benchmark* Benchmark::UseRealTime() { + imp_->UseRealTime(); + return this; +} + +Benchmark* Benchmark::Threads(int t) { + imp_->Threads(t); + return this; +} + +Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { + imp_->ThreadRange(min_threads, max_threads); + return this; +} + +Benchmark* Benchmark::ThreadPerCpu() { + imp_->ThreadPerCpu(); + return this; +} + +void Benchmark::SetName(const char* name) { + imp_->SetName(name); +} + +void FunctionBenchmark::Run(State& st) { + func_(st); +} + +} // end namespace internal + +namespace { + + +// Execute one thread of benchmark b for the specified number of iterations. +// Adds the stats collected for the thread into *total. 
+void RunInThread(const benchmark::internal::Benchmark::Instance* b, + int iters, int thread_id, + ThreadStats* total) EXCLUDES(GetBenchmarkLock()) { + State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id); + b->benchmark->Run(st); + CHECK(st.iterations() == st.max_iterations) << + "Benchmark returned before State::KeepRunning() returned false!"; + { + MutexLock l(GetBenchmarkLock()); + total->bytes_processed += st.bytes_processed(); + total->items_processed += st.items_processed(); + } + + timer_manager->Finalize(); +} + +void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, + BenchmarkReporter* br) EXCLUDES(GetBenchmarkLock()) { + int iters = 1; + + std::vector reports; + + std::vector pool; + if (b.multithreaded) + pool.resize(b.threads); + + for (int i = 0; i < FLAGS_benchmark_repetitions; i++) { + std::string mem; + while (true) { + // Try benchmark + VLOG(2) << "Running " << b.name << " for " << iters << "\n"; + + { + MutexLock l(GetBenchmarkLock()); + GetReportLabel()->clear(); + } + + Notification done; + timer_manager = std::unique_ptr(new TimerManager(b.threads, &done)); + + ThreadStats total; + running_benchmark = true; + if (b.multithreaded) { + // If this is our first iteration of the while(true) loop then the + // threads haven't been started and can't be joined. Otherwise we need + // to join the threads before replacing them. 
+ for (std::thread& thread : pool) { + if (thread.joinable()) + thread.join(); + } + for (std::size_t ti = 0; ti < pool.size(); ++ti) { + pool[ti] = std::thread(&RunInThread, &b, iters, ti, &total); + } + } else { + // Run directly in this thread + RunInThread(&b, iters, 0, &total); + } + done.WaitForNotification(); + running_benchmark = false; + + const double cpu_accumulated_time = timer_manager->cpu_time_used(); + const double real_accumulated_time = timer_manager->real_time_used(); + timer_manager.reset(); + + VLOG(2) << "Ran in " << cpu_accumulated_time << "/" + << real_accumulated_time << "\n"; + + // Base decisions off of real time if requested by this benchmark. + double seconds = cpu_accumulated_time; + if (b.use_real_time) { + seconds = real_accumulated_time; + } + + std::string label; + { + MutexLock l(GetBenchmarkLock()); + label = *GetReportLabel(); + } + + const double min_time = !IsZero(b.min_time) ? b.min_time + : FLAGS_benchmark_min_time; + + // If this was the first run, was elapsed time or cpu time large enough? + // If this is not the first run, go with the current value of iter. + if ((i > 0) || + (iters >= kMaxIterations) || + (seconds >= min_time) || + (real_accumulated_time >= 5*min_time)) { + double bytes_per_second = 0; + if (total.bytes_processed > 0 && seconds > 0.0) { + bytes_per_second = (total.bytes_processed / seconds); + } + double items_per_second = 0; + if (total.items_processed > 0 && seconds > 0.0) { + items_per_second = (total.items_processed / seconds); + } + + // Create report about this benchmark run. + BenchmarkReporter::Run report; + report.benchmark_name = b.name; + report.report_label = label; + // Report the total iterations across all threads. 
+ report.iterations = static_cast(iters) * b.threads; + report.real_accumulated_time = real_accumulated_time; + report.cpu_accumulated_time = cpu_accumulated_time; + report.bytes_per_second = bytes_per_second; + report.items_per_second = items_per_second; + reports.push_back(report); + break; + } + + // See how much iterations should be increased by + // Note: Avoid division by zero with max(seconds, 1ns). + double multiplier = min_time * 1.4 / std::max(seconds, 1e-9); + // If our last run was at least 10% of FLAGS_benchmark_min_time then we + // use the multiplier directly. Otherwise we use at most 10 times + // expansion. + // NOTE: When the last run was at least 10% of the min time the max + // expansion should be 14x. + bool is_significant = (seconds / min_time) > 0.1; + multiplier = is_significant ? multiplier : std::min(10.0, multiplier); + if (multiplier <= 1.0) multiplier = 2.0; + double next_iters = std::max(multiplier * iters, iters + 1.0); + if (next_iters > kMaxIterations) { + next_iters = kMaxIterations; + } + VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; + iters = static_cast(next_iters + 0.5); + } + } + br->ReportRuns(reports); + if (b.multithreaded) { + for (std::thread& thread : pool) + thread.join(); + } +} + +} // namespace + +State::State(size_t max_iters, bool has_x, int x, bool has_y, int y, + int thread_i) + : started_(false), total_iterations_(0), + has_range_x_(has_x), range_x_(x), + has_range_y_(has_y), range_y_(y), + bytes_processed_(0), items_processed_(0), + thread_index(thread_i), + max_iterations(max_iters) +{ + CHECK(max_iterations != 0) << "At least one iteration must be run"; +} + +void State::PauseTiming() { + // Add in time accumulated so far + CHECK(running_benchmark); + timer_manager->StopTimer(); +} + +void State::ResumeTiming() { + CHECK(running_benchmark); + timer_manager->StartTimer(); +} + +void State::SetLabel(const char* label) { + CHECK(running_benchmark); + MutexLock l(GetBenchmarkLock()); + 
*GetReportLabel() = label; +} + +namespace internal { +namespace { + +void PrintBenchmarkList() { + std::vector benchmarks; + auto families = BenchmarkFamilies::GetInstance(); + if (!families->FindBenchmarks(".", &benchmarks)) return; + + for (const internal::Benchmark::Instance& benchmark : benchmarks) { + std::cout << benchmark.name << "\n"; + } +} + +void RunMatchingBenchmarks(const std::string& spec, + BenchmarkReporter* reporter) { + CHECK(reporter != nullptr); + if (spec.empty()) return; + + std::vector benchmarks; + auto families = BenchmarkFamilies::GetInstance(); + if (!families->FindBenchmarks(spec, &benchmarks)) return; + + // Determine the width of the name field using a minimum width of 10. + size_t name_field_width = 10; + for (const Benchmark::Instance& benchmark : benchmarks) { + name_field_width = + std::max(name_field_width, benchmark.name.size()); + } + if (FLAGS_benchmark_repetitions > 1) + name_field_width += std::strlen("_stddev"); + + // Print header here + BenchmarkReporter::Context context; + context.num_cpus = NumCPUs(); + context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f; + + context.cpu_scaling_enabled = CpuScalingEnabled(); + context.name_field_width = name_field_width; + + if (reporter->ReportContext(context)) { + for (const auto& benchmark : benchmarks) { + RunBenchmark(benchmark, reporter); + } + } +} + +std::unique_ptr GetDefaultReporter() { + typedef std::unique_ptr PtrType; + if (FLAGS_benchmark_format == "tabular") { + return PtrType(new ConsoleReporter); + } else if (FLAGS_benchmark_format == "json") { + return PtrType(new JSONReporter); + } else if (FLAGS_benchmark_format == "csv") { + return PtrType(new CSVReporter); + } else { + std::cerr << "Unexpected format: '" << FLAGS_benchmark_format << "'\n"; + std::exit(1); + } +} + +} // end namespace +} // end namespace internal + +void RunSpecifiedBenchmarks() { + RunSpecifiedBenchmarks(nullptr); +} + +void RunSpecifiedBenchmarks(BenchmarkReporter* reporter) { + if 
(FLAGS_benchmark_list_tests) { + internal::PrintBenchmarkList(); + return; + } + std::string spec = FLAGS_benchmark_filter; + if (spec.empty() || spec == "all") + spec = "."; // Regexp that matches all benchmarks + + std::unique_ptr default_reporter; + if (!reporter) { + default_reporter = internal::GetDefaultReporter(); + reporter = default_reporter.get(); + } + internal::RunMatchingBenchmarks(spec, reporter); + reporter->Finalize(); +} + +namespace internal { + +void PrintUsageAndExit() { + fprintf(stdout, + "benchmark" + " [--benchmark_list_tests={true|false}]\n" + " [--benchmark_filter=]\n" + " [--benchmark_min_time=]\n" + " [--benchmark_repetitions=]\n" + " [--benchmark_format=]\n" + " [--color_print={true|false}]\n" + " [--v=]\n"); + exit(0); +} + +void ParseCommandLineFlags(int* argc, const char** argv) { + using namespace benchmark; + for (int i = 1; i < *argc; ++i) { + if ( + ParseBoolFlag(argv[i], "benchmark_list_tests", + &FLAGS_benchmark_list_tests) || + ParseStringFlag(argv[i], "benchmark_filter", + &FLAGS_benchmark_filter) || + ParseDoubleFlag(argv[i], "benchmark_min_time", + &FLAGS_benchmark_min_time) || + ParseInt32Flag(argv[i], "benchmark_repetitions", + &FLAGS_benchmark_repetitions) || + ParseStringFlag(argv[i], "benchmark_format", + &FLAGS_benchmark_format) || + ParseBoolFlag(argv[i], "color_print", + &FLAGS_color_print) || + ParseInt32Flag(argv[i], "v", &FLAGS_v)) { + for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1]; + + --(*argc); + --i; + } else if (IsFlag(argv[i], "help")) { + PrintUsageAndExit(); + } + } + if (FLAGS_benchmark_format != "tabular" && + FLAGS_benchmark_format != "json" && + FLAGS_benchmark_format != "csv") { + PrintUsageAndExit(); + } +} + +Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { + std::unique_ptr bench_ptr(bench); + BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); + families->AddBenchmark(std::move(bench_ptr)); + return bench; +} + +} // end namespace internal + +void Initialize(int* argc, 
const char** argv) { + internal::ParseCommandLineFlags(argc, argv); + internal::SetLogLevel(FLAGS_v); + // TODO remove this. It prints some output the first time it is called. + // We don't want to have this output printed during benchmarking. + MyCPUUsage(); + // The first call to walltime::Now initializes it. Call it once to + // prevent the initialization from happening in a benchmark. + walltime::Now(); +} + +} // end namespace benchmark diff --git a/benchmark/src/check.h b/benchmark/src/check.h new file mode 100644 index 00000000..2b04cd2c --- /dev/null +++ b/benchmark/src/check.h @@ -0,0 +1,57 @@ +#ifndef CHECK_H_ +#define CHECK_H_ + +#include +#include + +#include "internal_macros.h" +#include "log.h" + +namespace benchmark { +namespace internal { + +// CheckHandler is the class constructed by failing CHECK macros. CheckHandler +// will log information about the failures and abort when it is destructed. +class CheckHandler { +public: + CheckHandler(const char* check, const char* file, const char* func, int line) + : log_(GetErrorLogInstance()) + { + log_ << file << ":" << line << ": " << func << ": Check `" + << check << "' failed. "; + } + + std::ostream& GetLog() { + return log_; + } + + BENCHMARK_NORETURN ~CheckHandler() { + log_ << std::endl; + std::abort(); + } + +private: + std::ostream& log_; +}; + +} // end namespace internal +} // end namespace benchmark + +// The CHECK macro returns a std::ostream object that can have extra information +// written to it. +#ifndef NDEBUG +# define CHECK(b) (b ? 
::benchmark::internal::GetNullLogInstance() \ + : ::benchmark::internal::CheckHandler( \ + #b, __FILE__, __func__, __LINE__).GetLog()) +#else +# define CHECK(b) ::benchmark::internal::GetNullLogInstance() +#endif + +#define CHECK_EQ(a, b) CHECK((a) == (b)) +#define CHECK_NE(a, b) CHECK((a) != (b)) +#define CHECK_GE(a, b) CHECK((a) >= (b)) +#define CHECK_LE(a, b) CHECK((a) <= (b)) +#define CHECK_GT(a, b) CHECK((a) > (b)) +#define CHECK_LT(a, b) CHECK((a) < (b)) + +#endif // CHECK_H_ diff --git a/benchmark/src/colorprint.cc b/benchmark/src/colorprint.cc new file mode 100644 index 00000000..923bc87f --- /dev/null +++ b/benchmark/src/colorprint.cc @@ -0,0 +1,115 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "colorprint.h" + +#include + +#include "commandlineflags.h" +#include "internal_macros.h" + +#ifdef OS_WINDOWS +#include +#endif + +DECLARE_bool(color_print); + +namespace benchmark { +namespace { +#ifdef OS_WINDOWS +typedef WORD PlatformColorCode; +#else +typedef const char* PlatformColorCode; +#endif + +PlatformColorCode GetPlatformColorCode(LogColor color) { +#ifdef OS_WINDOWS + switch (color) { + case COLOR_RED: + return FOREGROUND_RED; + case COLOR_GREEN: + return FOREGROUND_GREEN; + case COLOR_YELLOW: + return FOREGROUND_RED | FOREGROUND_GREEN; + case COLOR_BLUE: + return FOREGROUND_BLUE; + case COLOR_MAGENTA: + return FOREGROUND_BLUE | FOREGROUND_RED; + case COLOR_CYAN: + return FOREGROUND_BLUE | FOREGROUND_GREEN; + case COLOR_WHITE: // fall through to default + default: + return 0; + } +#else + switch (color) { + case COLOR_RED: + return "1"; + case COLOR_GREEN: + return "2"; + case COLOR_YELLOW: + return "3"; + case COLOR_BLUE: + return "4"; + case COLOR_MAGENTA: + return "5"; + case COLOR_CYAN: + return "6"; + case COLOR_WHITE: + return "7"; + default: + return nullptr; + }; +#endif +} +} // end namespace + +void ColorPrintf(LogColor color, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + + if (!FLAGS_color_print) { + vprintf(fmt, args); + va_end(args); + return; + } + +#ifdef OS_WINDOWS + const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); + + // Gets the current text color. + CONSOLE_SCREEN_BUFFER_INFO buffer_info; + GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); + const WORD old_color_attrs = buffer_info.wAttributes; + + // We need to flush the stream buffers into the console before each + // SetConsoleTextAttribute call lest it affect the text that is already + // printed but has not yet reached the console. + fflush(stdout); + SetConsoleTextAttribute(stdout_handle, + GetPlatformColorCode(color) | FOREGROUND_INTENSITY); + vprintf(fmt, args); + + fflush(stdout); + // Restores the text color. 
+ SetConsoleTextAttribute(stdout_handle, old_color_attrs); +#else + const char* color_code = GetPlatformColorCode(color); + if (color_code) fprintf(stdout, "\033[0;3%sm", color_code); + vprintf(fmt, args); + printf("\033[m"); // Resets the terminal to default. +#endif + va_end(args); +} +} // end namespace benchmark diff --git a/benchmark/src/colorprint.h b/benchmark/src/colorprint.h new file mode 100644 index 00000000..54d1f664 --- /dev/null +++ b/benchmark/src/colorprint.h @@ -0,0 +1,19 @@ +#ifndef BENCHMARK_COLORPRINT_H_ +#define BENCHMARK_COLORPRINT_H_ + +namespace benchmark { +enum LogColor { + COLOR_DEFAULT, + COLOR_RED, + COLOR_GREEN, + COLOR_YELLOW, + COLOR_BLUE, + COLOR_MAGENTA, + COLOR_CYAN, + COLOR_WHITE +}; + +void ColorPrintf(LogColor color, const char* fmt, ...); +} // end namespace benchmark + +#endif // BENCHMARK_COLORPRINT_H_ diff --git a/benchmark/src/commandlineflags.cc b/benchmark/src/commandlineflags.cc new file mode 100644 index 00000000..6916d978 --- /dev/null +++ b/benchmark/src/commandlineflags.cc @@ -0,0 +1,219 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "commandlineflags.h" + +#include +#include +#include + +namespace benchmark { +// Parses 'str' for a 32-bit signed integer. If successful, writes +// the result to *value and returns true; otherwise leaves *value +// unchanged and returns false. 
+bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) { + // Parses the string as a decimal integer. + char* end = nullptr; + const long long_value = strtol(str, &end, 10); // NOLINT + + // Has strtol() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. + std::cerr << src_text << " is expected to be a 32-bit integer, " + << "but actually has value \"" << str << "\".\n"; + return false; + } + + // Is the parsed value in the range of an Int32? + const int32_t result = static_cast(long_value); + if (long_value == std::numeric_limits::max() || + long_value == std::numeric_limits::min() || + // The parsed value overflows as a long. (strtol() returns + // LONG_MAX or LONG_MIN when the input overflows.) + result != long_value + // The parsed value overflows as an Int32. + ) { + std::cerr << src_text << " is expected to be a 32-bit integer, " + << "but actually has value \"" << str << "\", " + << "which overflows.\n"; + return false; + } + + *value = result; + return true; +} + +// Parses 'str' for a double. If successful, writes the result to *value and +// returns true; otherwise leaves *value unchanged and returns false. +bool ParseDouble(const std::string& src_text, const char* str, double* value) { + // Parses the string as a floating-point number. + char* end = nullptr; + const double double_value = strtod(str, &end); // NOLINT + + // Has strtod() consumed all characters in the string? + if (*end != '\0') { + // No - an invalid character was encountered. 
+ std::cerr << src_text << " is expected to be a double, " + << "but actually has value \"" << str << "\".\n"; + return false; + } + + *value = double_value; + return true; +} + +inline const char* GetEnv(const char* name) { +#if defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) + // Environment variables which we programmatically clear will be set to the + // empty string rather than unset (nullptr). Handle that case. + const char* const env = getenv(name); + return (env != nullptr && env[0] != '\0') ? env : nullptr; +#else + return getenv(name); +#endif +} + +// Returns the name of the environment variable corresponding to the +// given flag. For example, FlagToEnvVar("foo") will return +// "BENCHMARK_FOO" in the open-source version. +static std::string FlagToEnvVar(const char* flag) { + const std::string flag_str(flag); + + std::string env_var; + for (size_t i = 0; i != flag_str.length(); ++i) + env_var += ::toupper(flag_str.c_str()[i]); + + return "BENCHMARK_" + env_var; +} + +// Reads and returns the Boolean environment variable corresponding to +// the given flag; if it's not set, returns default_value. +// +// The value is considered true iff it's not "0". +bool BoolFromEnv(const char* flag, bool default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = GetEnv(env_var.c_str()); + return string_value == nullptr ? default_value : strcmp(string_value, "0") != 0; +} + +// Reads and returns a 32-bit integer stored in the environment +// variable corresponding to the given flag; if it isn't set or +// doesn't represent a valid 32-bit integer, returns default_value. +int32_t Int32FromEnv(const char* flag, int32_t default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = GetEnv(env_var.c_str()); + if (string_value == nullptr) { + // The environment variable is not set. 
+ return default_value; + } + + int32_t result = default_value; + if (!ParseInt32(std::string("Environment variable ") + env_var, string_value, + &result)) { + std::cout << "The default value " << default_value << " is used.\n"; + return default_value; + } + + return result; +} + +// Reads and returns the string environment variable corresponding to +// the given flag; if it's not set, returns default_value. +const char* StringFromEnv(const char* flag, const char* default_value) { + const std::string env_var = FlagToEnvVar(flag); + const char* const value = GetEnv(env_var.c_str()); + return value == nullptr ? default_value : value; +} + +// Parses a string as a command line flag. The string should have +// the format "--flag=value". When def_optional is true, the "=value" +// part can be omitted. +// +// Returns the value of the flag, or nullptr if the parsing failed. +const char* ParseFlagValue(const char* str, const char* flag, + bool def_optional) { + // str and flag must not be nullptr. + if (str == nullptr || flag == nullptr) return nullptr; + + // The flag must start with "--". + const std::string flag_str = std::string("--") + std::string(flag); + const size_t flag_len = flag_str.length(); + if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr; + + // Skips the flag name. + const char* flag_end = str + flag_len; + + // When def_optional is true, it's OK to not have a "=value" part. + if (def_optional && (flag_end[0] == '\0')) return flag_end; + + // If def_optional is true and there are more characters after the + // flag name, or if def_optional is false, there must be a '=' after + // the flag name. + if (flag_end[0] != '=') return nullptr; + + // Returns the string after "=". + return flag_end + 1; +} + +bool ParseBoolFlag(const char* str, const char* flag, bool* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, true); + + // Aborts if the parsing failed. 
+ if (value_str == nullptr) return false; + + // Converts the string value to a bool. + *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); + return true; +} + +bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Sets *value to the value of the flag. + return ParseInt32(std::string("The value of flag --") + flag, value_str, + value); +} + +bool ParseDoubleFlag(const char* str, const char* flag, double* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + // Sets *value to the value of the flag. + return ParseDouble(std::string("The value of flag --") + flag, value_str, + value); +} + +bool ParseStringFlag(const char* str, const char* flag, std::string* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == nullptr) return false; + + *value = value_str; + return true; +} + +bool IsFlag(const char* str, const char* flag) { + return (ParseFlagValue(str, flag, true) != nullptr); +} +} // end namespace benchmark diff --git a/benchmark/src/commandlineflags.h b/benchmark/src/commandlineflags.h new file mode 100644 index 00000000..34b9c6f3 --- /dev/null +++ b/benchmark/src/commandlineflags.h @@ -0,0 +1,76 @@ +#ifndef BENCHMARK_COMMANDLINEFLAGS_H_ +#define BENCHMARK_COMMANDLINEFLAGS_H_ + +#include +#include + +// Macro for referencing flags. +#define FLAG(name) FLAGS_##name + +// Macros for declaring flags. 
+#define DECLARE_bool(name) extern bool FLAG(name) +#define DECLARE_int32(name) extern int32_t FLAG(name) +#define DECLARE_int64(name) extern int64_t FLAG(name) +#define DECLARE_double(name) extern double FLAG(name) +#define DECLARE_string(name) extern std::string FLAG(name) + +// Macros for defining flags. +#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val) +#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val) +#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val) +#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val) +#define DEFINE_string(name, default_val, doc) \ + std::string FLAG(name) = (default_val) + +namespace benchmark { +// Parses 'str' for a 32-bit signed integer. If successful, writes the result +// to *value and returns true; otherwise leaves *value unchanged and returns +// false. +bool ParseInt32(const std::string& src_text, const char* str, int32_t* value); + +// Parses a bool/Int32/double/string from the environment variable +// corresponding to the given benchmark flag. +bool BoolFromEnv(const char* flag, bool default_val); +int32_t Int32FromEnv(const char* flag, int32_t default_val); +double DoubleFromEnv(const char* flag, double default_val); +const char* StringFromEnv(const char* flag, const char* default_val); + +// Parses a string for a bool flag, in the form of either +// "--flag=value" or "--flag". +// +// In the former case, the value is taken as true as long as it does +// not start with '0', 'f', or 'F'. +// +// In the latter case, the value is taken as true. +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseBoolFlag(const char* str, const char* flag, bool* value); + +// Parses a string for an Int32 flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. 
On failure, returns false without changing *value. +bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); + +// Parses a string for a Double flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseDoubleFlag(const char* str, const char* flag, double* value); + +// Parses a string for a string flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseStringFlag(const char* str, const char* flag, std::string* value); + +// Returns true if the string matches the flag. +bool IsFlag(const char* str, const char* flag); + +} // end namespace benchmark + +#endif // BENCHMARK_COMMANDLINEFLAGS_H_ diff --git a/benchmark/src/console_reporter.cc b/benchmark/src/console_reporter.cc new file mode 100644 index 00000000..2f52959d --- /dev/null +++ b/benchmark/src/console_reporter.cc @@ -0,0 +1,115 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "benchmark/reporter.h" + +#include +#include +#include +#include + +#include "check.h" +#include "colorprint.h" +#include "string_util.h" +#include "walltime.h" + +namespace benchmark { + +bool ConsoleReporter::ReportContext(const Context& context) { + name_field_width_ = context.name_field_width; + + std::cerr << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu + << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; + + std::cerr << LocalDateTimeString() << "\n"; + + if (context.cpu_scaling_enabled) { + std::cerr << "***WARNING*** CPU scaling is enabled, the benchmark " + "real time measurements may be noisy and will incure extra " + "overhead.\n"; + } + +#ifndef NDEBUG + std::cerr << "***WARNING*** Library was built as DEBUG. Timings may be " + "affected.\n"; +#endif + + int output_width = fprintf(stdout, "%-*s %10s %10s %10s\n", + static_cast(name_field_width_), "Benchmark", + "Time(ns)", "CPU(ns)", "Iterations"); + std::cout << std::string(output_width - 1, '-') << "\n"; + + return true; +} + +void ConsoleReporter::ReportRuns(const std::vector& reports) { + if (reports.empty()) { + return; + } + + for (Run const& run : reports) { + CHECK_EQ(reports[0].benchmark_name, run.benchmark_name); + PrintRunData(run); + } + + if (reports.size() < 2) { + // We don't report aggregated data if there was a single run. + return; + } + + Run mean_data; + Run stddev_data; + BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data); + + // Output using PrintRun. 
+ PrintRunData(mean_data); + PrintRunData(stddev_data); +} + +void ConsoleReporter::PrintRunData(const Run& result) { + // Format bytes per second + std::string rate; + if (result.bytes_per_second > 0) { + rate = StrCat(" ", HumanReadableNumber(result.bytes_per_second), "B/s"); + } + + // Format items per second + std::string items; + if (result.items_per_second > 0) { + items = StrCat(" ", HumanReadableNumber(result.items_per_second), + " items/s"); + } + + double const multiplier = 1e9; // nano second multiplier + ColorPrintf(COLOR_GREEN, "%-*s ", + name_field_width_, result.benchmark_name.c_str()); + if (result.iterations == 0) { + ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ", + result.real_accumulated_time * multiplier, + result.cpu_accumulated_time * multiplier); + } else { + ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ", + (result.real_accumulated_time * multiplier) / + (static_cast(result.iterations)), + (result.cpu_accumulated_time * multiplier) / + (static_cast(result.iterations))); + } + ColorPrintf(COLOR_CYAN, "%10lld", result.iterations); + ColorPrintf(COLOR_DEFAULT, "%*s %*s %s\n", + 13, rate.c_str(), + 18, items.c_str(), + result.report_label.c_str()); +} + +} // end namespace benchmark diff --git a/benchmark/src/csv_reporter.cc b/benchmark/src/csv_reporter.cc new file mode 100644 index 00000000..a8369433 --- /dev/null +++ b/benchmark/src/csv_reporter.cc @@ -0,0 +1,105 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/reporter.h" + +#include +#include +#include +#include + +#include "string_util.h" +#include "walltime.h" + +// File format reference: http://edoceo.com/utilitas/csv-file-format. + +namespace benchmark { + +bool CSVReporter::ReportContext(const Context& context) { + std::cerr << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu + << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; + + std::cerr << LocalDateTimeString() << "\n"; + + if (context.cpu_scaling_enabled) { + std::cerr << "***WARNING*** CPU scaling is enabled, the benchmark " + "real time measurements may be noisy and will incure extra " + "overhead.\n"; + } + +#ifndef NDEBUG + std::cerr << "***WARNING*** Library was built as DEBUG. Timings may be " + "affected.\n"; +#endif + std::cout << "name,iterations,real_time,cpu_time,bytes_per_second," + "items_per_second,label\n"; + return true; +} + +void CSVReporter::ReportRuns(std::vector const& reports) { + if (reports.empty()) { + return; + } + + std::vector reports_cp = reports; + if (reports.size() >= 2) { + Run mean_data; + Run stddev_data; + BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data); + reports_cp.push_back(mean_data); + reports_cp.push_back(stddev_data); + } + for (auto it = reports_cp.begin(); it != reports_cp.end(); ++it) { + PrintRunData(*it); + } +} + +void CSVReporter::PrintRunData(Run const& run) { + double const multiplier = 1e9; // nano second multiplier + double cpu_time = run.cpu_accumulated_time * multiplier; + double real_time = run.real_accumulated_time * multiplier; + if (run.iterations != 0) { + real_time = real_time / static_cast(run.iterations); + cpu_time = cpu_time / static_cast(run.iterations); + } + + // Field with embedded double-quote characters must be doubled and the field + // delimited with double-quotes. 
+ std::string name = run.benchmark_name; + ReplaceAll(&name, "\"", "\"\""); + std::cout << "\"" << name << "\","; + + std::cout << run.iterations << ","; + std::cout << real_time << ","; + std::cout << cpu_time << ","; + + if (run.bytes_per_second > 0.0) { + std::cout << run.bytes_per_second; + } + std::cout << ","; + if (run.items_per_second > 0.0) { + std::cout << run.items_per_second; + } + std::cout << ","; + if (!run.report_label.empty()) { + // Field with embedded double-quote characters must be doubled and the field + // delimited with double-quotes. + std::string label = run.report_label; + ReplaceAll(&label, "\"", "\"\""); + std::cout << "\"" << label << "\""; + } + std::cout << '\n'; +} + +} // end namespace benchmark diff --git a/benchmark/src/cycleclock.h b/benchmark/src/cycleclock.h new file mode 100644 index 00000000..b87a8798 --- /dev/null +++ b/benchmark/src/cycleclock.h @@ -0,0 +1,134 @@ +// ---------------------------------------------------------------------- +// CycleClock +// A CycleClock tells you the current time in Cycles. The "time" +// is actually time since power-on. This is like time() but doesn't +// involve a system call and is much more precise. +// +// NOTE: Not all cpu/platform/kernel combinations guarantee that this +// clock increments at a constant rate or is synchronized across all logical +// cpus in a system. +// +// If you need the above guarantees, please consider using a different +// API. There are efforts to provide an interface which provides a millisecond +// granularity and implemented as a memory read. A memory read is generally +// cheaper than the CycleClock for many architectures. +// +// Also, in some out of order CPU implementations, the CycleClock is not +// serializing. So if you're trying to count at cycles granularity, your +// data might be inaccurate due to out of order instruction execution. 
+// ---------------------------------------------------------------------- + +#ifndef BENCHMARK_CYCLECLOCK_H_ +#define BENCHMARK_CYCLECLOCK_H_ + +#include + +#include "benchmark/macros.h" +#include "internal_macros.h" + +#if defined(OS_MACOSX) +#include +#endif +// For MSVC, we want to use '_asm rdtsc' when possible (since it works +// with even ancient MSVC compilers), and when not possible the +// __rdtsc intrinsic, declared in . Unfortunately, in some +// environments, and have conflicting +// declarations of some other intrinsics, breaking compilation. +// Therefore, we simply declare __rdtsc ourselves. See also +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +#if defined(COMPILER_MSVC) && !defined(_M_IX86) +extern "C" uint64_t __rdtsc(); +#pragma intrinsic(__rdtsc) +#endif +#include + +namespace benchmark { +// NOTE: only i386 and x86_64 have been well tested. +// PPC, sparc, alpha, and ia64 are based on +// http://peter.kuscsik.com/wordpress/?p=14 +// with modifications by m3b. See also +// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h +namespace cycleclock { +// This should return the number of cycles since power-on. Thread-safe. +inline BENCHMARK_ALWAYS_INLINE int64_t Now() { +#if defined(OS_MACOSX) + // this goes at the top because we need ALL Macs, regardless of + // architecture, to return the number of "mach time units" that + // have passed since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of + // macs to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). Also note that when the Mac sleeps, this + // counter pauses; it does not continue counting, nor does it + // reset to zero. 
+ return mach_absolute_time(); +#elif defined(__i386__) + int64_t ret; + __asm__ volatile("rdtsc" : "=A"(ret)); + return ret; +#elif defined(__x86_64__) || defined(__amd64__) + uint64_t low, high; + __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + return (high << 32) | low; +#elif defined(__powerpc__) || defined(__ppc__) + // This returns a time-base, which is not always precisely a cycle-count. + int64_t tbl, tbu0, tbu1; + asm("mftbu %0" : "=r"(tbu0)); + asm("mftb %0" : "=r"(tbl)); + asm("mftbu %0" : "=r"(tbu1)); + tbl &= -static_cast(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + return (tbu1 << 32) | tbl; +#elif defined(__sparc__) + int64_t tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r"(tick)); + return tick; +#elif defined(__ia64__) + int64_t itc; + asm("mov %0 = ar.itc" : "=r"(itc)); + return itc; +#elif defined(COMPILER_MSVC) && defined(_M_IX86) + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead + // when I know it will work. Otherwise, I'll use __rdtsc and hope + // the code is being compiled with a non-ancient compiler. + _asm rdtsc +#elif defined(COMPILER_MSVC) + return __rdtsc(); +#elif defined(__ARM_ARCH) +#if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount + uint32_t pmccntr; + uint32_t pmuseren; + uint32_t pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? 
+ asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast(pmccntr) * 64; // Should optimize to << 6 + } + } +#endif + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__mips__) + // mips apparently only allows rdtsc for superusers, so we fall + // back to gettimeofday. It's possible clock_gettime would be better. + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#else +// The soft failover to a generic implementation is automatic only for ARM. +// For other platforms the developer is expected to make an attempt to create +// a fast implementation and use generic version if nothing better is available. +#error You need to define CycleTimer for your OS and CPU +#endif +} +} // end namespace cycleclock +} // end namespace benchmark + +#endif // BENCHMARK_CYCLECLOCK_H_ diff --git a/benchmark/src/internal_macros.h b/benchmark/src/internal_macros.h new file mode 100644 index 00000000..6667a2e1 --- /dev/null +++ b/benchmark/src/internal_macros.h @@ -0,0 +1,40 @@ +#ifndef BENCHMARK_INTERNAL_MACROS_H_ +#define BENCHMARK_INTERNAL_MACROS_H_ + +#include "benchmark/macros.h" + +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +#if __has_feature(cxx_attributes) +# define BENCHMARK_NORETURN [[noreturn]] +#elif defined(__GNUC__) +# define BENCHMARK_NORETURN __attribute__((noreturn)) +#else +# define BENCHMARK_NORETURN +#endif + +#if defined(__CYGWIN__) +# define OS_CYGWIN 1 +#elif defined(_WIN32) +# define OS_WINDOWS 1 +#elif defined(__APPLE__) +// TODO(ericwf) This doesn't actually check that it is a Mac OSX system. Just +// that it is an apple system. 
+# define OS_MACOSX 1 +#elif defined(__FreeBSD__) +# define OS_FREEBSD 1 +#elif defined(__linux__) +# define OS_LINUX 1 +#endif + +#if defined(__clang__) +# define COMPILER_CLANG +#elif defined(_MSC_VER) +# define COMPILER_MSVC +#elif defined(__GNUC__) +# define COMPILER_GCC +#endif + +#endif // BENCHMARK_INTERNAL_MACROS_H_ diff --git a/benchmark/src/json_reporter.cc b/benchmark/src/json_reporter.cc new file mode 100644 index 00000000..961e3d17 --- /dev/null +++ b/benchmark/src/json_reporter.cc @@ -0,0 +1,165 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/reporter.h" + +#include +#include +#include +#include + +#include "string_util.h" +#include "walltime.h" + +namespace benchmark { + +namespace { + +std::string FormatKV(std::string const& key, std::string const& value) { + return StringPrintF("\"%s\": \"%s\"", key.c_str(), value.c_str()); +} + +std::string FormatKV(std::string const& key, const char* value) { + return StringPrintF("\"%s\": \"%s\"", key.c_str(), value); +} + +std::string FormatKV(std::string const& key, bool value) { + return StringPrintF("\"%s\": %s", key.c_str(), value ? 
"true" : "false"); +} + +std::string FormatKV(std::string const& key, int64_t value) { + std::stringstream ss; + ss << '"' << key << "\": " << value; + return ss.str(); +} + +std::string FormatKV(std::string const& key, std::size_t value) { + std::stringstream ss; + ss << '"' << key << "\": " << value; + return ss.str(); +} + +int64_t RoundDouble(double v) { + return static_cast(v + 0.5); +} + +} // end namespace + +bool JSONReporter::ReportContext(const Context& context) { + std::ostream& out = std::cout; + + out << "{\n"; + std::string inner_indent(2, ' '); + + // Open context block and print context information. + out << inner_indent << "\"context\": {\n"; + std::string indent(4, ' '); + + std::string walltime_value = LocalDateTimeString(); + out << indent << FormatKV("date", walltime_value) << ",\n"; + + out << indent + << FormatKV("num_cpus", static_cast(context.num_cpus)) + << ",\n"; + out << indent + << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu)) + << ",\n"; + out << indent + << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled) + << ",\n"; + +#if defined(NDEBUG) + const char build_type[] = "release"; +#else + const char build_type[] = "debug"; +#endif + out << indent << FormatKV("library_build_type", build_type) << "\n"; + // Close context block and open the list of benchmarks. 
+ out << inner_indent << "},\n"; + out << inner_indent << "\"benchmarks\": [\n"; + return true; +} + +void JSONReporter::ReportRuns(std::vector const& reports) { + if (reports.empty()) { + return; + } + std::string indent(4, ' '); + std::ostream& out = std::cout; + if (!first_report_) { + out << ",\n"; + } + first_report_ = false; + std::vector reports_cp = reports; + if (reports.size() >= 2) { + Run mean_data; + Run stddev_data; + BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data); + reports_cp.push_back(mean_data); + reports_cp.push_back(stddev_data); + } + for (auto it = reports_cp.begin(); it != reports_cp.end(); ++it) { + out << indent << "{\n"; + PrintRunData(*it); + out << indent << '}'; + auto it_cp = it; + if (++it_cp != reports_cp.end()) { + out << ",\n"; + } + } +} + +void JSONReporter::Finalize() { + // Close the list of benchmarks and the top level object. + std::cout << "\n ]\n}\n"; +} + +void JSONReporter::PrintRunData(Run const& run) { + double const multiplier = 1e9; // nano second multiplier + double cpu_time = run.cpu_accumulated_time * multiplier; + double real_time = run.real_accumulated_time * multiplier; + if (run.iterations != 0) { + real_time = real_time / static_cast(run.iterations); + cpu_time = cpu_time / static_cast(run.iterations); + } + + std::string indent(6, ' '); + std::ostream& out = std::cout; + out << indent + << FormatKV("name", run.benchmark_name) + << ",\n"; + out << indent + << FormatKV("iterations", run.iterations) + << ",\n"; + out << indent + << FormatKV("real_time", RoundDouble(real_time)) + << ",\n"; + out << indent + << FormatKV("cpu_time", RoundDouble(cpu_time)); + if (run.bytes_per_second > 0.0) { + out << ",\n" << indent + << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second)); + } + if (run.items_per_second > 0.0) { + out << ",\n" << indent + << FormatKV("items_per_second", RoundDouble(run.items_per_second)); + } + if (!run.report_label.empty()) { + out << ",\n" << indent + << 
FormatKV("label", run.report_label); + } + out << '\n'; +} + +} // end namespace benchmark diff --git a/benchmark/src/log.cc b/benchmark/src/log.cc new file mode 100644 index 00000000..b660309d --- /dev/null +++ b/benchmark/src/log.cc @@ -0,0 +1,40 @@ +#include "log.h" + +#include + +namespace benchmark { +namespace internal { + +int& LoggingLevelImp() { + static int level = 0; + return level; +} + +void SetLogLevel(int value) { + LoggingLevelImp() = value; +} + +int GetLogLevel() { + return LoggingLevelImp(); +} + +class NullLogBuffer : public std::streambuf +{ +public: + int overflow(int c) { + return c; + } +}; + +std::ostream& GetNullLogInstance() { + static NullLogBuffer log_buff; + static std::ostream null_log(&log_buff); + return null_log; +} + +std::ostream& GetErrorLogInstance() { + return std::clog; +} + +} // end namespace internal +} // end namespace benchmark \ No newline at end of file diff --git a/benchmark/src/log.h b/benchmark/src/log.h new file mode 100644 index 00000000..3777810e --- /dev/null +++ b/benchmark/src/log.h @@ -0,0 +1,28 @@ +#ifndef BENCHMARK_LOG_H_ +#define BENCHMARK_LOG_H_ + +#include + +namespace benchmark { +namespace internal { + +int GetLogLevel(); +void SetLogLevel(int level); + +std::ostream& GetNullLogInstance(); +std::ostream& GetErrorLogInstance(); + +inline std::ostream& GetLogInstanceForLevel(int level) { + if (level <= GetLogLevel()) { + return GetErrorLogInstance(); + } + return GetNullLogInstance(); +} + +} // end namespace internal +} // end namespace benchmark + +#define VLOG(x) (::benchmark::internal::GetLogInstanceForLevel(x) \ + << "-- LOG(" << x << "): ") + +#endif \ No newline at end of file diff --git a/benchmark/src/mutex.h b/benchmark/src/mutex.h new file mode 100644 index 00000000..f37ec35b --- /dev/null +++ b/benchmark/src/mutex.h @@ -0,0 +1,142 @@ +#ifndef BENCHMARK_MUTEX_H_ +#define BENCHMARK_MUTEX_H_ + +#include +#include + +// Enable thread safety attributes only with clang. 
#ifndef BENCHMARK_MUTEX_H_
#define BENCHMARK_MUTEX_H_

#include <condition_variable>
#include <mutex>

// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
#define THREAD_ANNOTATION_ATTRIBUTE__(x)   __attribute__((x))
#else
#define THREAD_ANNOTATION_ATTRIBUTE__(x)   // no-op
#endif

#define CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(capability(x))

#define SCOPED_CAPABILITY \
  THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)

#define GUARDED_BY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))

#define PT_GUARDED_BY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))

#define ACQUIRED_BEFORE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))

#define ACQUIRED_AFTER(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))

#define REQUIRES(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))

#define REQUIRES_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))

#define ACQUIRE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))

#define ACQUIRE_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))

#define RELEASE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))

#define RELEASE_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))

#define TRY_ACQUIRE(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))

#define TRY_ACQUIRE_SHARED(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))

#define EXCLUDES(...) \
  THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))

#define ASSERT_CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))

#define ASSERT_SHARED_CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))

#define RETURN_CAPABILITY(x) \
  THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))

#define NO_THREAD_SAFETY_ANALYSIS \
  THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)


namespace benchmark {

typedef std::condition_variable Condition;

// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that
// we can annotate them with thread safety attributes and use the
// -Wthread-safety warning with clang. The standard library types cannot be
// used directly because they do not provide the required annotations.
class CAPABILITY("mutex") Mutex
{
public:
  Mutex() {}

  void lock() ACQUIRE() { mut_.lock(); }
  void unlock() RELEASE() { mut_.unlock(); }
  // Exposes the wrapped std::mutex, e.g. to build a std::unique_lock on it.
  std::mutex& native_handle() {
    return mut_;
  }
private:
  std::mutex mut_;
};


// Scoped lock: acquires the Mutex in the constructor and releases it when the
// object is destroyed, through the owned std::unique_lock.
class SCOPED_CAPABILITY MutexLock
{
  typedef std::unique_lock<std::mutex> MutexLockImp;
public:
  // explicit: a Mutex must never convert silently into a lock on itself.
  explicit MutexLock(Mutex& m) ACQUIRE(m) : ml_(m.native_handle())
  { }
  ~MutexLock() RELEASE() {}
  // Exposes the underlying unique_lock, as required by
  // std::condition_variable::wait.
  MutexLockImp& native_handle() { return ml_; }
private:
  MutexLockImp ml_;
};


// One-shot event: WaitForNotification() blocks until Notify() has been
// called. Once notified the object stays notified; nothing in this class
// resets it.
class Notification
{
public:
  Notification() : notified_yet_(false) { }

  // Blocks until the flag is set. The predicate form of wait() re-checks the
  // flag under the lock, so spurious wake-ups and a Notify() that happened
  // before this call are both handled.
  void WaitForNotification() const EXCLUDES(mutex_) {
    MutexLock m_lock(mutex_);
    auto notified_fn = [this]() REQUIRES(mutex_) {
                            return this->HasBeenNotified();
                        };
    cv_.wait(m_lock.native_handle(), notified_fn);
  }

  // Sets the flag under the lock, then wakes every waiter after the lock has
  // been released.
  void Notify() EXCLUDES(mutex_) {
    {
      MutexLock lock(mutex_);
      notified_yet_ = true;
    }
    cv_.notify_all();
  }

private:
  bool HasBeenNotified() const REQUIRES(mutex_) {
    return notified_yet_;
  }

  // mutable so the const WaitForNotification() can lock and wait.
  mutable Mutex mutex_;
  mutable std::condition_variable cv_;
  bool notified_yet_ GUARDED_BY(mutex_);
};

} // end namespace benchmark

#endif // BENCHMARK_MUTEX_H_
a/benchmark/src/re.h b/benchmark/src/re.h new file mode 100644 index 00000000..af57a39c --- /dev/null +++ b/benchmark/src/re.h @@ -0,0 +1,60 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BENCHMARK_RE_H_ +#define BENCHMARK_RE_H_ + +#if defined(HAVE_STD_REGEX) +#include +#elif defined(HAVE_GNU_POSIX_REGEX) +#include +#elif defined(HAVE_POSIX_REGEX) +#include +#else +#error No regular expression backend was found! +#endif +#include + +namespace benchmark { + +// A wrapper around the POSIX regular expression API that provides automatic +// cleanup +class Regex { + public: + Regex(); + ~Regex(); + + // Compile a regular expression matcher from spec. Returns true on success. + // + // On failure (and if error is not nullptr), error is populated with a human + // readable error message if an error occurs. + bool Init(const std::string& spec, std::string* error); + + // Returns whether str matches the compiled regular expression. 
+ bool Match(const std::string& str); + private: + bool init_; + // Underlying regular expression object +#if defined(HAVE_STD_REGEX) + std::regex re_; +#elif defined(HAVE_POSIX_REGEX) || defined(HAVE_GNU_POSIX_REGEX) + regex_t re_; +#else +# error No regular expression backend implementation available +#endif +}; + +} // end namespace benchmark + +#endif // BENCHMARK_RE_H_ diff --git a/benchmark/src/re_posix.cc b/benchmark/src/re_posix.cc new file mode 100644 index 00000000..95b086ff --- /dev/null +++ b/benchmark/src/re_posix.cc @@ -0,0 +1,59 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "check.h" +#include "re.h" + +namespace benchmark { + +Regex::Regex() : init_(false) { } + +bool Regex::Init(const std::string& spec, std::string* error) { + int ec = regcomp(&re_, spec.c_str(), REG_EXTENDED | REG_NOSUB); + if (ec != 0) { + if (error) { + size_t needed = regerror(ec, &re_, nullptr, 0); + char* errbuf = new char[needed]; + regerror(ec, &re_, errbuf, needed); + + // regerror returns the number of bytes necessary to null terminate + // the string, so we move that when assigning to error. 
+ CHECK_NE(needed, 0); + error->assign(errbuf, needed - 1); + + delete[] errbuf; + } + + return false; + } + + init_ = true; + return true; +} + +Regex::~Regex() { + if (init_) { + regfree(&re_); + } +} + +bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + + return regexec(&re_, str.c_str(), 0, nullptr, 0) == 0; +} + +} // end namespace benchmark diff --git a/benchmark/src/re_std.cc b/benchmark/src/re_std.cc new file mode 100644 index 00000000..cfd7a218 --- /dev/null +++ b/benchmark/src/re_std.cc @@ -0,0 +1,44 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "re.h" + +namespace benchmark { + +Regex::Regex() : init_(false) { } + +bool Regex::Init(const std::string& spec, std::string* error) { + try { + re_ = std::regex(spec, std::regex_constants::extended); + + init_ = true; + } catch (const std::regex_error& e) { + if (error) { + *error = e.what(); + } + } + return init_; +} + +Regex::~Regex() { } + +bool Regex::Match(const std::string& str) { + if (!init_) { + return false; + } + + return std::regex_search(str, re_); +} + +} // end namespace benchmark diff --git a/benchmark/src/reporter.cc b/benchmark/src/reporter.cc new file mode 100644 index 00000000..5d6e7226 --- /dev/null +++ b/benchmark/src/reporter.cc @@ -0,0 +1,86 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "benchmark/reporter.h" + +#include +#include + +#include "check.h" +#include "stat.h" + +namespace benchmark { + +void BenchmarkReporter::ComputeStats( + const std::vector& reports, + Run* mean_data, Run* stddev_data) { + CHECK(reports.size() >= 2) << "Cannot compute stats for less than 2 reports"; + // Accumulators. + Stat1_d real_accumulated_time_stat; + Stat1_d cpu_accumulated_time_stat; + Stat1_d bytes_per_second_stat; + Stat1_d items_per_second_stat; + // All repetitions should be run with the same number of iterations so we + // can take this information from the first benchmark. + std::size_t const run_iterations = reports.front().iterations; + + // Populate the accumulators. + for (Run const& run : reports) { + CHECK_EQ(reports[0].benchmark_name, run.benchmark_name); + CHECK_EQ(run_iterations, run.iterations); + real_accumulated_time_stat += + Stat1_d(run.real_accumulated_time/run.iterations, run.iterations); + cpu_accumulated_time_stat += + Stat1_d(run.cpu_accumulated_time/run.iterations, run.iterations); + items_per_second_stat += Stat1_d(run.items_per_second, run.iterations); + bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations); + } + + // Get the data from the accumulator to BenchmarkReporter::Run's. 
+ mean_data->benchmark_name = reports[0].benchmark_name + "_mean"; + mean_data->iterations = run_iterations; + mean_data->real_accumulated_time = real_accumulated_time_stat.Mean() * + run_iterations; + mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Mean() * + run_iterations; + mean_data->bytes_per_second = bytes_per_second_stat.Mean(); + mean_data->items_per_second = items_per_second_stat.Mean(); + + // Only add label to mean/stddev if it is same for all runs + mean_data->report_label = reports[0].report_label; + for (std::size_t i = 1; i < reports.size(); i++) { + if (reports[i].report_label != reports[0].report_label) { + mean_data->report_label = ""; + break; + } + } + + stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev"; + stddev_data->report_label = mean_data->report_label; + stddev_data->iterations = 0; + stddev_data->real_accumulated_time = + real_accumulated_time_stat.StdDev(); + stddev_data->cpu_accumulated_time = + cpu_accumulated_time_stat.StdDev(); + stddev_data->bytes_per_second = bytes_per_second_stat.StdDev(); + stddev_data->items_per_second = items_per_second_stat.StdDev(); +} + +void BenchmarkReporter::Finalize() { +} + +BenchmarkReporter::~BenchmarkReporter() { +} + +} // end namespace benchmark diff --git a/benchmark/src/sleep.cc b/benchmark/src/sleep.cc new file mode 100644 index 00000000..0d78a53b --- /dev/null +++ b/benchmark/src/sleep.cc @@ -0,0 +1,50 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// ===== sleep.h =====
#ifndef BENCHMARK_SLEEP_H_
#define BENCHMARK_SLEEP_H_

#include <cstdint>

namespace benchmark {
// Unit conversion factors.
const int64_t kNumMillisPerSecond = 1000LL;
const int64_t kNumMicrosPerMilli = 1000LL;
const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
const int64_t kNumNanosPerMicro = 1000LL;
const int64_t kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;

// Block the calling thread for the given duration.
void SleepForMilliseconds(int milliseconds);
void SleepForSeconds(double seconds);
} // end namespace benchmark

#endif // BENCHMARK_SLEEP_H_

// ===== sleep.cc =====
namespace benchmark {
#ifdef OS_WINDOWS
// Windows' Sleep takes a milliseconds argument.
void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
void SleepForSeconds(double seconds) {
  SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
}
#else   // OS_WINDOWS
namespace {
// Sleeps for `microseconds`, carried as a 64-bit count so that long waits
// cannot overflow. Non-positive durations return immediately.
void SleepForMicros64(int64_t microseconds) {
  if (microseconds <= 0) {
    return;
  }
  struct timespec sleep_time;
  sleep_time.tv_sec = static_cast<time_t>(microseconds / kNumMicrosPerSecond);
  sleep_time.tv_nsec = static_cast<long>(
      (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro);
  // nanosleep stores the unslept remainder in its second argument; passing
  // the same struct for both lets the loop resume after a signal.
  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
    ;  // Ignore signals and wait for the full interval to elapse.
}
}  // end namespace

void SleepForMicroseconds(int microseconds) {
  SleepForMicros64(microseconds);
}

// The product is computed and passed on in 64 bits; squeezing it back into
// an int overflowed for waits longer than about 35 minutes.
void SleepForMilliseconds(int milliseconds) {
  SleepForMicros64(static_cast<int64_t>(milliseconds) * kNumMicrosPerMilli);
}

void SleepForSeconds(double seconds) {
  SleepForMicros64(static_cast<int64_t>(seconds * kNumMicrosPerSecond));
}
#endif  // OS_WINDOWS
}  // end namespace benchmark
// Vector types that Sqr()/Sqrt() are overloaded on. Only declared here; the
// overloads are templates and are never instantiated for scalar statistics.
template <typename VType>
class Vector2;
template <typename VType>
class Vector3;
template <typename VType>
class Vector4;

// Stat1 accumulates a weighted set of samples in constant space.
//
// Only three running totals are kept (total weight, weighted sum, weighted sum
// of squares), which is enough to report the mean and the standard deviation
// and to merge or un-merge two sample sets with += / -=.
//
//   VType   - type of a sample value (scalar or one of the Vector types).
//   NumType - integral type used for the total weight.
template <typename VType, typename NumType>
class Stat1 {
 public:
  typedef Stat1<VType, NumType> Self;

  // Creates an empty sample set.
  Stat1() { Clear(); }
  // Create a sample of value dat and weight 1
  explicit Stat1(const VType &dat)
      : numsamples_(1), sum_(dat), sum_squares_(Sqr(dat)) {}
  // Create statistics for all the samples between begin (included)
  // and end(excluded)
  explicit Stat1(const VType *begin, const VType *end) {
    Clear();
    for (const VType *item = begin; item < end; ++item) {
      (*this) += Stat1(*item);
    }
  }
  // Create a sample of value dat and weight w
  Stat1(const VType &dat, const NumType &w)
      : numsamples_(w), sum_(w * dat), sum_squares_(w * Sqr(dat)) {}
  // Copy constructor
  Stat1(const Self &stat)
      : numsamples_(stat.numsamples_),
        sum_(stat.sum_),
        sum_squares_(stat.sum_squares_) {}

  // Resets the set to "no samples".
  void Clear() {
    numsamples_ = NumType();
    sum_squares_ = sum_ = VType();
  }

  Self &operator=(const Self &stat) {
    sum_ = stat.sum_;
    sum_squares_ = stat.sum_squares_;
    numsamples_ = stat.numsamples_;
    return (*this);
  }
  // Merge statistics from two sample sets.
  Self &operator+=(const Self &stat) {
    sum_ += stat.sum_;
    sum_squares_ += stat.sum_squares_;
    numsamples_ += stat.numsamples_;
    return (*this);
  }
  // The operation opposite to +=
  Self &operator-=(const Self &stat) {
    sum_ -= stat.sum_;
    sum_squares_ -= stat.sum_squares_;
    numsamples_ -= stat.numsamples_;
    return (*this);
  }
  // Multiply the weight of the set of samples by a factor k
  Self &operator*=(const VType &k) {
    sum_ *= k;
    sum_squares_ *= k;
    numsamples_ *= k;
    return (*this);
  }

  // Merge statistics from two sample sets.
  Self operator+(const Self &stat) const { return Self(*this) += stat; }

  // The operation opposite to +
  Self operator-(const Self &stat) const { return Self(*this) -= stat; }

  // Multiply the weight of the set of samples by a factor k
  Self operator*(const VType &k) const { return Self(*this) *= k; }

  // Return the total weight of this sample set
  NumType numSamples() const { return numsamples_; }

  // Return the sum of this sample set
  VType Sum() const { return sum_; }

  // Return the mean of this sample set (VType() when the set is empty).
  VType Mean() const {
    if (numsamples_ == 0) return VType();
    return sum_ * (1.0 / numsamples_);
  }

  // Return the mean of this sample set and compute the standard deviation at
  // the same time. `stddev` may be null. For an empty set both the returned
  // mean and *stddev are VType(), so the caller never reads an unset value.
  VType Mean(VType *stddev) const {
    if (numsamples_ == 0) {
      if (stddev) *stddev = VType();
      return VType();
    }
    VType mean = sum_ * (1.0 / numsamples_);
    if (stddev) {
      VType avg_squares = sum_squares_ * (1.0 / numsamples_);
      *stddev = Sqrt(avg_squares - Sqr(mean));
    }
    return mean;
  }

  // Return the standard deviation of the sample set (VType() when empty).
  VType StdDev() const {
    if (numsamples_ == 0) return VType();
    VType mean = Mean();
    VType avg_squares = sum_squares_ * (1.0 / numsamples_);
    return Sqrt(avg_squares - Sqr(mean));
  }

 private:
  static_assert(std::is_integral<NumType>::value &&
                    !std::is_same<NumType, bool>::value,
                "NumType must be an integral type that is not bool.");
  // Let i be the index of the samples provided (using +=)
  // and weight[i],value[i] be the data of sample #i
  // then the variables have the following meaning:
  NumType numsamples_;  // sum of weight[i];
  VType sum_;           // sum of weight[i]*value[i];
  VType sum_squares_;   // sum of weight[i]*value[i]^2;

  // Template function used to square a number.
  // For a vector we square all components
  template <typename SType>
  static inline SType Sqr(const SType &dat) {
    return dat * dat;
  }

  template <typename SType>
  static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
    return dat.MulComponents(dat);
  }

  template <typename SType>
  static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
    return dat.MulComponents(dat);
  }

  template <typename SType>
  static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
    return dat.MulComponents(dat);
  }

  // Template function used to take the square root of a number.
  // For a vector we take the square root of all components
  template <typename SType>
  static inline SType Sqrt(const SType &dat) {
    // Avoid NaN due to imprecision in the calculations
    if (dat < 0) return 0;
    using std::sqrt;  // std overloads for arithmetic types, ADL otherwise
    return sqrt(dat);
  }

  template <typename SType>
  static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
    // Avoid NaN due to imprecision in the calculations
    return Max(dat, Vector2<SType>()).Sqrt();
  }

  template <typename SType>
  static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
    // Avoid NaN due to imprecision in the calculations
    return Max(dat, Vector3<SType>()).Sqrt();
  }

  template <typename SType>
  static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
    // Avoid NaN due to imprecision in the calculations
    return Max(dat, Vector4<SType>()).Sqrt();
  }
};

// Useful printing function. Uses numSamples(), the accessor Stat1 actually
// declares.
template <typename VType, typename NumType>
std::ostream &operator<<(std::ostream &out, const Stat1<VType, NumType> &s) {
  out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
      << " nsamples = " << s.numSamples() << "}";
  return out;
}
-std::numeric_limits::infinity(); + } else { + min_ = std::numeric_limits::max(); + max_ = std::numeric_limits::min(); + } + } + + Self &operator=(const Self &stat) { + this->Stat1::operator=(stat); + max_ = stat.max_; + min_ = stat.min_; + return (*this); + } + // Merge statistics from two sample sets. + Self &operator+=(const Self &stat) { + this->Stat1::operator+=(stat); + if (stat.max_ > max_) max_ = stat.max_; + if (stat.min_ < min_) min_ = stat.min_; + return (*this); + } + // Multiply the weight of the set of samples by a factor k + Self &operator*=(const VType &stat) { + this->Stat1::operator*=(stat); + return (*this); + } + // Merge statistics from two sample sets. + Self operator+(const Self &stat) const { return Self(*this) += stat; } + // Multiply the weight of the set of samples by a factor k + Self operator*(const VType &k) const { return Self(*this) *= k; } + + // Return the maximal value in this sample set + VType Max() const { return max_; } + // Return the minimal value in this sample set + VType Min() const { return min_; } + + private: + // The - operation makes no sense with Min/Max + // unless we keep the full list of values (but we don't) + // make it private, and let it undefined so nobody can call it + Self &operator-=(const Self &stat); // senseless. let it undefined. + + // The operation opposite to - + Self operator-(const Self &stat) const; // senseless. let it undefined. 
+ + // Let i be the index of the samples provided (using +=) + // and weight[i],value[i] be the data of sample #i + // then the variables have the following meaning: + VType max_; // max of value[i] + VType min_; // min of value[i] +}; + +// Useful printing function +template +std::ostream &operator<<(std::ostream &out, + const Stat1MinMax &s) { + out << "{ avg = " << s.Mean() << " std = " << s.StdDev() + << " nsamples = " << s.NumSamples() << " min = " << s.Min() + << " max = " << s.Max() << "}"; + return out; +} +} // end namespace benchmark + +#endif // BENCHMARK_STAT_H_ diff --git a/benchmark/src/string_util.cc b/benchmark/src/string_util.cc new file mode 100644 index 00000000..aeb9bc8a --- /dev/null +++ b/benchmark/src/string_util.cc @@ -0,0 +1,166 @@ +#include "string_util.h" + +#include +#include +#include +#include +#include + +#include "arraysize.h" + +namespace benchmark { +namespace { + +// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta. +const char kBigSIUnits[] = "kMGTPEZY"; +// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi. +const char kBigIECUnits[] = "KMGTPEZY"; +// milli, micro, nano, pico, femto, atto, zepto, yocto. +const char kSmallSIUnits[] = "munpfazy"; + +// We require that all three arrays have the same size. +static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits), + "SI and IEC unit arrays must be the same size"); +static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits), + "Small SI and Big SI unit arrays must be the same size"); + +static const int64_t kUnitsSize = arraysize(kBigSIUnits); + +} // end anonymous namespace + +void ToExponentAndMantissa(double val, double thresh, int precision, + double one_k, std::string* mantissa, + int64_t* exponent) { + std::stringstream mantissa_stream; + + if (val < 0) { + mantissa_stream << "-"; + val = -val; + } + + // Adjust threshold so that it never excludes things which can't be rendered + // in 'precision' digits. 
+ const double adjusted_threshold = + std::max(thresh, 1.0 / std::pow(10.0, precision)); + const double big_threshold = adjusted_threshold * one_k; + const double small_threshold = adjusted_threshold; + + if (val > big_threshold) { + // Positive powers + double scaled = val; + for (size_t i = 0; i < arraysize(kBigSIUnits); ++i) { + scaled /= one_k; + if (scaled <= big_threshold) { + mantissa_stream << scaled; + *exponent = i + 1; + *mantissa = mantissa_stream.str(); + return; + } + } + mantissa_stream << val; + *exponent = 0; + } else if (val < small_threshold) { + // Negative powers + double scaled = val; + for (size_t i = 0; i < arraysize(kSmallSIUnits); ++i) { + scaled *= one_k; + if (scaled >= small_threshold) { + mantissa_stream << scaled; + *exponent = -i - 1; + *mantissa = mantissa_stream.str(); + return; + } + } + mantissa_stream << val; + *exponent = 0; + } else { + mantissa_stream << val; + *exponent = 0; + } + *mantissa = mantissa_stream.str(); +} + +std::string ExponentToPrefix(int64_t exponent, bool iec) { + if (exponent == 0) return ""; + + const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1); + if (index >= kUnitsSize) return ""; + + const char* array = + (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits); + if (iec) + return array[index] + std::string("i"); + else + return std::string(1, array[index]); +} + +std::string ToBinaryStringFullySpecified(double value, double threshold, + int precision) { + std::string mantissa; + int64_t exponent; + ToExponentAndMantissa(value, threshold, precision, 1024.0, &mantissa, + &exponent); + return mantissa + ExponentToPrefix(exponent, false); +} + +void AppendHumanReadable(int n, std::string* str) { + std::stringstream ss; + // Round down to the nearest SI prefix. 
// Formats `msg` with the printf-style arguments in `args` and returns the
// result as a std::string.
//
// Returns an empty string when the expansion is empty or when vsnprintf
// reports an error (negative return value). The caller keeps ownership of
// `args` and is responsible for va_end() on it.
std::string StringPrintFImp(const char *msg, va_list args)
{
  // A va_list cannot be reused after vsnprintf has consumed it, and a second
  // pass is needed when the stack buffer turns out to be too small, so the
  // first attempt formats from a copy.
  va_list args_cp;
  va_copy(args_cp, args);

  std::array<char, 256> local_buff;
  const int ret =
      std::vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp);

  va_end(args_cp);

  // ret == 0 is an empty expansion. ret < 0 is a formatting error; it must
  // not reach the size computation below, where it would wrap around.
  if (ret <= 0)
    return std::string{};

  // vsnprintf returns the length the full output needs, excluding the NUL.
  const std::size_t length = static_cast<std::size_t>(ret);
  if (length < local_buff.size())
    return std::string(local_buff.data(), length);

  // The first attempt was truncated. Format again straight into a string of
  // the exact size; the extra byte vsnprintf writes is the string's own
  // terminator.
  std::string result(length, '\0');
  if (std::vsnprintf(&result[0], length + 1, msg, args) < 0)
    return std::string{};
  return result;
}
// Replaces every non-overlapping occurrence of `from` in `*str` with `to`,
// scanning left to right. Text inserted by a replacement is not rescanned, so
// `to` may safely contain `from`.
//
// An empty `from` leaves `*str` untouched: find() matches an empty pattern at
// every position, so the loop below would otherwise never terminate.
void ReplaceAll(std::string* str, const std::string& from,
                const std::string& to) {
  if (from.empty()) return;

  std::size_t start = 0;
  while ((start = str->find(from, start)) != std::string::npos) {
    str->replace(start, from.length(), to);
    start += to.length();  // resume after the text just inserted
  }
}
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sysinfo.h" +#include "internal_macros.h" + +#ifdef OS_WINDOWS +#include +#include +#else +#include +#include +#include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arraysize.h" +#include "check.h" +#include "cycleclock.h" +#include "internal_macros.h" +#include "log.h" +#include "sleep.h" +#include "string_util.h" + +namespace benchmark { +namespace { +std::once_flag cpuinfo_init; +double cpuinfo_cycles_per_second = 1.0; +int cpuinfo_num_cpus = 1; // Conservative guess +std::mutex cputimens_mutex; + +#if !defined OS_MACOSX +const int64_t estimate_time_ms = 1000; + +// Helper function estimates cycles/sec by observing cycles elapsed during +// sleep(). Using small sleep time decreases accuracy significantly. +int64_t EstimateCyclesPerSecond() { + const int64_t start_ticks = cycleclock::Now(); + SleepForMilliseconds(estimate_time_ms); + return cycleclock::Now() - start_ticks; +} +#endif + +#if defined OS_LINUX || defined OS_CYGWIN +// Helper function for reading an int from a file. Returns true if successful +// and the memory location pointed to by value is set to the value read. 
// Reads a base-10 integer from the start of `file`.
//
// Returns true and stores the number in `*value` when the file could be
// opened and its first read yields an integer that is followed by a newline
// or by the end of the data. Returns false and leaves `*value` untouched in
// every other case: the file cannot be opened, the read fails or returns no
// data, no digits are present, or other characters follow the number.
//
// The read() result is checked explicitly instead of being wrapped in
// CHECK(): as a truth value, the -1 error return passed the check while a
// 0-byte read failed it.
// NOTE(review): DateTimeString's "unused variable in optimized mode" comment
// suggests CHECK may drop its argument in optimized builds, which would have
// skipped the read entirely - confirm against check.h.
bool ReadIntFromFile(const char* file, long* value) {
  const int fd = open(file, O_RDONLY);
  if (fd == -1) return false;

  char line[1024];
  const ssize_t bytes_read = read(fd, line, sizeof(line) - 1);
  close(fd);
  if (bytes_read <= 0) return false;
  line[bytes_read] = '\0';  // at most sizeof(line) - 1 bytes were read

  char* end = nullptr;
  const long parsed = strtol(line, &end, 10);
  // end == line means strtol consumed nothing, e.g. the file is just "\n".
  if (end == line || (*end != '\n' && *end != '\0')) return false;

  *value = parsed;
  return true;
}
+ const char* pname = "/proc/cpuinfo"; + int fd = open(pname, O_RDONLY); + if (fd == -1) { + perror(pname); + if (!saw_mhz) { + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(); + } + return; + } + + double bogo_clock = 1.0; + bool saw_bogo = false; + long max_cpu_id = 0; + int num_cpus = 0; + line[0] = line[1] = '\0'; + size_t chars_read = 0; + do { // we'll exit when the last read didn't read anything + // Move the next line to the beginning of the buffer + const size_t oldlinelen = strlen(line); + if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line + line[0] = '\0'; + else // still other lines left to save + memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); + // Terminate the new line, reading more if we can't find the newline + char* newline = strchr(line, '\n'); + if (newline == nullptr) { + const size_t linelen = strlen(line); + const size_t bytes_to_read = sizeof(line) - 1 - linelen; + CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes + chars_read = read(fd, line + linelen, bytes_to_read); + line[linelen + chars_read] = '\0'; + newline = strchr(line, '\n'); + } + if (newline != nullptr) *newline = '\0'; + + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept postive values. Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. 
+ if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) + saw_mhz = true; + } + } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) + saw_bogo = true; + } + } else if (strncasecmp(line, "processor", sizeof("processor") - 1) == 0) { + num_cpus++; // count up every time we see an "processor :" entry + const char* freqstr = strchr(line, ':'); + if (freqstr) { + const long cpu_id = strtol(freqstr + 1, &err, 10); + if (freqstr[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) + max_cpu_id = cpu_id; + } + } + } while (chars_read > 0); + close(fd); + + if (!saw_mhz) { + if (saw_bogo) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } else { + // If we don't even have bogomips, we'll use the slow estimation. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(); + } + } + if (num_cpus == 0) { + fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n"); + } else { + if ((max_cpu_id + 1) != num_cpus) { + fprintf(stderr, + "CPU ID assignments in /proc/cpuinfo seems messed up." + " This is usually caused by a bad BIOS.\n"); + } + cpuinfo_num_cpus = num_cpus; + } + +#elif defined OS_FREEBSD +// For this sysctl to work, the machine must be configured without +// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 +// and later. Before that, it's a 32-bit quantity (and gives the +// wrong answer on machines faster than 2^32 Hz). 
See +// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html +// But also compare FreeBSD 7.0: +// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 +// 231 error = sysctl_handle_quad(oidp, &freq, 0, req); +// To FreeBSD 6.3 (it's the same in 6-STABLE): +// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 +// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); +#if __FreeBSD__ >= 7 + uint64_t hz = 0; +#else + unsigned int hz = 0; +#endif + size_t sz = sizeof(hz); + const char* sysctl_path = "machdep.tsc_freq"; + if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + sysctl_path, strerror(errno)); + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(); + } else { + cpuinfo_cycles_per_second = hz; + } +// TODO: also figure out cpuinfo_num_cpus + +#elif defined OS_WINDOWS + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. + OSVERSIONINFO os; + os.dwOSVersionInfoSize = sizeof(os); + DWORD data, data_size = sizeof(data); + if (GetVersionEx(&os) && os.dwPlatformId == VER_PLATFORM_WIN32_NT && + SUCCEEDED( + SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", nullptr, &data, &data_size))) + cpuinfo_cycles_per_second = (int64_t)data * (int64_t)(1000 * 1000); // was mhz + else + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(); +// TODO: also figure out cpuinfo_num_cpus + +#elif defined OS_MACOSX + // returning "mach time units" per second. the current number of elapsed + // mach time units can be found by calling uint64 mach_absolute_time(); + // while not as precise as actual CPU cycles, it is accurate in the face + // of CPU frequency scaling and multi-cpu/core machines. + // Our mac users have these types of machines, and accuracy + // (i.e. correctness) trumps precision. 
// Returns the CPU time (user + system/kernel) consumed so far by the current
// process, in seconds, or 0.0 when the operating system query fails.
//
// POSIX: getrusage(RUSAGE_SELF). Windows: GetProcessTimes().
static double MyCPUUsageRUsage() {
#ifndef OS_WINDOWS
  struct rusage ru;
  if (getrusage(RUSAGE_SELF, &ru) != 0) {
    return 0.0;
  }
  return (static_cast<double>(ru.ru_utime.tv_sec) +
          static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
          static_cast<double>(ru.ru_stime.tv_sec) +
          static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
#else
  FILETIME creation_time;
  FILETIME exit_time;
  FILETIME kernel_time;
  FILETIME user_time;
  // Mirror the POSIX branch: report 0.0 rather than reading unset FILETIMEs
  // when the query fails.
  if (!GetProcessTimes(GetCurrentProcess(), &creation_time, &exit_time,
                       &kernel_time, &user_time)) {
    return 0.0;
  }
  ULARGE_INTEGER kernel;
  ULARGE_INTEGER user;
  kernel.HighPart = kernel_time.dwHighDateTime;
  kernel.LowPart = kernel_time.dwLowDateTime;
  user.HighPart = user_time.dwHighDateTime;
  user.LowPart = user_time.dwLowDateTime;
  // FILETIME counts 100-nanosecond intervals, so seconds = ticks * 1e-7.
  // (Dividing by 1e-7 instead scales the result up by 1e14.)
  return (static_cast<double>(kernel.QuadPart) +
          static_cast<double>(user.QuadPart)) * 1.0E-7;
#endif  // OS_WINDOWS
}
// Returns the CPU time (user + system), in seconds, that getrusage() reports
// for RUSAGE_CHILDREN. Yields 0.0 when the query fails, and always on
// Windows.
double ChildrenCPUUsage() {
#ifdef OS_WINDOWS
  // TODO: Not sure what this even means on Windows
  return 0.0;
#else
  struct rusage usage;
  if (getrusage(RUSAGE_CHILDREN, &usage) != 0) {
    return 0.0;
  }
  // timeval carries whole seconds plus a microsecond remainder.
  const double user_sec = static_cast<double>(usage.ru_utime.tv_sec);
  const double user_frac = static_cast<double>(usage.ru_utime.tv_usec) * 1e-6;
  const double sys_sec = static_cast<double>(usage.ru_stime.tv_sec);
  const double sys_frac = static_cast<double>(usage.ru_stime.tv_usec) * 1e-6;
  return user_sec + user_frac + sys_sec + sys_frac;
#endif  // OS_WINDOWS
}
If reading the exported files fails, then we may not be + // running on Linux, so we silently ignore all the read errors. + for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { + std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, + "/cpufreq/scaling_governor"); + FILE* file = fopen(governor_file.c_str(), "r"); + if (!file) break; + char buff[16]; + size_t bytes_read = fread(buff, 1, sizeof(buff), file); + fclose(file); + if (memprefix(buff, bytes_read, "performance") == nullptr) return true; + } + return false; +} + +} // end namespace benchmark diff --git a/benchmark/src/sysinfo.h b/benchmark/src/sysinfo.h new file mode 100644 index 00000000..eaf77e07 --- /dev/null +++ b/benchmark/src/sysinfo.h @@ -0,0 +1,12 @@ +#ifndef BENCHMARK_SYSINFO_H_ +#define BENCHMARK_SYSINFO_H_ + +namespace benchmark { +double MyCPUUsage(); +double ChildrenCPUUsage(); +int NumCPUs(); +double CyclesPerSecond(); +bool CpuScalingEnabled(); +} // end namespace benchmark + +#endif // BENCHMARK_SYSINFO_H_ diff --git a/benchmark/src/walltime.cc b/benchmark/src/walltime.cc new file mode 100644 index 00000000..e045729a --- /dev/null +++ b/benchmark/src/walltime.cc @@ -0,0 +1,236 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "walltime.h" + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "arraysize.h" +#include "check.h" +#include "cycleclock.h" +#include "log.h" +#include "sysinfo.h" + +namespace benchmark { +namespace walltime { + +namespace { + +#if defined(HAVE_STEADY_CLOCK) +template +struct ChooseSteadyClock { + typedef std::chrono::high_resolution_clock type; +}; + +template <> +struct ChooseSteadyClock { + typedef std::chrono::steady_clock type; +}; +#endif + +struct ChooseClockType { +#if defined(HAVE_STEADY_CLOCK) + typedef typename ChooseSteadyClock<>::type type; +#else + typedef std::chrono::high_resolution_clock type; +#endif +}; + +class WallTimeImp +{ +public: + WallTime Now(); + + static WallTimeImp& GetWallTimeImp() { + static WallTimeImp imp; +#if __cplusplus >= 201103L + static_assert(std::is_trivially_destructible::value, + "WallTimeImp must be trivially destructible to prevent " + "issues with static destruction"); +#endif + return imp; + } + +private: + WallTimeImp(); + // Helper routines to load/store a float from an AtomicWord. Required because + // g++ < 4.7 doesn't support std::atomic correctly. I cannot wait to + // get rid of this horror show. 
+ void SetDrift(float f) { + int32_t w; + memcpy(&w, &f, sizeof(f)); + std::atomic_store(&drift_adjust_, w); + } + + float GetDrift() const { + float f; + int32_t w = std::atomic_load(&drift_adjust_); + memcpy(&f, &w, sizeof(f)); + return f; + } + + WallTime Slow() const { + struct timeval tv; + gettimeofday(&tv, nullptr); + return tv.tv_sec + tv.tv_usec * 1e-6; + } + +private: + static_assert(sizeof(float) <= sizeof(int32_t), + "type sizes don't allow the drift_adjust hack"); + + static constexpr double kMaxErrorInterval = 100e-6; + + WallTime base_walltime_; + int64_t base_cycletime_; + int64_t cycles_per_second_; + double seconds_per_cycle_; + uint32_t last_adjust_time_; + std::atomic drift_adjust_; + int64_t max_interval_cycles_; + + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(WallTimeImp); +}; + + +WallTime WallTimeImp::Now() { + WallTime now = 0.0; + WallTime result = 0.0; + int64_t ct = 0; + uint32_t top_bits = 0; + do { + ct = cycleclock::Now(); + int64_t cycle_delta = ct - base_cycletime_; + result = base_walltime_ + cycle_delta * seconds_per_cycle_; + + top_bits = static_cast(uint64_t(ct) >> 32); + // Recompute drift no more often than every 2^32 cycles. + // I.e., @2GHz, ~ every two seconds + if (top_bits == last_adjust_time_) { // don't need to recompute drift + return result + GetDrift(); + } + + now = Slow(); + } while (cycleclock::Now() - ct > max_interval_cycles_); + // We are now sure that "now" and "result" were produced within + // kMaxErrorInterval of one another. 
+ + SetDrift(now - result); + last_adjust_time_ = top_bits; + return now; +} + + +WallTimeImp::WallTimeImp() + : base_walltime_(0.0), base_cycletime_(0), + cycles_per_second_(0), seconds_per_cycle_(0.0), + last_adjust_time_(0), drift_adjust_(0), + max_interval_cycles_(0) { + cycles_per_second_ = static_cast(CyclesPerSecond()); + CHECK(cycles_per_second_ != 0); + seconds_per_cycle_ = 1.0 / cycles_per_second_; + max_interval_cycles_ = + static_cast(cycles_per_second_ * kMaxErrorInterval); + do { + base_cycletime_ = cycleclock::Now(); + base_walltime_ = Slow(); + } while (cycleclock::Now() - base_cycletime_ > max_interval_cycles_); + // We are now sure that "base_walltime" and "base_cycletime" were produced + // within kMaxErrorInterval of one another. + + SetDrift(0.0); + last_adjust_time_ = static_cast(uint64_t(base_cycletime_) >> 32); +} + +WallTime CPUWalltimeNow() { + static WallTimeImp& imp = WallTimeImp::GetWallTimeImp(); + return imp.Now(); +} + +WallTime ChronoWalltimeNow() { + typedef ChooseClockType::type Clock; + typedef std::chrono::duration + FPSeconds; + static_assert(std::chrono::treat_as_floating_point::value, + "This type must be treated as a floating point type."); + auto now = Clock::now().time_since_epoch(); + return std::chrono::duration_cast(now).count(); +} + +bool UseCpuCycleClock() { + bool useWallTime = !CpuScalingEnabled(); + if (useWallTime) { + VLOG(1) << "Using the CPU cycle clock to provide walltime::Now().\n"; + } else { + VLOG(1) << "Using std::chrono to provide walltime::Now().\n"; + } + return useWallTime; +} + + +} // end anonymous namespace + +// WallTimeImp doesn't work when CPU Scaling is enabled. If CPU Scaling is +// enabled at the start of the program then std::chrono::system_clock is used +// instead. 
+WallTime Now() +{ + static bool useCPUClock = UseCpuCycleClock(); + if (useCPUClock) { + return CPUWalltimeNow(); + } else { + return ChronoWalltimeNow(); + } +} + +} // end namespace walltime + + +namespace { + +std::string DateTimeString(bool local) { + typedef std::chrono::system_clock Clock; + std::time_t now = Clock::to_time_t(Clock::now()); + char storage[128]; + + std::tm timeinfo; + std::memset(&timeinfo, 0, sizeof(std::tm)); + if (local) { + localtime_r(&now, &timeinfo); + } else { + gmtime_r(&now, &timeinfo); + } + std::size_t written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); + CHECK(written < arraysize(storage)); + ((void)written); // prevent unused variable in optimized mode. + return std::string(storage); +} + +} // end namespace + +std::string LocalDateTimeString() { + return DateTimeString(true); +} + +} // end namespace benchmark diff --git a/benchmark/src/walltime.h b/benchmark/src/walltime.h new file mode 100644 index 00000000..38c26f33 --- /dev/null +++ b/benchmark/src/walltime.h @@ -0,0 +1,17 @@ +#ifndef BENCHMARK_WALLTIME_H_ +#define BENCHMARK_WALLTIME_H_ + +#include + +namespace benchmark { +typedef double WallTime; + +namespace walltime { +WallTime Now(); +} // end namespace walltime + +std::string LocalDateTimeString(); + +} // end namespace benchmark + +#endif // BENCHMARK_WALLTIME_H_ diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt new file mode 100644 index 00000000..7e4f4854 --- /dev/null +++ b/benchmark/test/CMakeLists.txt @@ -0,0 +1,89 @@ +# Enable the tests + +find_package(Threads REQUIRED) + +set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}") +string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") +string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") + +macro(compile_benchmark_test name) + add_executable(${name} "${name}.cc") + target_link_libraries(${name} benchmark ${CMAKE_THREAD_LIBS_INIT}) +endmacro(compile_benchmark_test) + +# Demonstration executable 
+compile_benchmark_test(benchmark_test) +add_test(benchmark benchmark_test --benchmark_min_time=0.01) + +compile_benchmark_test(filter_test) +macro(add_filter_test name filter expect) + add_test(${name} filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) +endmacro(add_filter_test) + +add_filter_test(filter_simple "Foo" 3) +add_filter_test(filter_suffix "BM_.*" 4) +add_filter_test(filter_regex_all ".*" 5) +add_filter_test(filter_regex_blank "" 5) +add_filter_test(filter_regex_none "monkey" 0) +add_filter_test(filter_regex_wildcard ".*Foo.*" 3) +add_filter_test(filter_regex_begin "^BM_.*" 4) +add_filter_test(filter_regex_begin2 "^N" 1) +add_filter_test(filter_regex_end ".*Ba$" 1) + +compile_benchmark_test(options_test) +add_test(options_benchmarks options_test --benchmark_min_time=0.01) + +compile_benchmark_test(basic_test) +add_test(basic_benchmark basic_test --benchmark_min_time=0.01) + +compile_benchmark_test(fixture_test) +add_test(fixture_test fixture_test --benchmark_min_time=0.01) + +compile_benchmark_test(cxx03_test) +set_target_properties(cxx03_test + PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}") +add_test(cxx03 cxx03_test --benchmark_min_time=0.01) + +# Add the coverage command(s) +if(CMAKE_BUILD_TYPE) + string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER) +endif() +if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage") + find_program(GCOV gcov) + find_program(LCOV lcov) + find_program(GENHTML genhtml) + find_program(CTEST ctest) + if (GCOV AND LCOV AND GENHTML AND CTEST AND HAVE_CXX_FLAG_COVERAGE) + add_custom_command( + OUTPUT ${CMAKE_BINARY_DIR}/lcov/index.html + COMMAND ${LCOV} -q -z -d . + COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . -o before.lcov -i + COMMAND ${CTEST} --force-new-ctest-process + COMMAND ${LCOV} -q --no-external -c -b "${CMAKE_SOURCE_DIR}" -d . 
-o after.lcov + COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov + COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov + COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark + DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Running LCOV" + ) + add_custom_target(coverage + DEPENDS ${CMAKE_BINARY_DIR}/lcov/index.html + COMMENT "LCOV report at lcov/index.html" + ) + message(STATUS "Coverage command added") + else() + if (HAVE_CXX_FLAG_COVERAGE) + set(CXX_FLAG_COVERAGE_MESSAGE supported) + else() + set(CXX_FLAG_COVERAGE_MESSAGE unavailable) + endif() + message(WARNING + "Coverage not available:\n" + " gcov: ${GCOV}\n" + " lcov: ${LCOV}\n" + " genhtml: ${GENHTML}\n" + " ctest: ${CTEST}\n" + " --coverage flag: ${CXX_FLAG_COVERAGE_MESSAGE}") + endif() +endif() diff --git a/benchmark/test/basic_test.cc b/benchmark/test/basic_test.cc new file mode 100644 index 00000000..e3d114ad --- /dev/null +++ b/benchmark/test/basic_test.cc @@ -0,0 +1,104 @@ + +#include + +#include "benchmark/benchmark_api.h" + +#define BASIC_BENCHMARK_TEST(x) \ + BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192) + +void BM_empty(benchmark::State& state) { + while (state.KeepRunning()) { + benchmark::DoNotOptimize(state.iterations()); + } +} +BENCHMARK(BM_empty); +BENCHMARK(BM_empty)->ThreadPerCpu(); + +void BM_spin_empty(benchmark::State& state) { + while (state.KeepRunning()) { + for (int x = 0; x < state.range_x(); ++x) { + benchmark::DoNotOptimize(x); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_empty); +BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); + +void BM_spin_pause_before(benchmark::State& state) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + while(state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + } +} 
+BASIC_BENCHMARK_TEST(BM_spin_pause_before); +BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu(); + + +void BM_spin_pause_during(benchmark::State& state) { + while(state.KeepRunning()) { + state.PauseTiming(); + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + state.ResumeTiming(); + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_during); +BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu(); + +void BM_pause_during(benchmark::State& state) { + while(state.KeepRunning()) { + state.PauseTiming(); + state.ResumeTiming(); + } +} +BENCHMARK(BM_pause_during); +BENCHMARK(BM_pause_during)->ThreadPerCpu(); +BENCHMARK(BM_pause_during)->UseRealTime(); +BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu(); + +void BM_spin_pause_after(benchmark::State& state) { + while(state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + } + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_after); +BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); + + +void BM_spin_pause_before_and_after(benchmark::State& state) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + while(state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } + } + for (int i = 0; i < state.range_x(); ++i) { + benchmark::DoNotOptimize(i); + } +} +BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after); +BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu(); + + +void BM_empty_stop_start(benchmark::State& state) { + while (state.KeepRunning()) { } +} +BENCHMARK(BM_empty_stop_start); +BENCHMARK(BM_empty_stop_start)->ThreadPerCpu(); + +BENCHMARK_MAIN() diff --git a/benchmark/test/benchmark_test.cc b/benchmark/test/benchmark_test.cc new file mode 100644 index 00000000..a23f82f3 --- 
/dev/null +++ b/benchmark/test/benchmark_test.cc @@ -0,0 +1,154 @@ +#include "benchmark/benchmark.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__GNUC__) +# define BENCHMARK_NOINLINE __attribute__((noinline)) +#else +# define BENCHMARK_NOINLINE +#endif + +namespace { + +int BENCHMARK_NOINLINE Factorial(uint32_t n) { + return (n == 1) ? 1 : n * Factorial(n - 1); +} + +double CalculatePi(int depth) { + double pi = 0.0; + for (int i = 0; i < depth; ++i) { + double numerator = static_cast(((i % 2) * 2) - 1); + double denominator = static_cast((2 * i) - 1); + pi += numerator / denominator; + } + return (pi - 1.0) * 4; +} + +std::set ConstructRandomSet(int size) { + std::set s; + for (int i = 0; i < size; ++i) + s.insert(i); + return s; +} + +std::mutex test_vector_mu; +std::vector* test_vector = nullptr; + +} // end namespace + +static void BM_Factorial(benchmark::State& state) { + int fac_42 = 0; + while (state.KeepRunning()) + fac_42 = Factorial(8); + // Prevent compiler optimizations + std::stringstream ss; + ss << fac_42; + state.SetLabel(ss.str()); +} +BENCHMARK(BM_Factorial); +BENCHMARK(BM_Factorial)->UseRealTime(); + +static void BM_CalculatePiRange(benchmark::State& state) { + double pi = 0.0; + while (state.KeepRunning()) + pi = CalculatePi(state.range_x()); + std::stringstream ss; + ss << pi; + state.SetLabel(ss.str()); +} +BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); + +static void BM_CalculatePi(benchmark::State& state) { + static const int depth = 1024; + while (state.KeepRunning()) { + benchmark::DoNotOptimize(CalculatePi(depth)); + } +} +BENCHMARK(BM_CalculatePi)->Threads(8); +BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32); +BENCHMARK(BM_CalculatePi)->ThreadPerCpu(); + +static void BM_SetInsert(benchmark::State& state) { + while (state.KeepRunning()) { + state.PauseTiming(); + std::set data = ConstructRandomSet(state.range_x()); + state.ResumeTiming(); 
+ for (int j = 0; j < state.range_y(); ++j) + data.insert(rand()); + } + state.SetItemsProcessed(state.iterations() * state.range_y()); + state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int)); +} +BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10); + +template +static void BM_Sequential(benchmark::State& state) { + ValueType v = 42; + while (state.KeepRunning()) { + Container c; + for (int i = state.range_x(); --i; ) + c.push_back(v); + } + const int64_t items_processed = + static_cast(state.iterations()) * state.range_x(); + state.SetItemsProcessed(items_processed); + state.SetBytesProcessed(items_processed * sizeof(v)); +} +BENCHMARK_TEMPLATE2(BM_Sequential, std::vector, int)->Range(1 << 0, 1 << 10); +BENCHMARK_TEMPLATE(BM_Sequential, std::list)->Range(1 << 0, 1 << 10); +// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond. +#if __cplusplus >= 201103L +BENCHMARK_TEMPLATE(BM_Sequential, std::vector, int)->Arg(512); +#endif + +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range_x(), '-'); + std::string s2(state.range_x(), '-'); + while (state.KeepRunning()) + benchmark::DoNotOptimize(s1.compare(s2)); +} +BENCHMARK(BM_StringCompare)->Range(1, 1<<20); + +static void BM_SetupTeardown(benchmark::State& state) { + if (state.thread_index == 0) { + // No need to lock test_vector_mu here as this is running single-threaded. 
+ test_vector = new std::vector(); + } + int i = 0; + while (state.KeepRunning()) { + std::lock_guard l(test_vector_mu); + if (i%2 == 0) + test_vector->push_back(i); + else + test_vector->pop_back(); + ++i; + } + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); + +static void BM_LongTest(benchmark::State& state) { + double tracker = 0.0; + while (state.KeepRunning()) { + for (int i = 0; i < state.range_x(); ++i) + benchmark::DoNotOptimize(tracker += i); + } +} +BENCHMARK(BM_LongTest)->Range(1<<16,1<<28); + +BENCHMARK_MAIN() + diff --git a/benchmark/test/cxx03_test.cc b/benchmark/test/cxx03_test.cc new file mode 100644 index 00000000..56779d66 --- /dev/null +++ b/benchmark/test/cxx03_test.cc @@ -0,0 +1,31 @@ + +#include + +#include "benchmark/benchmark.h" + +#if __cplusplus >= 201103L +#error C++11 or greater detected. Should be C++03. +#endif + +void BM_empty(benchmark::State& state) { + while (state.KeepRunning()) { + volatile std::size_t x = state.iterations(); + ((void)x); + } +} +BENCHMARK(BM_empty); + +template +void BM_template2(benchmark::State& state) { + BM_empty(state); +} +BENCHMARK_TEMPLATE2(BM_template2, int, long); + +template +void BM_template1(benchmark::State& state) { + BM_empty(state); +} +BENCHMARK_TEMPLATE(BM_template1, long); +BENCHMARK_TEMPLATE1(BM_template1, int); + +BENCHMARK_MAIN() diff --git a/benchmark/test/filter_test.cc b/benchmark/test/filter_test.cc new file mode 100644 index 00000000..e23b9629 --- /dev/null +++ b/benchmark/test/filter_test.cc @@ -0,0 +1,85 @@ +#include "benchmark/benchmark.h" + +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +class TestReporter : public benchmark::ConsoleReporter { + public: + virtual bool ReportContext(const Context& context) { + return ConsoleReporter::ReportContext(context); + }; + + virtual void ReportRuns(const std::vector& report) { + ++count_; + ConsoleReporter::ReportRuns(report); + }; + + 
TestReporter() : count_(0) {} + + virtual ~TestReporter() {} + + size_t GetCount() const { + return count_; + } + + private: + mutable size_t count_; +}; + +} // end namespace + + +static void NoPrefix(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(NoPrefix); + +static void BM_Foo(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_Foo); + + +static void BM_Bar(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_Bar); + + +static void BM_FooBar(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_FooBar); + + +static void BM_FooBa(benchmark::State& state) { + while (state.KeepRunning()) {} +} +BENCHMARK(BM_FooBa); + + + +int main(int argc, const char* argv[]) { + benchmark::Initialize(&argc, argv); + + TestReporter test_reporter; + benchmark::RunSpecifiedBenchmarks(&test_reporter); + + // Make sure we ran all of the tests + const size_t count = test_reporter.GetCount(); + const size_t expected = (argc == 2) ? 
std::stoul(argv[1]) : count; + if (count != expected) { + std::cerr << "ERROR: Expected " << expected << " tests to be ran but only " + << count << " completed" << std::endl; + return -1; + } +} + diff --git a/benchmark/test/fixture_test.cc b/benchmark/test/fixture_test.cc new file mode 100644 index 00000000..8aea6ef0 --- /dev/null +++ b/benchmark/test/fixture_test.cc @@ -0,0 +1,42 @@ + +#include "benchmark/benchmark.h" + +#include + +class MyFixture : public ::benchmark::Fixture +{ +public: + void SetUp() { + data = new int(42); + } + + void TearDown() { + assert(data != nullptr); + delete data; + data = nullptr; + } + + ~MyFixture() { + assert(data == nullptr); + } + + int* data; +}; + + +BENCHMARK_F(MyFixture, Foo)(benchmark::State& st) { + assert(data != nullptr); + assert(*data == 42); + while (st.KeepRunning()) { + } +} + +BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) { + while (st.KeepRunning()) { + } + st.SetItemsProcessed(st.range_x()); +} +BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42); + + +BENCHMARK_MAIN() diff --git a/benchmark/test/options_test.cc b/benchmark/test/options_test.cc new file mode 100644 index 00000000..ec8c2a16 --- /dev/null +++ b/benchmark/test/options_test.cc @@ -0,0 +1,18 @@ +#include "benchmark/benchmark_api.h" + +void BM_basic(benchmark::State& state) { + while (state.KeepRunning()) { + } +} +BENCHMARK(BM_basic); +BENCHMARK(BM_basic)->Arg(42); +BENCHMARK(BM_basic)->Range(1, 8); +BENCHMARK(BM_basic)->DenseRange(10, 15); +BENCHMARK(BM_basic)->ArgPair(42, 42); +BENCHMARK(BM_basic)->RangePair(64, 512, 64, 512); +BENCHMARK(BM_basic)->MinTime(0.7); +BENCHMARK(BM_basic)->UseRealTime(); +BENCHMARK(BM_basic)->ThreadRange(2, 4); +BENCHMARK(BM_basic)->ThreadPerCpu(); + +BENCHMARK_MAIN()