diff -Nru benchmark-1.6.1/AUTHORS benchmark-1.7.1/AUTHORS --- benchmark-1.6.1/AUTHORS 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/AUTHORS 2022-11-11 14:01:03.000000000 +0000 @@ -13,6 +13,7 @@ Andriy Berestovskyy Arne Beer Carto +Cezary Skrzyński Christian Wassermann Christopher Seymour Colin Braley @@ -49,9 +50,12 @@ Oleksandr Sochka Ori Livneh Paul Redmond +Raghu Raja Radoslav Yovchev +Rainer Orth Roman Lebedev Sayan Bhattacharjee +Shapr3D Shuo Chen Staffan Tjernstrom Steinar H. Gunderson diff -Nru benchmark-1.6.1/bindings/python/build_defs.bzl benchmark-1.7.1/bindings/python/build_defs.bzl --- benchmark-1.6.1/bindings/python/build_defs.bzl 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/bindings/python/build_defs.bzl 2022-11-11 14:01:03.000000000 +0000 @@ -8,8 +8,8 @@ shared_lib_name = name + shared_lib_suffix native.cc_binary( name = shared_lib_name, - linkshared = 1, - linkstatic = 1, + linkshared = True, + linkstatic = True, srcs = srcs + hdrs, copts = copts, features = features, diff -Nru benchmark-1.6.1/bindings/python/google_benchmark/benchmark.cc benchmark-1.7.1/bindings/python/google_benchmark/benchmark.cc --- benchmark-1.6.1/bindings/python/google_benchmark/benchmark.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/bindings/python/google_benchmark/benchmark.cc 2022-11-11 14:01:03.000000000 +0000 @@ -1,5 +1,7 @@ // Benchmark for Python. +#include "benchmark/benchmark.h" + #include #include #include @@ -9,8 +11,6 @@ #include "pybind11/stl.h" #include "pybind11/stl_bind.h" -#include "benchmark/benchmark.h" - PYBIND11_MAKE_OPAQUE(benchmark::UserCounters); namespace { @@ -95,6 +95,8 @@ .def("range_multiplier", &Benchmark::RangeMultiplier, py::return_value_policy::reference) .def("min_time", &Benchmark::MinTime, py::return_value_policy::reference) + .def("min_warmup_time", &Benchmark::MinWarmUpTime, + py::return_value_policy::reference) .def("iterations", &Benchmark::Iterations, py::return_value_policy::reference) .def("repetitions", &Benchmark::Repetitions, @@ -165,7 +167,7 @@ &State::SetComplexityN) .def_property("items_processed", &State::items_processed, &State::SetItemsProcessed) - .def("set_label", (void(State::*)(const char*)) & State::SetLabel) + .def("set_label", (void (State::*)(const char*)) & State::SetLabel) .def("range", &State::range, py::arg("pos") = 0) .def_property_readonly("iterations", &State::iterations) .def_readwrite("counters", &State::counters) @@ -177,5 +179,6 @@ py::return_value_policy::reference); m.def("RunSpecifiedBenchmarks", []() { benchmark::RunSpecifiedBenchmarks(); }); + m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks); }; } // namespace diff -Nru benchmark-1.6.1/bindings/python/google_benchmark/__init__.py benchmark-1.7.1/bindings/python/google_benchmark/__init__.py --- benchmark-1.6.1/bindings/python/google_benchmark/__init__.py 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/bindings/python/google_benchmark/__init__.py 2022-11-11 14:01:03.000000000 +0000 @@ -26,6 +26,7 @@ if __name__ == '__main__': benchmark.main() """ +import atexit from absl import app from google_benchmark import _benchmark @@ -44,6 +45,7 @@ oNLogN, oAuto, oLambda, + State, ) @@ -64,9 +66,10 @@ "oNLogN", "oAuto", "oLambda", + "State", ] -__version__ = "1.6.1" +__version__ = "1.7.1" class __OptionMaker: @@ -156,3 +159,4 @@ # Methods for use with custom main function. 
initialize = _benchmark.Initialize run_benchmarks = _benchmark.RunSpecifiedBenchmarks +atexit.register(_benchmark.ClearRegisteredBenchmarks) diff -Nru benchmark-1.6.1/BUILD.bazel benchmark-1.7.1/BUILD.bazel --- benchmark-1.6.1/BUILD.bazel 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/BUILD.bazel 2022-11-11 14:01:03.000000000 +0000 @@ -18,6 +18,14 @@ visibility = [":__subpackages__"], ) +config_setting( + name = "perfcounters", + define_values = { + "pfm": "1", + }, + visibility = [":__subpackages__"], +) + cc_library( name = "benchmark", srcs = glob( @@ -27,19 +35,36 @@ ], exclude = ["src/benchmark_main.cc"], ), - hdrs = ["include/benchmark/benchmark.h"], + hdrs = [ + "include/benchmark/benchmark.h", + "include/benchmark/export.h", + ], linkopts = select({ ":windows": ["-DEFAULTLIB:shlwapi.lib"], "//conditions:default": ["-pthread"], }), strip_include_prefix = "include", visibility = ["//visibility:public"], + # Only static linking is allowed; no .so will be produced. + # Using `defines` (i.e. not `local_defines`) means that no + # dependent rules need to bother about defining the macro. + linkstatic = True, + defines = [ + "BENCHMARK_STATIC_DEFINE", + ] + select({ + ":perfcounters": ["HAVE_LIBPFM"], + "//conditions:default": [], + }), + deps = select({ + ":perfcounters": ["@libpfm//:libpfm"], + "//conditions:default": [], + }), ) cc_library( name = "benchmark_main", srcs = ["src/benchmark_main.cc"], - hdrs = ["include/benchmark/benchmark.h"], + hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"], strip_include_prefix = "include", visibility = ["//visibility:public"], deps = [":benchmark"], diff -Nru benchmark-1.6.1/cmake/benchmark.pc.in benchmark-1.7.1/cmake/benchmark.pc.in --- benchmark-1.6.1/cmake/benchmark.pc.in 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/cmake/benchmark.pc.in 2022-11-11 14:01:03.000000000 +0000 @@ -1,7 +1,7 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix} -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: @PROJECT_NAME@ Description: Google microbenchmark framework diff -Nru benchmark-1.6.1/cmake/CXXFeatureCheck.cmake benchmark-1.7.1/cmake/CXXFeatureCheck.cmake --- benchmark-1.6.1/cmake/CXXFeatureCheck.cmake 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/cmake/CXXFeatureCheck.cmake 2022-11-11 14:01:03.000000000 +0000 @@ -17,6 +17,8 @@ endif() set(__cxx_feature_check INCLUDED) +option(CXXFEATURECHECK_DEBUG OFF) + function(cxx_feature_check FILE) string(TOLOWER ${FILE} FILE) string(TOUPPER ${FILE} VAR) @@ -27,18 +29,20 @@ return() endif() + set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}) if (ARGC GREATER 1) message(STATUS "Enabling additional flags: ${ARGV1}") - list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1}) + list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1}) endif() if (NOT DEFINED COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE}") if(CMAKE_CROSSCOMPILING) + message(STATUS "Cross-compiling to test ${FEATURE}") try_compile(COMPILE_${FEATURE} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES} + OUTPUT_VARIABLE COMPILE_OUTPUT_VAR) if(COMPILE_${FEATURE}) message(WARNING "If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0") @@ -47,11 +51,12 @@ 
set(RUN_${FEATURE} 1 CACHE INTERNAL "") endif() else() - message(STATUS "Performing Test ${FEATURE}") + message(STATUS "Compiling and running to test ${FEATURE}") try_run(RUN_${FEATURE} COMPILE_${FEATURE} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp - CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS} - LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}) + CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS} + LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES} + COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR) endif() endif() @@ -61,7 +66,11 @@ add_definitions(-DHAVE_${VAR}) else() if(NOT COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE} -- failed to compile") + if(CXXFEATURECHECK_DEBUG) + message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}") + else() + message(STATUS "Performing Test ${FEATURE} -- failed to compile") + endif() else() message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run") endif() diff -Nru benchmark-1.6.1/cmake/GoogleTest.cmake benchmark-1.7.1/cmake/GoogleTest.cmake --- benchmark-1.6.1/cmake/GoogleTest.cmake 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/cmake/GoogleTest.cmake 2022-11-11 14:01:03.000000000 +0000 @@ -30,7 +30,11 @@ include(${GOOGLETEST_PREFIX}/googletest-paths.cmake) # googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves. -add_compile_options(-w) +if (MSVC) + add_compile_options(/wd4244 /wd4722) +else() + add_compile_options(-w) +endif() # Add googletest directly to our build. This defines # the gtest and gtest_main targets. diff -Nru benchmark-1.6.1/CMakeLists.txt benchmark-1.7.1/CMakeLists.txt --- benchmark-1.6.1/CMakeLists.txt 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/CMakeLists.txt 2022-11-11 14:01:03.000000000 +0000 @@ -1,4 +1,4 @@ -cmake_minimum_required (VERSION 3.5.1) +cmake_minimum_required (VERSION 3.16.3) foreach(p CMP0048 # OK to clear PROJECT_VERSION on project() @@ -6,6 +6,7 @@ CMP0056 # export EXE_LINKER_FLAGS to try_run CMP0057 # Support no if() IN_LIST operator CMP0063 # Honor visibility properties for all targets + CMP0067 # Honor language standard in try_compile() source file signature CMP0077 # Allow option() overrides in importing projects ) if(POLICY ${p}) @@ -13,7 +14,7 @@ endif() endforeach() -project (benchmark VERSION 1.6.1 LANGUAGES CXX) +project (benchmark VERSION 1.7.1 LANGUAGES CXX) option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) @@ -50,7 +51,10 @@ option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) -set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +# Export only public symbols +set(CMAKE_CXX_VISIBILITY_PRESET hidden) +set(CMAKE_VISIBILITY_INLINES_HIDDEN ON) + if(MSVC) # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and # cross-compilation (e.g. 
Host=x86_64, target=aarch64) requires using the @@ -123,10 +127,10 @@ string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) # Import our CMake modules -include(CheckCXXCompilerFlag) include(AddCXXCompilerFlag) -include(CXXFeatureCheck) +include(CheckCXXCompilerFlag) include(CheckLibraryExists) +include(CXXFeatureCheck) check_library_exists(rt shm_open "" HAVE_LIB_RT) @@ -135,6 +139,16 @@ endif() if (MSVC) + set(BENCHMARK_CXX_STANDARD 14) +else() + set(BENCHMARK_CXX_STANDARD 11) +endif() + +set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD}) +set(CMAKE_CXX_STANDARD_REQUIRED YES) +set(CMAKE_CXX_EXTENSIONS OFF) + +if (MSVC) # Turn compiler warnings up to 11 string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") @@ -166,17 +180,11 @@ set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG") endif() else() - # Try and enable C++11. Don't use C++14 because it doesn't work in some - # configurations. - add_cxx_compiler_flag(-std=c++11) - if (NOT HAVE_CXX_FLAG_STD_CXX11) - add_cxx_compiler_flag(-std=c++0x) - endif() - # Turn compiler warnings up to 11 add_cxx_compiler_flag(-Wall) add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) + add_cxx_compiler_flag(-Wfloat-equal) if(BENCHMARK_ENABLE_WERROR) add_cxx_compiler_flag(-Werror RELEASE) add_cxx_compiler_flag(-Werror RELWITHDEBINFO) @@ -219,7 +227,7 @@ add_cxx_compiler_flag(-wd654) add_cxx_compiler_flag(-Wthread-safety) if (HAVE_CXX_FLAG_WTHREAD_SAFETY) - cxx_feature_check(THREAD_SAFETY_ATTRIBUTES) + cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include") endif() # On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a diff -Nru benchmark-1.6.1/CONTRIBUTORS benchmark-1.7.1/CONTRIBUTORS --- benchmark-1.6.1/CONTRIBUTORS 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/CONTRIBUTORS 2022-11-11 14:01:03.000000000 +0000 @@ -27,7 +27,9 @@ Alex Steele Andriy Berestovskyy Arne Beer +Bátor Tallér Billy Robert O'Neal III +Cezary Skrzyński Chris Kennelly Christian Wassermann Christopher Seymour @@ -71,6 +73,8 @@ Paul Redmond Pierre Phaneuf Radoslav Yovchev +Rainer Orth +Raghu Raja Raul Marin Ray Glover Robert Guo diff -Nru benchmark-1.6.1/debian/changelog benchmark-1.7.1/debian/changelog --- benchmark-1.6.1/debian/changelog 2022-01-16 17:16:14.000000000 +0000 +++ benchmark-1.7.1/debian/changelog 2023-09-18 16:49:22.000000000 +0000 @@ -1,3 +1,40 @@ +benchmark (1.7.1-1~22.04.sav0) jammy; urgency=medium + + * Backport to Jammy + + -- Rob Savoury Mon, 18 Sep 2023 09:49:22 -0700 + +benchmark (1.7.1-1) unstable; urgency=medium + + * Team upload. + * New upstream version 1.7.1 + + -- Timo Röhling Sun, 13 Nov 2022 11:31:04 +0100 + +benchmark (1.7.0-2) unstable; urgency=medium + + * Team upload. + * Upload to unstable + * Prevent debhelper from adding LTO flags + + -- Timo Röhling Sun, 21 Aug 2022 19:57:55 +0200 + +benchmark (1.7.0-1) experimental; urgency=medium + + * Team upload. + * New upstream version 1.7.0 + * Bump Standards-Version to 4.6.1 + * Bump SOVERSION after ABI breakage + + -- Timo Röhling Sun, 14 Aug 2022 23:35:06 +0200 + +benchmark (1.6.1-2) unstable; urgency=medium + + * Team upload. 
+ * Fix FTBFS due to excessive vector size + + -- Timo Röhling Mon, 01 Aug 2022 20:40:09 +0200 + benchmark (1.6.1-1) unstable; urgency=medium * [e539e45] New upstream version 1.6.1 diff -Nru benchmark-1.6.1/debian/control benchmark-1.7.1/debian/control --- benchmark-1.6.1/debian/control 2022-01-16 17:15:35.000000000 +0000 +++ benchmark-1.7.1/debian/control 2022-11-13 10:27:22.000000000 +0000 @@ -7,8 +7,7 @@ Build-Depends: cmake, debhelper-compat (= 13), libgmock-dev, - googletest -Standards-Version: 4.6.0 +Standards-Version: 4.6.1 Vcs-Browser: https://salsa.debian.org/science-team/benchmark Vcs-Git: https://salsa.debian.org/science-team/benchmark.git Homepage: https://github.com/google/benchmark @@ -18,14 +17,14 @@ Architecture: any Multi-Arch: same Section: libdevel -Depends: libbenchmark1 (= ${binary:Version}), +Depends: libbenchmark1debian (= ${binary:Version}), ${misc:Depends} Description: Microbenchmark support library, development files Library to support the benchmarking of functions, similar to unit-tests. . This package contains the development files. -Package: libbenchmark1 +Package: libbenchmark1debian Architecture: any Multi-Arch: same Depends: ${misc:Depends}, @@ -43,7 +42,7 @@ Section: doc Depends: ${misc:Depends}, ${shlibs:Depends} -Suggests: python (>> 3) +Suggests: python3:any Pre-Depends: ${misc:Pre-Depends} Description: Microbenchmark support library, tools and documentation Library to support the benchmarking of functions, similar to unit-tests. diff -Nru benchmark-1.6.1/debian/libbenchmark1debian.install benchmark-1.7.1/debian/libbenchmark1debian.install --- benchmark-1.6.1/debian/libbenchmark1debian.install 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/debian/libbenchmark1debian.install 2022-11-13 10:27:22.000000000 +0000 @@ -0,0 +1 @@ +usr/lib/*/libbenchmark*.so.* diff -Nru benchmark-1.6.1/debian/libbenchmark1.install benchmark-1.7.1/debian/libbenchmark1.install --- benchmark-1.6.1/debian/libbenchmark1.install 2022-01-16 17:14:02.000000000 +0000 +++ benchmark-1.7.1/debian/libbenchmark1.install 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -usr/lib/*/libbenchmark*.so.* diff -Nru benchmark-1.6.1/debian/patches/0001-Create-shared-lib.patch benchmark-1.7.1/debian/patches/0001-Create-shared-lib.patch --- benchmark-1.6.1/debian/patches/0001-Create-shared-lib.patch 2022-01-16 17:14:02.000000000 +0000 +++ benchmark-1.7.1/debian/patches/0001-Create-shared-lib.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -From 1855c5f905958cefb7900f209cfe88575abc0bab Mon Sep 17 00:00:00 2001 -From: Anton Gladky -Date: Thu, 9 Nov 2017 21:01:38 +0100 -Subject: [PATCH] Create shared lib - ---- - CMakeLists.txt | 2 +- - src/CMakeLists.txt | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -Index: benchmark/CMakeLists.txt -=================================================================== ---- benchmark.orig/CMakeLists.txt -+++ benchmark/CMakeLists.txt -@@ -92,7 +92,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_C - - # Read the git tags to determine the project version - include(GetGitVersion) --get_git_version(GIT_VERSION) -+#get_git_version(GIT_VERSION) - - # If no git version can be determined, use the version - # from the project() command -Index: benchmark/src/CMakeLists.txt -=================================================================== ---- benchmark.orig/src/CMakeLists.txt -+++ benchmark/src/CMakeLists.txt -@@ -17,7 +17,7 @@ foreach(item ${BENCHMARK_MAIN}) - list(REMOVE_ITEM SOURCE_FILES "${item}") - endforeach() - --add_library(benchmark 
${SOURCE_FILES}) -+add_library(benchmark SHARED ${SOURCE_FILES}) - add_library(benchmark::benchmark ALIAS benchmark) - set_target_properties(benchmark PROPERTIES - OUTPUT_NAME "benchmark" diff -Nru benchmark-1.6.1/debian/patches/0001-Do-not-read-Git-tags-to-determine-version.patch benchmark-1.7.1/debian/patches/0001-Do-not-read-Git-tags-to-determine-version.patch --- benchmark-1.6.1/debian/patches/0001-Do-not-read-Git-tags-to-determine-version.patch 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/debian/patches/0001-Do-not-read-Git-tags-to-determine-version.patch 2022-11-13 10:30:30.000000000 +0000 @@ -0,0 +1,23 @@ +From: =?utf-8?q?Timo_R=C3=B6hling?= +Date: Sun, 14 Aug 2022 21:56:24 +0200 +Subject: Do not read Git tags to determine version + +--- + CMakeLists.txt | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 9ab265e..77d110c 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -109,8 +109,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + + + # Read the git tags to determine the project version +-include(GetGitVersion) +-get_git_version(GIT_VERSION) ++#include(GetGitVersion) ++#get_git_version(GIT_VERSION) + + # If no git version can be determined, use the version + # from the project() command diff -Nru benchmark-1.6.1/debian/patches/0002-Fix-compilation-on-hppa.patch benchmark-1.7.1/debian/patches/0002-Fix-compilation-on-hppa.patch --- benchmark-1.6.1/debian/patches/0002-Fix-compilation-on-hppa.patch 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/debian/patches/0002-Fix-compilation-on-hppa.patch 2022-11-13 10:30:30.000000000 +0000 @@ -0,0 +1,26 @@ +From: John David Anglin +Date: Sun, 14 Aug 2022 21:17:52 +0200 +Subject: Fix compilation on hppa + +Reviewed-By: Anton Gladky +Last-Update: 2018-11-17 +--- + src/cycleclock.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/cycleclock.h b/src/cycleclock.h +index df6ffa5..298b6ec 100644 +--- a/src/cycleclock.h ++++ b/src/cycleclock.h +@@ -108,6 +108,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { + int64_t itc; + asm("mov %0 = ar.itc" : "=r"(itc)); + return itc; ++#elif defined(__hppa__) ++ // Counter can only be read when PSW S bit is 0. 
++ long itc; ++ asm("mfctl 16,%0" : "=r"(itc)); ++ return static_cast(itc); + #elif defined(COMPILER_MSVC) && defined(_M_IX86) + // Older MSVC compilers (like 7.x) don't seem to support the + // __rdtsc intrinsic properly, so I prefer to use _asm instead diff -Nru benchmark-1.6.1/debian/patches/0003-Fix-vector-size-to-reasonable-amount.patch benchmark-1.7.1/debian/patches/0003-Fix-vector-size-to-reasonable-amount.patch --- benchmark-1.6.1/debian/patches/0003-Fix-vector-size-to-reasonable-amount.patch 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/debian/patches/0003-Fix-vector-size-to-reasonable-amount.patch 2022-11-13 10:30:30.000000000 +0000 @@ -0,0 +1,21 @@ +From: =?utf-8?q?Timo_R=C3=B6hling?= +Date: Mon, 1 Aug 2022 20:35:55 +0200 +Subject: Fix vector size to reasonable amount + +--- + test/benchmark_test.cc | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc +index 47023a7..952a3ff 100644 +--- a/test/benchmark_test.cc ++++ b/test/benchmark_test.cc +@@ -178,7 +178,7 @@ static void BM_ParallelMemset(benchmark::State& state) { + delete test_vector; + } + } +-BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); ++BENCHMARK(BM_ParallelMemset)->Arg(1 << 20)->ThreadRange(1, 4); + + static void BM_ManualTiming(benchmark::State& state) { + int64_t slept_for = 0; diff -Nru benchmark-1.6.1/debian/patches/0004-Properly-enable-LTO.patch benchmark-1.7.1/debian/patches/0004-Properly-enable-LTO.patch --- benchmark-1.6.1/debian/patches/0004-Properly-enable-LTO.patch 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/debian/patches/0004-Properly-enable-LTO.patch 2022-11-13 10:30:30.000000000 +0000 @@ -0,0 +1,52 @@ +From: =?utf-8?q?Timo_R=C3=B6hling?= +Date: Sun, 14 Aug 2022 22:00:29 +0200 +Subject: Properly enable LTO + +--- + CMakeLists.txt | 18 ------------------ + src/CMakeLists.txt | 3 +++ + 2 files changed, 3 insertions(+), 18 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 77d110c..51789db 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -242,24 +242,6 @@ else() + add_definitions(-D_QNX_SOURCE) + endif() + +- # Link time optimisation +- if (BENCHMARK_ENABLE_LTO) +- add_cxx_compiler_flag(-flto) +- add_cxx_compiler_flag(-Wno-lto-type-mismatch) +- if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") +- find_program(GCC_AR gcc-ar) +- if (GCC_AR) +- set(CMAKE_AR ${GCC_AR}) +- endif() +- find_program(GCC_RANLIB gcc-ranlib) +- if (GCC_RANLIB) +- set(CMAKE_RANLIB ${GCC_RANLIB}) +- endif() +- elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") +- include(llvm-toolchain) +- endif() +- endif() +- + # Coverage build type + set(BENCHMARK_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG}" + CACHE STRING "Flags used by the C++ compiler during coverage builds." 
+diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 7f2c88b..3edd8a8 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -24,6 +24,9 @@ set_target_properties(benchmark PROPERTIES + VERSION ${GENERIC_LIB_VERSION} + SOVERSION ${GENERIC_LIB_SOVERSION} + ) ++if(BENCHMARK_ENABLE_LTO) ++ set_target_properties(benchmark PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON) ++endif() + target_include_directories(benchmark PUBLIC + $ + ) diff -Nru benchmark-1.6.1/debian/patches/0005-Build-benchmark_main-as-static-library.patch benchmark-1.7.1/debian/patches/0005-Build-benchmark_main-as-static-library.patch --- benchmark-1.6.1/debian/patches/0005-Build-benchmark_main-as-static-library.patch 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/debian/patches/0005-Build-benchmark_main-as-static-library.patch 2022-11-13 10:30:30.000000000 +0000 @@ -0,0 +1,21 @@ +From: =?utf-8?q?Timo_R=C3=B6hling?= +Date: Sun, 14 Aug 2022 22:00:57 +0200 +Subject: Build benchmark_main as static library + +--- + src/CMakeLists.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 3edd8a8..2338e47 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -62,7 +62,7 @@ if (NOT BUILD_SHARED_LIBS) + endif() + + # Benchmark main library +-add_library(benchmark_main "benchmark_main.cc") ++add_library(benchmark_main STATIC "benchmark_main.cc") + add_library(benchmark::benchmark_main ALIAS benchmark_main) + set_target_properties(benchmark_main PROPERTIES + OUTPUT_NAME "benchmark_main" diff -Nru benchmark-1.6.1/debian/patches/0006-Bump-SOVERSION-after-ABI-breakage.patch benchmark-1.7.1/debian/patches/0006-Bump-SOVERSION-after-ABI-breakage.patch --- benchmark-1.6.1/debian/patches/0006-Bump-SOVERSION-after-ABI-breakage.patch 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/debian/patches/0006-Bump-SOVERSION-after-ABI-breakage.patch 2022-11-13 10:30:30.000000000 +0000 @@ -0,0 +1,21 @@ +From: =?utf-8?q?Timo_R=C3=B6hling?= +Date: Sun, 14 Aug 2022 23:30:23 +0200 +Subject: Bump SOVERSION after ABI breakage + +--- + CMakeLists.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 51789db..ba8a8bd 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -124,7 +124,7 @@ message(STATUS "Version: ${VERSION}") + + # The version of the libraries + set(GENERIC_LIB_VERSION ${VERSION}) +-string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) ++set(GENERIC_LIB_SOVERSION "1debian") + + # Import our CMake modules + include(AddCXXCompilerFlag) diff -Nru benchmark-1.6.1/debian/patches/03_fix_hppa_ftbfs.patch benchmark-1.7.1/debian/patches/03_fix_hppa_ftbfs.patch --- benchmark-1.6.1/debian/patches/03_fix_hppa_ftbfs.patch 2022-01-16 17:14:02.000000000 +0000 +++ benchmark-1.7.1/debian/patches/03_fix_hppa_ftbfs.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,21 +0,0 @@ -Description: Fix compilation on hppa -Author: John David Anglin -Reviewed-By: Anton Gladky -Last-Update: 2018-11-17 - -Index: benchmark/src/cycleclock.h -=================================================================== ---- benchmark.orig/src/cycleclock.h -+++ benchmark/src/cycleclock.h -@@ -108,6 +108,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t N - int64_t itc; - asm("mov %0 = ar.itc" : "=r"(itc)); - return itc; -+#elif defined(__hppa__) -+ // Counter can only be read when PSW S bit is 0. 
-+ long itc; -+ asm("mfctl 16,%0" : "=r"(itc)); -+ return static_cast(itc); - #elif defined(COMPILER_MSVC) && defined(_M_IX86) - // Older MSVC compilers (like 7.x) don't seem to support the - // __rdtsc intrinsic properly, so I prefer to use _asm instead diff -Nru benchmark-1.6.1/debian/patches/04_fix_lto-static-build.patch benchmark-1.7.1/debian/patches/04_fix_lto-static-build.patch --- benchmark-1.6.1/debian/patches/04_fix_lto-static-build.patch 2022-01-16 17:14:51.000000000 +0000 +++ benchmark-1.7.1/debian/patches/04_fix_lto-static-build.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -Description: Fix static library build. -Author: Anton Gladky -Bug-Debian: https://bugs.debian.org/996956 -Last-Update: 2021-10-21 - -Index: benchmark/CMakeLists.txt -=================================================================== ---- benchmark.orig/CMakeLists.txt -+++ benchmark/CMakeLists.txt -@@ -236,7 +236,7 @@ else() - - # Link time optimisation - if (BENCHMARK_ENABLE_LTO) -- add_cxx_compiler_flag(-flto) -+ add_cxx_compiler_flag(-flto -ffat-lto-objects) - add_cxx_compiler_flag(-Wno-lto-type-mismatch) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - find_program(GCC_AR gcc-ar) diff -Nru benchmark-1.6.1/debian/patches/series benchmark-1.7.1/debian/patches/series --- benchmark-1.6.1/debian/patches/series 2022-01-16 17:14:02.000000000 +0000 +++ benchmark-1.7.1/debian/patches/series 2022-11-13 10:30:30.000000000 +0000 @@ -1,3 +1,6 @@ -0001-Create-shared-lib.patch -03_fix_hppa_ftbfs.patch -04_fix_lto-static-build.patch +0001-Do-not-read-Git-tags-to-determine-version.patch +0002-Fix-compilation-on-hppa.patch +0003-Fix-vector-size-to-reasonable-amount.patch +0004-Properly-enable-LTO.patch +0005-Build-benchmark_main-as-static-library.patch +0006-Bump-SOVERSION-after-ABI-breakage.patch diff -Nru benchmark-1.6.1/debian/rules benchmark-1.7.1/debian/rules --- benchmark-1.6.1/debian/rules 2022-01-16 17:14:02.000000000 +0000 +++ benchmark-1.7.1/debian/rules 2022-11-13 10:27:22.000000000 +0000 @@ -1,15 +1,16 @@ #!/usr/bin/make -f include /usr/share/dpkg/pkg-info.mk -export DEB_BUILD_MAINT_OPTIONS = hardening=+all + +# Upstream can handle LTO on its own, so disable the flags +export DEB_BUILD_MAINT_OPTIONS = hardening=+all optimize=-lto %: dh $@ --buildsystem=cmake override_dh_auto_configure: dh_auto_configure -- -DGIT_VERSION="$(DEB_VERSION_UPSTREAM)" \ - -DGOOGLETEST_PATH=/usr/src/googletest \ - -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true - -override_dh_strip: - dh_strip -Xlibbenchmark_main.a + -DBUILD_SHARED_LIBS=ON \ + -DBENCHMARK_ENABLE_LTO=ON \ + -DBENCHMARK_USE_BUNDLED_GTEST=OFF \ + -DBENCHMARK_ENABLE_WERROR=OFF diff -Nru benchmark-1.6.1/debian/tests/fix_tests.patch benchmark-1.7.1/debian/tests/fix_tests.patch --- benchmark-1.6.1/debian/tests/fix_tests.patch 2022-01-16 17:14:03.000000000 +0000 +++ benchmark-1.7.1/debian/tests/fix_tests.patch 2022-11-13 10:27:22.000000000 +0000 @@ -1,47 +1,22 @@ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt -index 79cdf53..d66211a 100644 +index a49ab19..6eeaa2e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt -@@ -38,17 +38,17 @@ add_library(output_test_helper STATIC output_test_helper.cc output_test.h) +@@ -1,3 +1,8 @@ ++cmake_minimum_required(VERSION 3.9) ++project(benchmark_autopkgtest) ++find_package(benchmark REQUIRED) ++find_package(GTest REQUIRED) ++ + # Enable the tests - macro(compile_benchmark_test name) - add_executable(${name} "${name}.cc") -- target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT}) 
-+ target_link_libraries(${name} benchmark ${CMAKE_THREAD_LIBS_INIT}) - endmacro(compile_benchmark_test) - - macro(compile_benchmark_test_with_main name) - add_executable(${name} "${name}.cc") -- target_link_libraries(${name} benchmark::benchmark_main) -+ target_link_libraries(${name} benchmark_main) - endmacro(compile_benchmark_test_with_main) - - macro(compile_output_test name) - add_executable(${name} "${name}.cc" output_test.h) -- target_link_libraries(${name} output_test_helper benchmark::benchmark -+ target_link_libraries(${name} output_test_helper benchmark - ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) - endmacro(compile_output_test) - -@@ -119,8 +119,8 @@ add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_ - compile_benchmark_test(args_product_test) - add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01) - --compile_benchmark_test_with_main(link_main_test) --add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01) -+#compile_benchmark_test_with_main(link_main_test) -+#add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01) - - compile_output_test(reporter_output_test) - add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01) -@@ -188,8 +188,8 @@ add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time= - if (BENCHMARK_ENABLE_GTEST_TESTS) + set(THREADS_PREFER_PTHREAD_FLAG ON) +@@ -211,7 +216,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS) macro(compile_gtest name) add_executable(${name} "${name}.cc") -- target_link_libraries(${name} benchmark::benchmark + target_link_libraries(${name} benchmark::benchmark - gmock_main ${CMAKE_THREAD_LIBS_INIT}) -+ target_link_libraries(${name} benchmark -+ gmock_main gmock gtest gtest_main ${CMAKE_THREAD_LIBS_INIT}) ++ GTest::gmock_main ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_gtest) macro(add_gtest name) diff -Nru benchmark-1.6.1/docs/dependencies.md benchmark-1.7.1/docs/dependencies.md --- benchmark-1.6.1/docs/dependencies.md 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/docs/dependencies.md 2022-11-11 14:01:03.000000000 +0000 @@ -7,13 +7,15 @@ * The last two Ubuntu LTS releases Currently, this means using build tool versions that are available for Ubuntu -18.04 (Bionic Beaver), Ubuntu 20.04 (Focal Fossa), and Debian 11 (bullseye). +Ubuntu 20.04 (Focal Fossa), Ubuntu 22.04 (Jammy Jellyfish) and Debian 11.4 (bullseye). -_Note, CI also runs ubuntu-16.04 and ubuntu-14.04 to ensure best effort support -for older versions._ +_Note, CI also runs ubuntu-18.04 to attempt best effort support for older versions._ ## cmake -The current supported version is cmake 3.5.1 as of 2018-06-06. +The current supported version is cmake 3.16.3 as of 2022-08-10. + +* _3.10.2 (ubuntu 18.04)_ +* 3.16.3 (ubuntu 20.04) +* 3.18.4 (debian 11.4) +* 3.22.1 (ubuntu 22.04) -_Note, this version is also available for Ubuntu 14.04, an older Ubuntu LTS -release, as `cmake3`._ diff -Nru benchmark-1.6.1/docs/perf_counters.md benchmark-1.7.1/docs/perf_counters.md --- benchmark-1.6.1/docs/perf_counters.md 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/docs/perf_counters.md 2022-11-11 14:01:03.000000000 +0000 @@ -12,16 +12,17 @@ * The benchmark is run on an architecture featuring a Performance Monitoring Unit (PMU), * The benchmark is compiled with support for collecting counters. 
Currently, - this requires [libpfm](http://perfmon2.sourceforge.net/) be available at build - time + this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a + dependency via Bazel. The feature does not require modifying benchmark code. Counter collection is handled at the boundaries where timer collection is also handled. To opt-in: - -* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. -* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM. +* If using a Bazel build, add `--define pfm=1` to your build flags +* If using CMake: + * Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`. + * Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`. To use, pass a comma-separated list of counter names through the `--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning, @@ -31,4 +32,4 @@ The counter values are reported back through the [User Counters](../README.md#custom-counters) mechanism, meaning, they are available in all the formats (e.g. JSON) supported -by User Counters. \ No newline at end of file +by User Counters. diff -Nru benchmark-1.6.1/docs/python_bindings.md benchmark-1.7.1/docs/python_bindings.md --- benchmark-1.6.1/docs/python_bindings.md 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/docs/python_bindings.md 2022-11-11 14:01:03.000000000 +0000 @@ -0,0 +1,34 @@ +# Building and installing Python bindings + +Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and +using Google Benchmark directly in Python. +Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows. +Supported Python versions are Python 3.7 - 3.10. + +To install Google Benchmark's Python bindings, run: + +```bash +python -m pip install --upgrade pip # for manylinux2014 support +python -m pip install google-benchmark +``` + +In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual +environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html) +on how to create virtual environments. + +To build a wheel directly from source, you can follow these steps: +```bash +git clone https://github.com/google/benchmark.git +cd benchmark +# create a virtual environment and activate it +python3 -m venv venv --system-site-packages +source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows + +# upgrade Python's system-wide packages +python -m pip install --upgrade pip setuptools wheel +# builds the wheel and stores it in the directory "wheelhouse". +python -m pip wheel . -w wheelhouse +``` + +NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel, +refer to the [Bazel installation docs](https://bazel.build/install). diff -Nru benchmark-1.6.1/docs/reducing_variance.md benchmark-1.7.1/docs/reducing_variance.md --- benchmark-1.6.1/docs/reducing_variance.md 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/docs/reducing_variance.md 2022-11-11 14:01:03.000000000 +0000 @@ -0,0 +1,100 @@ +# Reducing Variance + + + +## Disabling CPU Frequency Scaling + +If you see this error: + +``` +***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. +``` + +you might want to disable the CPU frequency scaling while running the +benchmark, as well as consider other ways to stabilize the performance of +your system while benchmarking.
+ +See [Reducing Variance](reducing_variance.md) for more information. + +Exactly how to do this depends on the Linux distribution, +desktop environment, and installed programs. Specific details are a moving +target, so we will not attempt to exhaustively document them here. + +One simple option is to use the `cpupower` program to change the +performance governor to "performance". This tool is maintained along with +the Linux kernel and provided by your distribution. + +It must be run as root, like this: + +```bash +sudo cpupower frequency-set --governor performance +``` + +After this you can verify that all CPUs are using the performance governor +by running this command: + +```bash +cpupower frequency-info -o proc +``` + +The benchmarks you subsequently run will have less variance. + + + +## Reducing Variance in Benchmarks + +The Linux CPU frequency governor [discussed +above](user_guide#disabling-cpu-frequency-scaling) is not the only source +of noise in benchmarks. Some, but not all, of the sources of variance +include: + +1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same + speed, so running a benchmark one time and then again may give a + different result depending on which CPU it ran on. +2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and + AMD Turbo Core and Precision Boost, can temporarily change the CPU + frequency even when using the "performance" governor on Linux. +3. Context switching between CPUs, or scheduling competition on the CPU the + benchmark is running on. +4. Intel Hyperthreading or AMD SMT causing the same issue as above. +5. Cache effects caused by code running on other CPUs. +6. Non-uniform memory architectures (NUMA). + +These can cause variance in benchmark results within a single run +(`--benchmark_repetitions=N`) or across multiple runs of the benchmark +program. + +Reducing sources of variance is OS and architecture dependent, which is one +reason some companies maintain machines dedicated to performance testing. + +Some of the easier and more effective ways of reducing variance on a typical +Linux workstation are: + +1. Use the performance governor as [discussed +above](user_guide#disabling-cpu-frequency-scaling). +1. Disable processor boosting by: + ```sh + echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost + ``` + See the Linux kernel's + [boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt) + for more information. +2. Set the benchmark program's task affinity to a fixed cpu. For example: + ```sh + taskset -c 0 ./mybenchmark + ``` +3. Disabling Hyperthreading/SMT. This can be done in the BIOS or using the + `/sys` file system (see the LLVM project's [Benchmarking + tips](https://llvm.org/docs/Benchmarking.html)). +4. Close other programs that do non-trivial things based on timers, such as + your web browser, desktop environment, etc. +5. Reduce the working set of your benchmark to fit within the L1 cache, but + do be aware that this may lead you to optimize for an unrealistic + situation. + +Further resources on this topic: + +1. The LLVM project's [Benchmarking + tips](https://llvm.org/docs/Benchmarking.html). +1. The Arch Wiki [Cpu frequency +scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.
diff -Nru benchmark-1.6.1/docs/releasing.md benchmark-1.7.1/docs/releasing.md --- benchmark-1.6.1/docs/releasing.md 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/docs/releasing.md 2022-11-11 14:01:03.000000000 +0000 @@ -1,7 +1,7 @@ # How to release * Make sure you're on main and synced to HEAD -* Ensure the project builds and tests run (sanity check only, obviously) +* Ensure the project builds and tests run * `parallel -j0 exec ::: test/*_test` can help ensure everything at least passes * Prepare release notes @@ -33,3 +33,5 @@ * `git pull --tags` * `git tag -a -f ` * `git push --force --tags origin` +* Confirm that the "Build and upload Python wheels" action runs to completion + * run it manually if it hasn't run diff -Nru benchmark-1.6.1/docs/user_guide.md benchmark-1.7.1/docs/user_guide.md --- benchmark-1.6.1/docs/user_guide.md 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/docs/user_guide.md 2022-11-11 14:01:03.000000000 +0000 @@ -50,14 +50,19 @@ [Custom Statistics](#custom-statistics) +[Memory Usage](#memory-usage) + [Using RegisterBenchmark](#using-register-benchmark) [Exiting with an Error](#exiting-with-an-error) [A Faster KeepRunning Loop](#a-faster-keep-running-loop) +## Benchmarking Tips + [Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling) +[Reducing Variance in Benchmarks](reducing_variance.md) @@ -180,6 +185,12 @@ BM_memcpy/32k 1834 ns 1837 ns 357143 ``` +## Disabling Benchmarks + +It is possible to temporarily disable benchmarks by renaming the benchmark +function to have the prefix "DISABLED_". This will cause the benchmark to +be skipped at runtime. + ## Result comparison @@ -232,6 +243,19 @@ the minimum time, or the wallclock time is 5x minimum time. The minimum time is set per benchmark by calling `MinTime` on the registered benchmark object. +Furthermore, warming up a benchmark might be necessary in order to get +stable results because of e.g. caching effects of the code under benchmark. +Warming up means running the benchmark for a given amount of time, before +results are actually taken into account. The amount of time for which +the warmup should be run can be set per benchmark by calling +`MinWarmUpTime` on the registered benchmark object or for all benchmarks +using the `--benchmark_min_warmup_time` command-line option. Note that +`MinWarmUpTime` will override the value of `--benchmark_min_warmup_time` +for the single benchmark. How many iterations the warmup run of each +benchmark takes is determined the same way as described in the paragraph +above. By default the warmup phase is set to 0 seconds and is therefore +disabled. + Average timings are then reported over the iterations run. If multiple repetitions are requested using the `--benchmark_repetitions` command-line option, or at registration time, the benchmark function will be run several @@ -293,7 +317,7 @@ delete[] src; delete[] dst; } -BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); +BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(8<<10); ``` The preceding code is quite repetitive, and can be replaced with the following @@ -362,17 +386,14 @@ product of the two specified ranges and will generate a benchmark for each such pair. -{% raw %} ```c++ BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` -{% endraw %} Some benchmarks may require specific argument values that cannot be expressed with `Ranges`.
In this case, `ArgsProduct` offers the ability to generate a benchmark input for each combination in the product of the supplied vectors. -{% raw %} ```c++ BENCHMARK(BM_SetInsert) ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}}) @@ -391,7 +412,6 @@ ->Args({3<<10, 80}) ->Args({8<<10, 80}); ``` -{% endraw %} For the most common scenarios, helper methods for creating a list of integers for a given sparse or dense range are provided. @@ -434,13 +454,22 @@ should describe the values passed. ```c++ -template -void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { - [...] +template +void BM_takes_args(benchmark::State& state, Args&&... args) { + auto args_tuple = std::make_tuple(std::move(args)...); + for (auto _ : state) { + std::cout << std::get<0>(args_tuple) << ": " << std::get<1>(args_tuple) + << '\n'; + [...] + } } // Registers a benchmark named "BM_takes_args/int_string_test" that passes -// the specified values to `extra_args`. +// the specified values to `args`. BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); + +// Registers the same benchmark "BM_takes_args/int_test" that passes +// the specified values to `args`. +BENCHMARK_CAPTURE(BM_takes_args, int_test, 42, 43); ``` Note that elements of `...args` may refer to global variables. Users should @@ -668,7 +697,6 @@ When you're compiling in C++11 mode or later you can use `insert()` with `std::initializer_list`: -{% raw %} ```c++ // With C++11, this can be done: state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}}); @@ -677,7 +705,6 @@ state.counters["Bar"] = numBars; state.counters["Baz"] = numBazs; ``` -{% endraw %} ### Counter Reporting @@ -773,6 +800,16 @@ BENCHMARK(BM_MultiThreaded)->Threads(2); ``` +To run the benchmark across a range of thread counts, instead of `Threads`, use +`ThreadRange`. This takes two parameters (`min_threads` and `max_threads`) and +runs the benchmark once for values in the inclusive range. For example: + +```c++ +BENCHMARK(BM_MultiThreaded)->ThreadRange(1, 8); +``` + +will run `BM_MultiThreaded` with thread counts 1, 2, 4, and 8. + If the benchmarked code itself uses threads and you want to compare it to single-threaded code, you may want to use real-time ("wallclock") measurements for latency comparisons: @@ -836,7 +873,6 @@ that loop, every iteration, but without counting that time to the benchmark time. That is possible, although it is not recommended, since it has high overhead. -{% raw %} ```c++ static void BM_SetInsert_With_Timer_Control(benchmark::State& state) { std::set data; @@ -851,7 +887,6 @@ } BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}}); ``` -{% endraw %} @@ -906,6 +941,10 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); ``` +Additionally the default time unit can be set globally with the +`--benchmark_time_unit={ns|us|ms|s}` command line argument. The argument only +affects benchmarks where the time unit is not set explicitly. + ## Preventing Optimization @@ -1037,10 +1076,25 @@ BENCHMARK(BM_spin_empty) ->ComputeStatistics("ratio", [](const std::vector& v) -> double { return std::begin(v) / std::end(v); - }, benchmark::StatisticUnit::Percentage) + }, benchmark::StatisticUnit::kPercentage) ->Arg(512); ``` + + +## Memory Usage + +It's often useful to also track memory usage for benchmarks, alongside CPU +performance. For this reason, benchmark offers the `RegisterMemoryManager` +method that allows a custom `MemoryManager` to be injected. 
+ +If set, the `MemoryManager::Start` and `MemoryManager::Stop` methods will be +called at the start and end of benchmark runs to allow user code to fill out +a report on the number of allocations, bytes used, etc. + +This data will then be reported alongside other performance data, currently +only when using JSON output. + ## Using RegisterBenchmark(name, fn, args...) @@ -1191,10 +1245,8 @@ ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ``` -you might want to disable the CPU frequency scaling while running the benchmark: +you might want to disable the CPU frequency scaling while running the +benchmark, as well as consider other ways to stabilize the performance of +your system while benchmarking. -```bash -sudo cpupower frequency-set --governor performance -./mybench -sudo cpupower frequency-set --governor powersave -``` +See [Reducing Variance](reducing_variance.md) for more information. diff -Nru benchmark-1.6.1/.github/install_bazel.sh benchmark-1.7.1/.github/install_bazel.sh --- benchmark-1.6.1/.github/install_bazel.sh 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/.github/install_bazel.sh 2022-11-11 14:01:03.000000000 +0000 @@ -0,0 +1,13 @@ +if ! bazel version; then + arch=$(uname -m) + if [ "$arch" == "aarch64" ]; then + arch="arm64" + fi + echo "Installing wget and downloading $arch Bazel binary from GitHub releases." + yum install -y wget + wget "https://github.com/bazelbuild/bazel/releases/download/5.2.0/bazel-5.2.0-linux-$arch" -O /usr/local/bin/bazel + chmod +x /usr/local/bin/bazel +else + # bazel is installed for the correct architecture + exit 0 +fi diff -Nru benchmark-1.6.1/.github/.libcxx-setup.sh benchmark-1.7.1/.github/.libcxx-setup.sh --- benchmark-1.6.1/.github/.libcxx-setup.sh 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/.libcxx-setup.sh 2022-11-11 14:01:03.000000000 +0000 @@ -10,14 +10,14 @@ # Build and install libc++ (Use unstable ABI for better sanitizer coverage) cd ./llvm-project -cmake -DCMAKE_C_COMPILER=${C_COMPILER} \ - -DCMAKE_CXX_COMPILER=${COMPILER} \ +cmake -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_INSTALL_PREFIX=/usr \ -DLIBCXX_ABI_UNSTABLE=OFF \ -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \ -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \ - -DLLVM_ENABLE_PROJECTS='libcxx;libcxxabi' \ + -DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi' \ -S llvm -B llvm-build -G "Unix Makefiles" make -C llvm-build -j3 cxx cxxabi sudo make -C llvm-build install-cxx install-cxxabi diff -Nru benchmark-1.6.1/.github/workflows/bazel.yml benchmark-1.7.1/.github/workflows/bazel.yml --- benchmark-1.6.1/.github/workflows/bazel.yml 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/workflows/bazel.yml 2022-11-11 14:01:03.000000000 +0000 @@ -5,9 +5,14 @@ pull_request: {} jobs: - build-and-test: - runs-on: ubuntu-latest - + job: + name: bazel.${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-2022] + steps: - uses: actions/checkout@v1 @@ -17,9 +22,9 @@ cache-name: bazel-cache with: path: "~/.cache/bazel" - key: ${{ env.cache-name }}-${{ runner.os }}-${{ github.ref }} + key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }} restore-keys: | - ${{ env.cache-name }}-${{ runner.os }}-main + ${{ env.cache-name }}-${{ matrix.os }}-main - name: build run: | diff -Nru benchmark-1.6.1/.github/workflows/build-and-test-perfcounters.yml 
benchmark-1.7.1/.github/workflows/build-and-test-perfcounters.yml --- benchmark-1.6.1/.github/workflows/build-and-test-perfcounters.yml 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/workflows/build-and-test-perfcounters.yml 2022-11-11 14:01:03.000000000 +0000 @@ -14,7 +14,8 @@ strategy: fail-fast: false matrix: - os: [ubuntu-latest, ubuntu-20.04] + # ubuntu-18.04 is deprecated but included for best-effort + os: [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] build_type: ['Release', 'Debug'] steps: - uses: actions/checkout@v2 @@ -23,9 +24,10 @@ run: sudo apt -y install libpfm4-dev - name: setup cmake + if: matrix.os == 'ubuntu-18.04' uses: jwlawson/actions-setup-cmake@v1.9 with: - cmake-version: '3.5.1' + cmake-version: '3.16.3' - name: create build environment run: cmake -E make_directory ${{ runner.workspace }}/_build @@ -52,46 +54,3 @@ # working-directory: ${{ runner.workspace }}/_build # run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure - ubuntu-16_04: - name: ubuntu-16.04.${{ matrix.build_type }} - runs-on: [ubuntu-latest] - strategy: - fail-fast: false - matrix: - build_type: ['Release', 'Debug'] - container: ubuntu:16.04 - steps: - - uses: actions/checkout@v2 - - - name: install required bits - run: | - apt update - apt -y install clang cmake g++ git - - - name: install libpfm - run: apt -y install libpfm4-dev - - - name: create build environment - run: cmake -E make_directory $GITHUB_WORKSPACE/_build - - - name: configure cmake - shell: bash - working-directory: ${{ github.workspace }}/_build - run: > - cmake $GITHUB_WORKSPACE - -DBENCHMARK_ENABLE_LIBPFM=1 - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - - - name: build - shell: bash - working-directory: ${{ github.workspace }}/_build - run: cmake --build . --config ${{ matrix.build_type }} - - # Skip testing, for now. It seems perf_event_open does not succeed on the - # hosting machine, very likely a permissions issue. - # TODO(mtrofin): Enable test. 
- # - name: test - # shell: bash - # working-directory: ${{ runner.workspace }}/_build - # run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure diff -Nru benchmark-1.6.1/.github/workflows/build-and-test.yml benchmark-1.7.1/.github/workflows/build-and-test.yml --- benchmark-1.6.1/.github/workflows/build-and-test.yml 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/workflows/build-and-test.yml 2022-11-11 14:01:03.000000000 +0000 @@ -1,8 +1,10 @@ name: build-and-test on: - push: {} - pull_request: {} + push: + branches: [ main ] + pull_request: + branches: [ main ] jobs: # TODO: add 32-bit builds (g++ and clang++) for ubuntu @@ -10,36 +12,45 @@ # TODO: add coverage build (requires lcov) # TODO: add clang + libc++ builds for ubuntu job: - name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.compiler }} + name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [ubuntu-latest, ubuntu-20.04, macos-latest] + # ubuntu-18.04 is deprecated but included for best-effort support + os: [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04, macos-latest] build_type: ['Release', 'Debug'] compiler: [g++, clang++] - include: - - displayTargetName: windows-latest-release - os: windows-latest - build_type: 'Release' - - displayTargetName: windows-latest-debug - os: windows-latest - build_type: 'Debug' + lib: ['shared', 'static'] + steps: - uses: actions/checkout@v2 + - name: setup cmake + if: matrix.os == 'ubuntu-18.04' + uses: jwlawson/actions-setup-cmake@v1.9 + with: + cmake-version: '3.16.3' + - name: create build environment run: cmake -E make_directory ${{ runner.workspace }}/_build + - name: setup cmake initial cache + run: touch compiler-cache.cmake + - name: configure cmake env: CXX: ${{ matrix.compiler }} shell: bash working-directory: ${{ runner.workspace }}/_build run: > - cmake $GITHUB_WORKSPACE + cmake -C ${{ github.workspace }}/compiler-cache.cmake + $GITHUB_WORKSPACE -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DCMAKE_CXX_VISIBILITY_PRESET=hidden + -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON - name: build shell: bash @@ -51,99 +62,55 @@ working-directory: ${{ runner.workspace }}/_build run: ctest -C ${{ matrix.build_type }} -VV - ubuntu-16_04: - name: ubuntu-16.04.${{ matrix.build_type }}.${{ matrix.compiler }} - runs-on: [ubuntu-latest] + msvc: + name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: powershell strategy: fail-fast: false matrix: - build_type: ['Release', 'Debug'] - compiler: [g++, clang++] - container: ubuntu:16.04 + msvc: + - VS-16-2019 + - VS-17-2022 + arch: + - x64 + build_type: + - Debug + - Release + lib: + - shared + - static + include: + - msvc: VS-16-2019 + os: windows-2019 + generator: 'Visual Studio 16 2019' + - msvc: VS-17-2022 + os: windows-2022 + generator: 'Visual Studio 17 2022' + steps: - uses: actions/checkout@v2 - - name: install required bits - run: | - apt update - apt -y install clang cmake g++ git - - - name: create build environment - run: cmake -E make_directory $GITHUB_WORKSPACE/_build - - name: configure cmake - env: - CXX: ${{ matrix.compiler }} - shell: bash - working-directory: ${{ github.workspace }}/_build run: > - cmake $GITHUB_WORKSPACE + cmake -S . 
-B _build/ + -A ${{ matrix.arch }} + -G "${{ matrix.generator }}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} - name: build - shell: bash - working-directory: ${{ github.workspace }}/_build - run: cmake --build . --config ${{ matrix.build_type }} - - - name: test - shell: bash - working-directory: ${{ github.workspace }}/_build - run: ctest -C ${{ matrix.build_type }} -VV - - ubuntu-14_04: - name: ubuntu-14.04.${{ matrix.build_type }}.${{ matrix.compiler }} - runs-on: [ubuntu-latest] - strategy: - fail-fast: false - matrix: - build_type: ['Release', 'Debug'] - compiler: [g++-4.8, clang++-3.6] - include: - - compiler: g++-6 - build_type: 'Debug' - run_tests: true - - compiler: g++-6 - build_type: 'Release' - run_tests: true - container: ubuntu:14.04 - steps: - - uses: actions/checkout@v2 - - - name: install required bits - run: | - sudo apt update - sudo apt -y install clang-3.6 cmake3 g++-4.8 git - - - name: install other bits - if: ${{ matrix.compiler }} == g++-6 - run: | - sudo apt -y install software-properties-common - sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test" - sudo apt update - sudo apt -y install g++-6 - - - name: create build environment - run: cmake -E make_directory $GITHUB_WORKSPACE/_build + run: cmake --build _build/ --config ${{ matrix.build_type }} - - name: configure cmake - env: - CXX: ${{ matrix.compiler }} - shell: bash - working-directory: ${{ github.workspace }}/_build + - name: setup test environment + # Make sure gmock and benchmark DLLs can be found run: > - cmake $GITHUB_WORKSPACE - -DBENCHMARK_ENABLE_TESTING=${{ matrix.run_tests }} - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - -DBENCHMARK_DOWNLOAD_DEPENDENCIES=${{ matrix.run_tests }} - - - name: build - shell: bash - working-directory: ${{ github.workspace }}/_build - run: cmake --build . --config ${{ matrix.build_type }} + echo "$((Get-Item .).FullName)/_build/bin/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append; + echo "$((Get-Item .).FullName)/_build/src/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append; - name: test - if: ${{ matrix.run_tests }} - shell: bash - working-directory: ${{ github.workspace }}/_build - run: ctest -C ${{ matrix.build_type }} -VV + run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV + + diff -Nru benchmark-1.6.1/.github/workflows/doxygen.yml benchmark-1.7.1/.github/workflows/doxygen.yml --- benchmark-1.6.1/.github/workflows/doxygen.yml 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/workflows/doxygen.yml 2022-11-11 14:01:03.000000000 +0000 @@ -13,13 +13,15 @@ steps: - name: Fetching sources uses: actions/checkout@v2 + - name: Installing build dependencies run: | sudo apt update - sudo apt install cmake doxygen gcc git + sudo apt install doxygen gcc git + - name: Creating build directory - run: | - mkdir build + run: mkdir build + - name: Building HTML documentation with Doxygen run: | cmake -S . 
-B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON diff -Nru benchmark-1.6.1/.github/workflows/pylint.yml benchmark-1.7.1/.github/workflows/pylint.yml --- benchmark-1.6.1/.github/workflows/pylint.yml 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/workflows/pylint.yml 2022-11-11 14:01:03.000000000 +0000 @@ -17,10 +17,12 @@ uses: actions/setup-python@v1 with: python-version: 3.8 + - name: Install dependencies run: | python -m pip install --upgrade pip pip install pylint pylint-exit conan + - name: Run pylint run: | pylint `find . -name '*.py'|xargs` || pylint-exit $? diff -Nru benchmark-1.6.1/.github/workflows/sanitizer.yml benchmark-1.7.1/.github/workflows/sanitizer.yml --- benchmark-1.6.1/.github/workflows/sanitizer.yml 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/workflows/sanitizer.yml 2022-11-11 14:01:03.000000000 +0000 @@ -45,11 +45,18 @@ echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV + - name: setup clang + if: matrix.compiler == 'clang' + uses: egor-tensin/setup-clang@v1 + with: + version: latest + platform: x64 + - name: configure clang if: matrix.compiler == 'clang' run: | - echo "CC=clang" >> $GITHUB_ENV - echo "CXX=clang++" >> $GITHUB_ENV + echo "CC=cc" >> $GITHUB_ENV + echo "CXX=c++" >> $GITHUB_ENV - name: configure gcc if: matrix.compiler == 'gcc' @@ -71,6 +78,7 @@ shell: bash working-directory: ${{ runner.workspace }}/_build run: > + VERBOSE=1 cmake $GITHUB_WORKSPACE -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF -DBENCHMARK_ENABLE_LIBPFM=OFF diff -Nru benchmark-1.6.1/.github/workflows/wheels.yml benchmark-1.7.1/.github/workflows/wheels.yml --- benchmark-1.6.1/.github/workflows/wheels.yml 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/.github/workflows/wheels.yml 2022-11-11 14:01:03.000000000 +0000 @@ -12,10 +12,10 @@ runs-on: ubuntu-latest steps: - name: Check out repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Python 3.9 - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: 3.9 @@ -23,111 +23,57 @@ run: | python setup.py sdist - name: Upload sdist - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: dist path: dist/*.tar.gz - build_linux: - name: Build google-benchmark manylinux wheels - runs-on: ubuntu-latest + build_wheels: + name: Build Google Benchmark wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] steps: - name: Check out Google Benchmark - uses: actions/checkout@v2 - - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - # TODO: Bazel does not seem to work in an emulated Docker environment, see - # https://github.com/bazelbuild/bazel/issues/11379 -# - name: Set up QEMU -# uses: docker/setup-qemu-action@v1 -# with: -# platforms: all - - - name: Build Python wheels on ubuntu-latest - env: - CIBW_BUILD: 'cp37-* cp38-* cp39-* cp310-*' - CIBW_SKIP: "*-musllinux_*" - # Bazel repo only exists on CentOS 7 for x86 and ppc, so no manylinux2010 - # TODO: Build ppc64le, aarch64 using some other trick - CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 - CIBW_ARCHS_LINUX: x86_64 - CIBW_BEFORE_ALL: > - curl -O --retry-delay 5 --retry 5 https://copr.fedorainfracloud.org/coprs/vbatts/bazel/repo/epel-7/vbatts-bazel-epel-7.repo && - cp vbatts-bazel-epel-7.repo 
/etc/yum.repos.d/bazel.repo && - yum install -y bazel4 - CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py - run: | - pip install cibuildwheel - python -m cibuildwheel --output-dir wheelhouse + uses: actions/checkout@v3 - - name: Upload Linux wheels - uses: actions/upload-artifact@v2 + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v2 with: - name: dist - path: wheelhouse/*.whl - - build_macos: - name: Build google-benchmark macOS wheels - runs-on: macos-latest - - steps: - - name: Check out Google Benchmark - uses: actions/checkout@v2 - - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 + platforms: all - - name: Build Python wheels on macOS + - name: Build wheels on ${{ matrix.os }} using cibuildwheel + uses: pypa/cibuildwheel@v2.9.0 env: - CIBW_ARCHS_MACOS: "x86_64 arm64" - CIBW_BUILD: 'cp37-* cp38-* cp39-* cp310-*' - # ARM64 requires Python 3.8 minimum - CIBW_SKIP: 'cp37-*-arm64' + CIBW_BUILD: 'cp37-* cp38-* cp39-* cp310-* cp311-*' + CIBW_SKIP: "cp37-*-arm64 *-musllinux_*" + # TODO: Build ppc64le using some other trick + CIBW_ARCHS_LINUX: x86_64 aarch64 + CIBW_ARCHS_MACOS: x86_64 arm64 + CIBW_ARCHS_WINDOWS: AMD64 + CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py - CIBW_TEST_SKIP: "*_arm64" - run: | - pip install cibuildwheel - python -m cibuildwheel --output-dir wheelhouse - - name: Upload macOS wheels - uses: actions/upload-artifact@v2 + - name: Upload Google Benchmark ${{ matrix.os }} wheels + uses: actions/upload-artifact@v3 with: name: dist - path: wheelhouse/*.whl - - build_windows: - name: Build google-benchmark wheels on Windows - runs-on: windows-latest + path: ./wheelhouse/*.whl + pypi_upload: + name: Publish google-benchmark wheels to PyPI + needs: [build_sdist, build_wheels] + runs-on: ubuntu-latest steps: - - name: Check out Google Benchmark - uses: actions/checkout@v2 - - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - - name: Build Python wheels on Windows - env: - CIBW_BUILD: 'cp37-* cp38-* cp39-* cp310-*' - CIBW_ARCHS_WINDOWS: AMD64 - # otherwise, pip crashes the job by trying to remove an in-use bazel DLL - PIP_NO_CLEAN: true - CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py - run: | - pip install cibuildwheel - python -m cibuildwheel --output-dir wheelhouse - - - name: Upload wheels - uses: actions/upload-artifact@v2 - with: - name: dist - path: wheelhouse/*.whl \ No newline at end of file + - uses: actions/download-artifact@v3 + with: + name: dist + path: dist + + - uses: pypa/gh-action-pypi-publish@v1.5.0 + with: + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} diff -Nru benchmark-1.6.1/include/benchmark/benchmark.h benchmark-1.7.1/include/benchmark/benchmark.h --- benchmark-1.6.1/include/benchmark/benchmark.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/include/benchmark/benchmark.h 2022-11-11 14:01:03.000000000 +0000 @@ -187,6 +187,8 @@ #include #include +#include "benchmark/export.h" + #if defined(BENCHMARK_HAS_CXX11) #include #include @@ -218,29 +220,18 @@ #if defined(__GNUC__) || defined(__clang__) #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) -#define BENCHMARK_NOEXCEPT noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) #elif defined(_MSC_VER) && !defined(__clang__) #define BENCHMARK_ALWAYS_INLINE __forceinline -#if _MSC_VER >= 1900 -#define BENCHMARK_NOEXCEPT 
noexcept -#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) -#else -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) -#endif #define __func__ __FUNCTION__ #else #define BENCHMARK_ALWAYS_INLINE -#define BENCHMARK_NOEXCEPT -#define BENCHMARK_NOEXCEPT_OP(x) #endif #define BENCHMARK_INTERNAL_TOSTRING2(x) #x #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x) // clang-format off -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) && !defined(__NVCC__) || defined(__clang__) #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y) #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) #define BENCHMARK_DISABLE_DEPRECATED_WARNING \ @@ -280,18 +271,41 @@ #define BENCHMARK_OVERRIDE #endif +#if defined(_MSC_VER) +#pragma warning(push) +// C4251: needs to have dll-interface to be used by clients of class +#pragma warning(disable : 4251) +#endif + namespace benchmark { class BenchmarkReporter; -void Initialize(int* argc, char** argv); -void Shutdown(); +BENCHMARK_EXPORT void PrintDefaultHelp(); + +BENCHMARK_EXPORT void Initialize(int* argc, char** argv, + void (*HelperPrinterf)() = PrintDefaultHelp); +BENCHMARK_EXPORT void Shutdown(); // Report to stdout all arguments in 'argv' as unrecognized except the first. // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). -bool ReportUnrecognizedArguments(int argc, char** argv); +BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv); // Returns the current value of --benchmark_filter. -std::string GetBenchmarkFilter(); +BENCHMARK_EXPORT std::string GetBenchmarkFilter(); + +// Sets a new value to --benchmark_filter. (This will override this flag's +// current value). +// Should be called after `benchmark::Initialize()`, as +// `benchmark::Initialize()` will override the flag's value. +BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value); + +// Returns the current value of --v (command line value for verbosity). +BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity(); + +// Creates a default display reporter. Used by the library when no display +// reporter is provided, but also made available for external use in case a +// custom reporter should respect the `--benchmark_format` flag as a fallback +BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter(); // Generate a list of benchmarks matching the specified --benchmark_filter flag // and if --benchmark_list_tests is specified return after printing the name @@ -309,18 +323,29 @@ // 'file_reporter' is ignored. // // RETURNS: The number of matching benchmarks. -size_t RunSpecifiedBenchmarks(); -size_t RunSpecifiedBenchmarks(std::string spec); +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(); +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec); + +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec); + +BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks( + BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter); +BENCHMARK_EXPORT size_t +RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, std::string spec); + +// TimeUnit is passed to a benchmark in order to specify the order of magnitude +// for the measured time. 
+enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; + +BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit(); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - std::string spec); - -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter); -size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter, - std::string spec); +// Sets the default time unit the benchmarks use +// Has to be called before the benchmark loop to take effect +BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit); // If a MemoryManager is registered (via RegisterMemoryManager()), // it can be used to collect and report allocation metrics for a run of the @@ -369,9 +394,11 @@ // Register a MemoryManager instance that will be used to collect and report // allocation measurements for benchmark runs. +BENCHMARK_EXPORT void RegisterMemoryManager(MemoryManager* memory_manager); // Add a key-value pair to output as part of the context stanza in the report. +BENCHMARK_EXPORT void AddCustomContext(const std::string& key, const std::string& value); namespace internal { @@ -379,14 +406,17 @@ class BenchmarkImp; class BenchmarkFamilies; +BENCHMARK_EXPORT std::map*& GetGlobalContext(); + +BENCHMARK_EXPORT void UseCharPointer(char const volatile*); // Take ownership of the pointer and register the benchmark. Return the // registered benchmark. -Benchmark* RegisterBenchmarkInternal(Benchmark*); +BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(Benchmark*); // Ensure that the standard streams are properly initialized in every TU. -int InitializeStreams(); +BENCHMARK_EXPORT int InitializeStreams(); BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); } // namespace internal @@ -409,6 +439,7 @@ // intended to add little to no overhead. // See: https://youtu.be/nXaxk27zwlk?t=2441 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY +#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER) template inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { asm volatile("" : : "r,m"(value) : "memory"); @@ -422,6 +453,55 @@ asm volatile("" : "+m,r"(value) : : "memory"); #endif } +#elif defined(BENCHMARK_HAS_CXX11) && (__GNUC__ >= 5) +// Workaround for a bug with full argument copy overhead with GCC. +// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519 +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp const& value) { + asm volatile("" : : "r,m"(value) : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp const& value) { + asm volatile("" : : "m"(value) : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value && + (sizeof(Tp) <= sizeof(Tp*))>::type + DoNotOptimize(Tp& value) { + asm volatile("" : "+m,r"(value) : : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE + typename std::enable_if::value || + (sizeof(Tp) > sizeof(Tp*))>::type + DoNotOptimize(Tp& value) { + asm volatile("" : "+m"(value) : : "memory"); +} + +#else +// Fallback for GCC < 5. Can add some overhead because the compiler is forced +// to use memory operations instead of operations with registers. +// TODO: Remove if GCC < 5 will be unsupported. 
+template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : : "m"(value) : "memory"); +} + +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { + asm volatile("" : "+m"(value) : : "memory"); +} +#endif #ifndef BENCHMARK_HAS_CXX11 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { @@ -506,17 +586,13 @@ // This is the container for the user-defined counters. typedef std::map UserCounters; -// TimeUnit is passed to a benchmark in order to specify the order of magnitude -// for the measured time. -enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; - // BigO is passed to a benchmark in order to specify the asymptotic // computational // complexity for the benchmark. In case oAuto is selected, complexity will be // calculated automatically to the best fit. enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; -typedef uint64_t IterationCount; +typedef int64_t IterationCount; enum StatisticUnit { kTime, kPercentage }; @@ -568,7 +644,7 @@ // State is passed to a running Benchmark and contains state for the // benchmark to use. -class State { +class BENCHMARK_EXPORT State { public: struct StateIterator; friend struct StateIterator; @@ -899,7 +975,7 @@ // be called on this object to change the properties of the benchmark. // Each method returns "this" so that multiple method calls can // chained into one expression. -class Benchmark { +class BENCHMARK_EXPORT Benchmark { public: virtual ~Benchmark(); @@ -1000,6 +1076,12 @@ // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark. Benchmark* MinTime(double t); + // Set the minimum amount of time to run the benchmark before taking runtimes + // of this benchmark into account. This + // option overrides the `benchmark_min_warmup_time` flag. + // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark. + Benchmark* MinWarmUpTime(double t); + // Specify the amount of iterations that should be run by this benchmark. // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark. // @@ -1025,7 +1107,7 @@ // By default, the CPU time is measured only for the main thread, which may // be unrepresentative if the benchmark uses threads internally. If called, // the total CPU time spent by all the threads will be measured instead. - // By default, the only the main thread CPU time will be measured. + // By default, only the main thread CPU time will be measured. 
Benchmark* MeasureProcessCPUTime(); // If a particular benchmark should use the Wall clock instead of the CPU time @@ -1090,9 +1172,10 @@ virtual void Run(State& state) = 0; + TimeUnit GetTimeUnit() const; + protected: explicit Benchmark(const char* name); - Benchmark(Benchmark const&); void SetName(const char* name); int ArgsCnt() const; @@ -1105,9 +1188,13 @@ AggregationReportMode aggregation_report_mode_; std::vector arg_names_; // Args for all benchmark runs std::vector > args_; // Args for all benchmark runs + TimeUnit time_unit_; + bool use_default_time_unit_; + int range_multiplier_; double min_time_; + double min_warmup_time_; IterationCount iterations_; int repetitions_; bool measure_process_cpu_time_; @@ -1122,7 +1209,17 @@ callback_function setup_; callback_function teardown_; - Benchmark& operator=(Benchmark const&); + Benchmark(Benchmark const&) +#if defined(BENCHMARK_HAS_CXX11) + = delete +#endif + ; + + Benchmark& operator=(Benchmark const&) +#if defined(BENCHMARK_HAS_CXX11) + = delete +#endif + ; }; } // namespace internal @@ -1141,12 +1238,12 @@ // Remove all registered benchmarks. All pointers to previously registered // benchmarks are invalidated. -void ClearRegisteredBenchmarks(); +BENCHMARK_EXPORT void ClearRegisteredBenchmarks(); namespace internal { // The class used to hold all Benchmarks created from static function. // (ie those created using the BENCHMARK(...) macros. -class FunctionBenchmark : public Benchmark { +class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark { public: FunctionBenchmark(const char* name, Function* func) : Benchmark(name), func_(func) {} @@ -1176,7 +1273,6 @@ Lambda lambda_; }; #endif - } // namespace internal inline internal::Benchmark* RegisterBenchmark(const char* name, @@ -1228,7 +1324,6 @@ protected: virtual void BenchmarkCase(State&) = 0; }; - } // namespace benchmark // ------------------------------------------------------ @@ -1439,8 +1534,15 @@ #endif // Helper macro to create a main routine in a test that runs the benchmarks +// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL. #define BENCHMARK_MAIN() \ int main(int argc, char** argv) { \ + char arg0_default[] = "benchmark"; \ + char* args_default = arg0_default; \ + if (!argv) { \ + argc = 1; \ + argv = &args_default; \ + } \ ::benchmark::Initialize(&argc, argv); \ if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ ::benchmark::RunSpecifiedBenchmarks(); \ @@ -1454,7 +1556,7 @@ namespace benchmark { -struct CPUInfo { +struct BENCHMARK_EXPORT CPUInfo { struct CacheInfo { std::string type; int level; @@ -1478,7 +1580,7 @@ }; // Adding Struct for System Information -struct SystemInfo { +struct BENCHMARK_EXPORT SystemInfo { std::string name; static const SystemInfo& Get(); @@ -1490,10 +1592,11 @@ // BenchmarkName contains the components of the Benchmark's name // which allows individual fields to be modified or cleared before // building the final name using 'str()'. -struct BenchmarkName { +struct BENCHMARK_EXPORT BenchmarkName { std::string function_name; std::string args; std::string min_time; + std::string min_warmup_time; std::string iterations; std::string repetitions; std::string time_type; @@ -1509,7 +1612,7 @@ // can control the destination of the reports by calling // RunSpecifiedBenchmarks and passing it a custom reporter object. // The reporter object must implement the following interface. 
-class BenchmarkReporter { +class BENCHMARK_EXPORT BenchmarkReporter { public: struct Context { CPUInfo const& cpu_info; @@ -1520,7 +1623,7 @@ Context(); }; - struct Run { + struct BENCHMARK_EXPORT Run { static const int64_t no_repetition_index = -1; enum RunType { RT_Iteration, RT_Aggregate }; @@ -1530,7 +1633,7 @@ error_occurred(false), iterations(1), threads(1), - time_unit(kNanosecond), + time_unit(GetDefaultTimeUnit()), real_accumulated_time(0), cpu_accumulated_time(0), max_heapbytes_used(0), @@ -1665,7 +1768,7 @@ // Simple reporter that outputs benchmark data to the console. This is the // default reporter used by RunSpecifiedBenchmarks(). -class ConsoleReporter : public BenchmarkReporter { +class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter { public: enum OutputOptions { OO_None = 0, @@ -1690,7 +1793,7 @@ bool printed_header_; }; -class JSONReporter : public BenchmarkReporter { +class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter { public: JSONReporter() : first_report_(true) {} virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; @@ -1703,7 +1806,7 @@ bool first_report_; }; -class BENCHMARK_DEPRECATED_MSG( +class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG( "The CSV Reporter will be removed in a future release") CSVReporter : public BenchmarkReporter { public: @@ -1755,11 +1858,17 @@ // CreateRange(0, 100, /*multi=*/4), // CreateDenseRange(0, 4, /*step=*/1), // }); +BENCHMARK_EXPORT std::vector CreateRange(int64_t lo, int64_t hi, int multi); // Creates a list of integer values for the given range and step. +BENCHMARK_EXPORT std::vector CreateDenseRange(int64_t start, int64_t limit, int step); } // namespace benchmark +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #endif // BENCHMARK_BENCHMARK_H_ diff -Nru benchmark-1.6.1/include/benchmark/export.h benchmark-1.7.1/include/benchmark/export.h --- benchmark-1.6.1/include/benchmark/export.h 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/include/benchmark/export.h 2022-11-11 14:01:03.000000000 +0000 @@ -0,0 +1,47 @@ +#ifndef BENCHMARK_EXPORT_H +#define BENCHMARK_EXPORT_H + +#if defined(_WIN32) +#define EXPORT_ATTR __declspec(dllexport) +#define IMPORT_ATTR __declspec(dllimport) +#define NO_EXPORT_ATTR +#define DEPRECATED_ATTR __declspec(deprecated) +#else // _WIN32 +#define EXPORT_ATTR __attribute__((visibility("default"))) +#define IMPORT_ATTR __attribute__((visibility("default"))) +#define NO_EXPORT_ATTR __attribute__((visibility("hidden"))) +#define DEPRECATE_ATTR __attribute__((__deprecated__)) +#endif // _WIN32 + +#ifdef BENCHMARK_STATIC_DEFINE +#define BENCHMARK_EXPORT +#define BENCHMARK_NO_EXPORT +#else // BENCHMARK_STATIC_DEFINE +#ifndef BENCHMARK_EXPORT +#ifdef benchmark_EXPORTS +/* We are building this library */ +#define BENCHMARK_EXPORT EXPORT_ATTR +#else // benchmark_EXPORTS +/* We are using this library */ +#define BENCHMARK_EXPORT IMPORT_ATTR +#endif // benchmark_EXPORTS +#endif // !BENCHMARK_EXPORT + +#ifndef BENCHMARK_NO_EXPORT +#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR +#endif // !BENCHMARK_NO_EXPORT +#endif // BENCHMARK_STATIC_DEFINE + +#ifndef BENCHMARK_DEPRECATED +#define BENCHMARK_DEPRECATED DEPRECATE_ATTR +#endif // BENCHMARK_DEPRECATED + +#ifndef BENCHMARK_DEPRECATED_EXPORT +#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED +#endif // BENCHMARK_DEPRECATED_EXPORT + +#ifndef BENCHMARK_DEPRECATED_NO_EXPORT +#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED +#endif // BENCHMARK_DEPRECATED_EXPORT + +#endif 
/* BENCHMARK_EXPORT_H */ diff -Nru benchmark-1.6.1/README.md benchmark-1.7.1/README.md --- benchmark-1.6.1/README.md 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/README.md 2022-11-11 14:01:03.000000000 +0000 @@ -47,6 +47,8 @@ [Assembly Testing Documentation](docs/AssemblyTests.md) +[Building and installing Python bindings](docs/python_bindings.md) + ## Requirements The library can be used with C++03. However, it requires C++11 to build, diff -Nru benchmark-1.6.1/requirements.txt benchmark-1.7.1/requirements.txt --- benchmark-1.6.1/requirements.txt 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/requirements.txt 2022-11-11 14:01:03.000000000 +0000 @@ -1,3 +1,2 @@ -numpy == 1.19.4 +numpy == 1.22 scipy == 1.5.4 -pandas == 1.1.5 diff -Nru benchmark-1.6.1/setup.py benchmark-1.7.1/setup.py --- benchmark-1.6.1/setup.py 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/setup.py 2022-11-11 14:01:03.000000000 +0000 @@ -16,6 +16,10 @@ IS_WINDOWS = sys.platform.startswith("win") +with open("README.md", "r", encoding="utf-8") as fp: + long_description = fp.read() + + def _get_version(): """Parse the version string from __init__.py.""" with open( @@ -93,6 +97,12 @@ elif sys.platform == "darwin" and platform.machine() == "x86_64": bazel_argv.append("--macos_minimum_os=10.9") + # ARCHFLAGS is always set by cibuildwheel before macOS wheel builds. + archflags = os.getenv("ARCHFLAGS", "") + if "arm64" in archflags: + bazel_argv.append("--cpu=darwin_arm64") + bazel_argv.append("--macos_cpus=arm64") + self.spawn(bazel_argv) shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' @@ -106,12 +116,17 @@ os.makedirs(ext_dest_dir) shutil.copyfile(ext_bazel_bin_path, ext_dest_path) + # explicitly call `bazel shutdown` for graceful exit + self.spawn(["bazel", "shutdown"]) + setuptools.setup( name="google_benchmark", version=_get_version(), url="https://github.com/google/benchmark", description="A library to benchmark code snippets.", + long_description=long_description, + long_description_content_type="text/markdown", author="Google", author_email="benchmark-py@google.com", # Contained modules and scripts. 
@@ -132,9 +147,11 @@ "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Software Development :: Testing", "Topic :: System :: Benchmark", ], diff -Nru benchmark-1.6.1/src/benchmark_api_internal.cc benchmark-1.7.1/src/benchmark_api_internal.cc --- benchmark-1.6.1/src/benchmark_api_internal.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_api_internal.cc 2022-11-11 14:01:03.000000000 +0000 @@ -16,7 +16,7 @@ per_family_instance_index_(per_family_instance_idx), aggregation_report_mode_(benchmark_.aggregation_report_mode_), args_(args), - time_unit_(benchmark_.time_unit_), + time_unit_(benchmark_.GetTimeUnit()), measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), use_real_time_(benchmark_.use_real_time_), use_manual_time_(benchmark_.use_manual_time_), @@ -25,6 +25,7 @@ statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), min_time_(benchmark_.min_time_), + min_warmup_time_(benchmark_.min_warmup_time_), iterations_(benchmark_.iterations_), threads_(thread_count) { name_.function_name = benchmark_.name_; @@ -50,6 +51,11 @@ name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); } + if (!IsZero(benchmark->min_warmup_time_)) { + name_.min_warmup_time = + StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_); + } + if (benchmark_.iterations_ != 0) { name_.iterations = StrFormat( "iterations:%lu", static_cast(benchmark_.iterations_)); diff -Nru benchmark-1.6.1/src/benchmark_api_internal.h benchmark-1.7.1/src/benchmark_api_internal.h --- benchmark-1.6.1/src/benchmark_api_internal.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_api_internal.h 2022-11-11 14:01:03.000000000 +0000 @@ -36,6 +36,7 @@ const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } double min_time() const { return min_time_; } + double min_warmup_time() const { return min_warmup_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } void Setup() const; @@ -62,6 +63,7 @@ const std::vector& statistics_; int repetitions_; double min_time_; + double min_warmup_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to us @@ -76,6 +78,7 @@ bool IsZero(double n); +BENCHMARK_EXPORT ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); } // end namespace internal diff -Nru benchmark-1.6.1/src/benchmark.cc benchmark-1.7.1/src/benchmark.cc --- benchmark-1.6.1/src/benchmark.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark.cc 2022-11-11 14:01:03.000000000 +0000 @@ -19,7 +19,7 @@ #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -72,6 +72,13 @@ // benchmark execution, regardless of number of threads. BM_DEFINE_double(benchmark_min_time, 0.5); +// Minimum number of seconds a benchmark should be run before results should be +// taken into account. This e.g can be neccessary for benchmarks of code which +// needs to fill some form of cache before performance is of interrest. 
+// Note: results gathered within this period are discarded and not used for +// reported result. +BM_DEFINE_double(benchmark_min_warmup_time, 0.0); + // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. BM_DEFINE_int32(benchmark_repetitions, 1); @@ -121,6 +128,10 @@ // pairs. Kept internal as it's only used for parsing from env/command line. BM_DEFINE_kvpairs(benchmark_context, {}); +// Set the default time unit to use for reports +// Valid values are 'ns', 'us', 'ms' or 's' +BM_DEFINE_string(benchmark_time_unit, ""); + // The level of verbose logging to output BM_DEFINE_int32(v, 0); @@ -128,6 +139,10 @@ std::map* global_context = nullptr; +BENCHMARK_EXPORT std::map*& GetGlobalContext() { + return global_context; +} + // FIXME: wouldn't LTO mess this up? void UseCharPointer(char const volatile*) {} @@ -167,6 +182,10 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winvalid-offsetof" #endif +#if defined(__NVCC__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 1427 +#endif // Offset tests to ensure commonly accessed data is on the first cache line. const int cache_line_size = 64; static_assert(offsetof(State, error_occurred_) <= @@ -177,6 +196,9 @@ #elif defined(__GNUC__) #pragma GCC diagnostic pop #endif +#if defined(__NVCC__) +#pragma nv_diagnostic pop +#endif } void State::PauseTiming() { @@ -184,11 +206,14 @@ BM_CHECK(started_ && !finished_ && !error_occurred_); timer_->StopTimer(); if (perf_counters_measurement_) { - auto measurements = perf_counters_measurement_->StopAndGetMeasurements(); + std::vector> measurements; + if (!perf_counters_measurement_->Stop(measurements)) { + BM_CHECK(false) << "Perf counters read the value failed."; + } for (const auto& name_and_measurement : measurements) { auto name = name_and_measurement.first; auto measurement = name_and_measurement.second; - BM_CHECK_EQ(counters[name], 0.0); + BM_CHECK_EQ(std::fpclassify((double)counters[name]), FP_ZERO); counters[name] = Counter(measurement, Counter::kAvgIterations); } } @@ -385,9 +410,9 @@ if (name == "console") { return PtrType(new ConsoleReporter(output_opts)); } else if (name == "json") { - return PtrType(new JSONReporter); + return PtrType(new JSONReporter()); } else if (name == "csv") { - return PtrType(new CSVReporter); + return PtrType(new CSVReporter()); } else { std::cerr << "Unexpected format: '" << name << "'\n"; std::exit(1); @@ -428,6 +453,14 @@ } // end namespace internal +BenchmarkReporter* CreateDefaultDisplayReporter() { + static auto default_display_reporter = + internal::CreateReporter(FLAGS_benchmark_format, + internal::GetOutputOptions()) + .release(); + return default_display_reporter; +} + size_t RunSpecifiedBenchmarks() { return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter); } @@ -463,8 +496,7 @@ std::unique_ptr default_display_reporter; std::unique_ptr default_file_reporter; if (!display_reporter) { - default_display_reporter = internal::CreateReporter( - FLAGS_benchmark_format, internal::GetOutputOptions()); + default_display_reporter.reset(CreateDefaultDisplayReporter()); display_reporter = default_display_reporter.get(); } auto& Out = display_reporter->GetOutputStream(); @@ -510,8 +542,23 @@ return benchmarks.size(); } +namespace { +// stores the time unit benchmarks use by default +TimeUnit default_time_unit = kNanosecond; +} // namespace + +TimeUnit GetDefaultTimeUnit() { return default_time_unit; } + +void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; } 
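The hunks above in benchmark.cc wire up a process-wide default time unit: the new --benchmark_time_unit flag plus SetDefaultTimeUnit()/GetDefaultTimeUnit(), which Benchmark::GetTimeUnit() (added later in this diff) falls back to whenever a benchmark never called Unit(). A minimal sketch of the programmatic form, using a hypothetical workload (BM_MemCopy is not part of this patch):

    #include <algorithm>
    #include <vector>

    #include "benchmark/benchmark.h"

    // Hypothetical benchmark; it never calls ->Unit(), so it reports in the
    // process-wide default time unit.
    static void BM_MemCopy(benchmark::State& state) {
      std::vector<char> src(1 << 20, 'x'), dst(1 << 20);
      for (auto _ : state) {
        std::copy(src.begin(), src.end(), dst.begin());
        benchmark::ClobberMemory();
      }
    }
    BENCHMARK(BM_MemCopy);

    int main(int argc, char** argv) {
      benchmark::Initialize(&argc, argv);
      // Programmatic equivalent of --benchmark_time_unit=ms; has to happen
      // before the benchmarks run (and after Initialize, which applies the flag).
      benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);
      benchmark::RunSpecifiedBenchmarks();
      benchmark::Shutdown();
      return 0;
    }

The diff then resumes with the remaining benchmark.cc additions: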
+ std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; } +void SetBenchmarkFilter(std::string value) { + FLAGS_benchmark_filter = std::move(value); +} + +int32_t GetBenchmarkVerbosity() { return FLAGS_v; } + void RegisterMemoryManager(MemoryManager* manager) { internal::memory_manager = manager; } @@ -528,27 +575,27 @@ namespace internal { +void (*HelperPrintf)(); + void PrintUsageAndExit() { - fprintf(stdout, - "benchmark" - " [--benchmark_list_tests={true|false}]\n" - " [--benchmark_filter=]\n" - " [--benchmark_min_time=]\n" - " [--benchmark_repetitions=]\n" - " [--benchmark_enable_random_interleaving={true|false}]\n" - " [--benchmark_report_aggregates_only={true|false}]\n" - " [--benchmark_display_aggregates_only={true|false}]\n" - " [--benchmark_format=]\n" - " [--benchmark_out=]\n" - " [--benchmark_out_format=]\n" - " [--benchmark_color={auto|true|false}]\n" - " [--benchmark_counters_tabular={true|false}]\n" - " [--benchmark_perf_counters=,...]\n" - " [--benchmark_context==,...]\n" - " [--v=]\n"); + HelperPrintf(); exit(0); } +void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) { + if (time_unit_flag == "s") { + return SetDefaultTimeUnit(kSecond); + } else if (time_unit_flag == "ms") { + return SetDefaultTimeUnit(kMillisecond); + } else if (time_unit_flag == "us") { + return SetDefaultTimeUnit(kMicrosecond); + } else if (time_unit_flag == "ns") { + return SetDefaultTimeUnit(kNanosecond); + } else if (!time_unit_flag.empty()) { + PrintUsageAndExit(); + } +} + void ParseCommandLineFlags(int* argc, char** argv) { using namespace benchmark; BenchmarkReporter::Context::executable_name = @@ -559,6 +606,8 @@ ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || ParseDoubleFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || + ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", + &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", @@ -578,6 +627,8 @@ &FLAGS_benchmark_perf_counters) || ParseKeyValueFlag(argv[i], "benchmark_context", &FLAGS_benchmark_context) || + ParseStringFlag(argv[i], "benchmark_time_unit", + &FLAGS_benchmark_time_unit) || ParseInt32Flag(argv[i], "v", &FLAGS_v)) { for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; @@ -593,6 +644,7 @@ PrintUsageAndExit(); } } + SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit); if (FLAGS_benchmark_color.empty()) { PrintUsageAndExit(); } @@ -608,7 +660,29 @@ } // end namespace internal -void Initialize(int* argc, char** argv) { +void PrintDefaultHelp() { + fprintf(stdout, + "benchmark" + " [--benchmark_list_tests={true|false}]\n" + " [--benchmark_filter=]\n" + " [--benchmark_min_time=]\n" + " [--benchmark_min_warmup_time=]\n" + " [--benchmark_repetitions=]\n" + " [--benchmark_enable_random_interleaving={true|false}]\n" + " [--benchmark_report_aggregates_only={true|false}]\n" + " [--benchmark_display_aggregates_only={true|false}]\n" + " [--benchmark_format=]\n" + " [--benchmark_out=]\n" + " [--benchmark_out_format=]\n" + " [--benchmark_color={auto|true|false}]\n" + " [--benchmark_counters_tabular={true|false}]\n" + " [--benchmark_context==,...]\n" + " [--benchmark_time_unit={ns|us|ms|s}]\n" + " [--v=]\n"); +} + +void Initialize(int* argc, char** argv, void (*HelperPrintf)()) { + internal::HelperPrintf = HelperPrintf; internal::ParseCommandLineFlags(argc, argv); internal::LogLevel() = FLAGS_v; } diff -Nru benchmark-1.6.1/src/benchmark_main.cc 
benchmark-1.7.1/src/benchmark_main.cc --- benchmark-1.6.1/src/benchmark_main.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_main.cc 2022-11-11 14:01:03.000000000 +0000 @@ -14,4 +14,5 @@ #include "benchmark/benchmark.h" +BENCHMARK_EXPORT int main(int, char**); BENCHMARK_MAIN(); diff -Nru benchmark-1.6.1/src/benchmark_name.cc benchmark-1.7.1/src/benchmark_name.cc --- benchmark-1.6.1/src/benchmark_name.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_name.cc 2022-11-11 14:01:03.000000000 +0000 @@ -51,8 +51,9 @@ } } // namespace +BENCHMARK_EXPORT std::string BenchmarkName::str() const { - return join('/', function_name, args, min_time, iterations, repetitions, - time_type, threads); + return join('/', function_name, args, min_time, min_warmup_time, iterations, + repetitions, time_type, threads); } } // namespace benchmark diff -Nru benchmark-1.6.1/src/benchmark_register.cc benchmark-1.7.1/src/benchmark_register.cc --- benchmark-1.6.1/src/benchmark_register.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_register.cc 2022-11-11 14:01:03.000000000 +0000 @@ -15,7 +15,7 @@ #include "benchmark_register.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -53,10 +53,13 @@ namespace { // For non-dense Range, intermediate values are powers of kRangeMultiplier. -static const int kRangeMultiplier = 8; +static constexpr int kRangeMultiplier = 8; + // The size of a benchmark family determines is the number of inputs to repeat // the benchmark on. If this is "large" then warn the user during configuration. -static const size_t kMaxFamilySize = 100; +static constexpr size_t kMaxFamilySize = 100; + +static constexpr char kDisabledPrefix[] = "DISABLED_"; } // end namespace namespace internal { @@ -116,10 +119,10 @@ // Make regular expression out of command-line flag std::string error_msg; Regex re; - bool isNegativeFilter = false; + bool is_negative_filter = false; if (spec[0] == '-') { spec.replace(0, 1, ""); - isNegativeFilter = true; + is_negative_filter = true; } if (!re.Init(spec, &error_msg)) { Err << "Could not compile benchmark re: " << error_msg << std::endl; @@ -154,7 +157,8 @@ << " will be repeated at least " << family_size << " times.\n"; } // reserve in the special case the regex ".", since we know the final - // family size. + // family size. this doesn't take into account any disabled benchmarks + // so worst case we reserve more than we need. 
if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); for (auto const& args : family->args_) { @@ -164,8 +168,9 @@ num_threads); const auto full_name = instance.name().str(); - if ((re.Match(full_name) && !isNegativeFilter) || - (!re.Match(full_name) && isNegativeFilter)) { + if (full_name.rfind(kDisabledPrefix, 0) != 0 && + ((re.Match(full_name) && !is_negative_filter) || + (!re.Match(full_name) && is_negative_filter))) { benchmarks->push_back(std::move(instance)); ++per_family_instance_index; @@ -202,9 +207,11 @@ Benchmark::Benchmark(const char* name) : name_(name), aggregation_report_mode_(ARM_Unspecified), - time_unit_(kNanosecond), + time_unit_(GetDefaultTimeUnit()), + use_default_time_unit_(true), range_multiplier_(kRangeMultiplier), min_time_(0), + min_warmup_time_(0), iterations_(0), repetitions_(0), measure_process_cpu_time_(false), @@ -235,6 +242,7 @@ Benchmark* Benchmark::Unit(TimeUnit unit) { time_unit_ = unit; + use_default_time_unit_ = false; return this; } @@ -348,9 +356,17 @@ return this; } +Benchmark* Benchmark::MinWarmUpTime(double t) { + BM_CHECK(t >= 0.0); + BM_CHECK(iterations_ == 0); + min_warmup_time_ = t; + return this; +} + Benchmark* Benchmark::Iterations(IterationCount n) { BM_CHECK(n > 0); BM_CHECK(IsZero(min_time_)); + BM_CHECK(IsZero(min_warmup_time_)); iterations_ = n; return this; } @@ -462,6 +478,10 @@ return static_cast(args_.front().size()); } +TimeUnit Benchmark::GetTimeUnit() const { + return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_; +} + //=============================================================================// // FunctionBenchmark //=============================================================================// diff -Nru benchmark-1.6.1/src/benchmark_register.h benchmark-1.7.1/src/benchmark_register.h --- benchmark-1.6.1/src/benchmark_register.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_register.h 2022-11-11 14:01:03.000000000 +0000 @@ -1,6 +1,7 @@ #ifndef BENCHMARK_REGISTER_H #define BENCHMARK_REGISTER_H +#include #include #include @@ -23,7 +24,7 @@ static const T kmax = std::numeric_limits::max(); // Space out the values in multiples of "mult" - for (T i = static_cast(1); i <= hi; i *= mult) { + for (T i = static_cast(1); i <= hi; i *= static_cast(mult)) { if (i >= lo) { dst->push_back(i); } @@ -32,7 +33,7 @@ if (i > kmax / mult) break; } - return dst->begin() + start_offset; + return dst->begin() + static_cast(start_offset); } template diff -Nru benchmark-1.6.1/src/benchmark_runner.cc benchmark-1.7.1/src/benchmark_runner.cc --- benchmark-1.6.1/src/benchmark_runner.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_runner.cc 2022-11-11 14:01:03.000000000 +0000 @@ -19,7 +19,7 @@ #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -122,6 +122,7 @@ b->measure_process_cpu_time() ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); + State st = b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) @@ -147,13 +148,16 @@ : b(b_), reports_for_family(reports_for_family_), min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), + min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) + ? 
b.min_warmup_time() + : FLAGS_benchmark_min_warmup_time), + warmup_done(!(min_warmup_time > 0.0)), repeats(b.repetitions() != 0 ? b.repetitions() : FLAGS_benchmark_repetitions), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), iters(has_explicit_iteration_count ? b.iterations() : 1), - perf_counters_measurement( - PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))), + perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')), perf_counters_measurement_ptr(perf_counters_measurement.IsValid() ? &perf_counters_measurement : nullptr) { @@ -232,20 +236,20 @@ const IterationResults& i) const { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). - double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); + double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. - bool is_significant = (i.seconds / min_time) > 0.1; + const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1; multiplier = is_significant ? multiplier : 10.0; // So what seems to be the sufficiently-large iteration count? Round up. const IterationCount max_next_iters = static_cast( std::lround(std::max(multiplier * static_cast(i.iters), static_cast(i.iters) + 1.0))); - // But we do have *some* sanity limits though.. + // But we do have *some* limits though.. const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; @@ -259,19 +263,78 @@ // or because an error was reported. return i.results.has_error_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= min_time || // The elapsed time is large enough. + i.seconds >= + GetMinTimeToApply() || // The elapsed time is large enough. // CPU time is specified but the elapsed real time greatly exceeds // the minimum time. - // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); + // Note that user provided timers are except from this test. + ((i.results.real_time_used >= 5 * GetMinTimeToApply()) && + !b.use_manual_time()); +} + +double BenchmarkRunner::GetMinTimeToApply() const { + // In order to re-use functionality to run and measure benchmarks for running + // a warmup phase of the benchmark, we need a way of telling whether to apply + // min_time or min_warmup_time. This function will figure out if we are in the + // warmup phase and therefore need to apply min_warmup_time or if we already + // in the benchmarking phase and min_time needs to be applied. + return warmup_done ? min_time : min_warmup_time; +} + +void BenchmarkRunner::FinishWarmUp(const IterationCount& i) { + warmup_done = true; + iters = i; +} + +void BenchmarkRunner::RunWarmUp() { + // Use the same mechanisms for warming up the benchmark as used for actually + // running and measuring the benchmark. + IterationResults i_warmup; + // Dont use the iterations determined in the warmup phase for the actual + // measured benchmark phase. 
While this may be a good starting point for the + // benchmark and it would therefore get rid of the need to figure out how many + // iterations are needed if min_time is set again, this may also be a complete + // wrong guess since the warmup loops might be considerably slower (e.g + // because of caching effects). + const IterationCount i_backup = iters; + + for (;;) { + b.Setup(); + i_warmup = DoNIterations(); + b.Teardown(); + + const bool finish = ShouldReportIterationResults(i_warmup); + + if (finish) { + FinishWarmUp(i_backup); + break; + } + + // Although we are running "only" a warmup phase where running enough + // iterations at once without measuring time isn't as important as it is for + // the benchmarking phase, we still do it the same way as otherwise it is + // very confusing for the user to know how to choose a proper value for + // min_warmup_time if a different approach on running it is used. + iters = PredictNumItersNeeded(i_warmup); + assert(iters > i_warmup.iters && + "if we did more iterations than we want to do the next time, " + "then we should have accepted the current iteration run."); + } } void BenchmarkRunner::DoOneRepetition() { assert(HasRepeatsRemaining() && "Already done all repetitions?"); const bool is_the_first_repetition = num_repetitions_done == 0; - IterationResults i; + // In case a warmup phase is requested by the benchmark, run it now. + // After running the warmup phase the BenchmarkRunner should be in a state as + // this warmup never happened except the fact that warmup_done is set. Every + // other manipulation of the BenchmarkRunner instance would be a bug! Please + // fix it. + if (!warmup_done) RunWarmUp(); + + IterationResults i; // We *may* be gradually increasing the length (iteration count) // of the benchmark until we decide the results are significant. // And once we do, we report those last results and exit. 
diff -Nru benchmark-1.6.1/src/benchmark_runner.h benchmark-1.7.1/src/benchmark_runner.h --- benchmark-1.6.1/src/benchmark_runner.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/benchmark_runner.h 2022-11-11 14:01:03.000000000 +0000 @@ -26,6 +26,7 @@ namespace benchmark { BM_DECLARE_double(benchmark_min_time); +BM_DECLARE_double(benchmark_min_warmup_time); BM_DECLARE_int32(benchmark_repetitions); BM_DECLARE_bool(benchmark_report_aggregates_only); BM_DECLARE_bool(benchmark_display_aggregates_only); @@ -69,6 +70,8 @@ BenchmarkReporter::PerFamilyRunReports* reports_for_family; const double min_time; + const double min_warmup_time; + bool warmup_done; const int repeats; const bool has_explicit_iteration_count; @@ -95,6 +98,12 @@ IterationCount PredictNumItersNeeded(const IterationResults& i) const; bool ShouldReportIterationResults(const IterationResults& i) const; + + double GetMinTimeToApply() const; + + void FinishWarmUp(const IterationCount& i); + + void RunWarmUp(); }; } // namespace internal diff -Nru benchmark-1.6.1/src/check.cc benchmark-1.7.1/src/check.cc --- benchmark-1.6.1/src/check.cc 1970-01-01 00:00:00.000000000 +0000 +++ benchmark-1.7.1/src/check.cc 2022-11-11 14:01:03.000000000 +0000 @@ -0,0 +1,11 @@ +#include "check.h" + +namespace benchmark { +namespace internal { + +static AbortHandlerT* handler = &std::abort; + +BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; } + +} // namespace internal +} // namespace benchmark diff -Nru benchmark-1.6.1/src/check.h benchmark-1.7.1/src/check.h --- benchmark-1.6.1/src/check.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/check.h 2022-11-11 14:01:03.000000000 +0000 @@ -5,18 +5,34 @@ #include #include +#include "benchmark/export.h" #include "internal_macros.h" #include "log.h" +#if defined(__GNUC__) || defined(__clang__) +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#elif defined(_MSC_VER) && !defined(__clang__) +#if _MSC_VER >= 1900 +#define BENCHMARK_NOEXCEPT noexcept +#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +#else +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif +#define __func__ __FUNCTION__ +#else +#define BENCHMARK_NOEXCEPT +#define BENCHMARK_NOEXCEPT_OP(x) +#endif + namespace benchmark { namespace internal { typedef void(AbortHandlerT)(); -inline AbortHandlerT*& GetAbortHandler() { - static AbortHandlerT* handler = &std::abort; - return handler; -} +BENCHMARK_EXPORT +AbortHandlerT*& GetAbortHandler(); BENCHMARK_NORETURN inline void CallAbortHandler() { GetAbortHandler()(); diff -Nru benchmark-1.6.1/src/CMakeLists.txt benchmark-1.7.1/src/CMakeLists.txt --- benchmark-1.6.1/src/CMakeLists.txt 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/CMakeLists.txt 2022-11-11 14:01:03.000000000 +0000 @@ -25,12 +25,13 @@ SOVERSION ${GENERIC_LIB_SOVERSION} ) target_include_directories(benchmark PUBLIC - $) + $ +) # libpfm, if available if (HAVE_LIBPFM) target_link_libraries(benchmark PRIVATE pfm) - add_definitions(-DHAVE_LIBPFM) + target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM) endif() # Link threads. 
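Because the CMake hunk above adds BENCHMARK_STATIC_DEFINE as a PUBLIC compile definition for non-shared builds, consumers that link through the exported benchmark::benchmark CMake target inherit it automatically; a project that just grabs the headers and the static archive has to define it itself, so that BENCHMARK_EXPORT from the new export.h expands to nothing instead of a dllimport attribute (the case that matters on Windows). A rough sketch of such a consumer (the file and benchmark names are made up):

    // consumer.cc -- links a statically built libbenchmark without using the
    // exported CMake target, so the macro is defined by hand here (normally it
    // would come from the build system, e.g. -DBENCHMARK_STATIC_DEFINE).
    #define BENCHMARK_STATIC_DEFINE
    #include "benchmark/benchmark.h"

    static void BM_Empty(benchmark::State& state) {
      for (auto _ : state) {
      }
    }
    BENCHMARK(BM_Empty);

    BENCHMARK_MAIN();

The diff continues with the remaining CMakeLists.txt hunks: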
@@ -53,6 +54,10 @@ target_link_libraries(benchmark PRIVATE kstat) endif() +if (NOT BUILD_SHARED_LIBS) + target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE) +endif() + # Benchmark main library add_library(benchmark_main "benchmark_main.cc") add_library(benchmark::benchmark_main ALIAS benchmark_main) @@ -60,10 +65,10 @@ OUTPUT_NAME "benchmark_main" VERSION ${GENERIC_LIB_VERSION} SOVERSION ${GENERIC_LIB_SOVERSION} + DEFINE_SYMBOL benchmark_EXPORTS ) target_link_libraries(benchmark_main PUBLIC benchmark::benchmark) - set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") @@ -107,6 +112,7 @@ install( DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark" + "${PROJECT_BINARY_DIR}/include/benchmark" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.*h") diff -Nru benchmark-1.6.1/src/commandlineflags.cc benchmark-1.7.1/src/commandlineflags.cc --- benchmark-1.6.1/src/commandlineflags.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/commandlineflags.cc 2022-11-11 14:01:03.000000000 +0000 @@ -121,12 +121,14 @@ } // namespace +BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str); } +BENCHMARK_EXPORT int32_t Int32FromEnv(const char* flag, int32_t default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); @@ -139,6 +141,7 @@ return value; } +BENCHMARK_EXPORT double DoubleFromEnv(const char* flag, double default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value_str = getenv(env_var.c_str()); @@ -151,12 +154,14 @@ return value; } +BENCHMARK_EXPORT const char* StringFromEnv(const char* flag, const char* default_val) { const std::string env_var = FlagToEnvVar(flag); const char* const value = getenv(env_var.c_str()); return value == nullptr ? default_val : value; } +BENCHMARK_EXPORT std::map KvPairsFromEnv( const char* flag, std::map default_val) { const std::string env_var = FlagToEnvVar(flag); @@ -201,6 +206,7 @@ return flag_end + 1; } +BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, true); @@ -213,6 +219,7 @@ return true; } +BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -225,6 +232,7 @@ value); } +BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); @@ -237,6 +245,7 @@ value); } +BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value) { // Gets the value of the flag as a string. 
const char* const value_str = ParseFlagValue(str, flag, false); @@ -248,6 +257,7 @@ return true; } +BENCHMARK_EXPORT bool ParseKeyValueFlag(const char* str, const char* flag, std::map* value) { const char* const value_str = ParseFlagValue(str, flag, false); @@ -263,10 +273,12 @@ return true; } +BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag) { return (ParseFlagValue(str, flag, true) != nullptr); } +BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value) { if (value.size() == 1) { char v = value[0]; diff -Nru benchmark-1.6.1/src/commandlineflags.h benchmark-1.7.1/src/commandlineflags.h --- benchmark-1.6.1/src/commandlineflags.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/commandlineflags.h 2022-11-11 14:01:03.000000000 +0000 @@ -5,28 +5,33 @@ #include #include +#include "benchmark/export.h" + // Macro for referencing flags. #define FLAG(name) FLAGS_##name // Macros for declaring flags. -#define BM_DECLARE_bool(name) extern bool FLAG(name) -#define BM_DECLARE_int32(name) extern int32_t FLAG(name) -#define BM_DECLARE_double(name) extern double FLAG(name) -#define BM_DECLARE_string(name) extern std::string FLAG(name) +#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name) +#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name) +#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name) +#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name) #define BM_DECLARE_kvpairs(name) \ - extern std::map FLAG(name) + BENCHMARK_EXPORT extern std::map FLAG(name) // Macros for defining flags. #define BM_DEFINE_bool(name, default_val) \ - bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) + BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) #define BM_DEFINE_int32(name, default_val) \ - int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val) + BENCHMARK_EXPORT int32_t FLAG(name) = \ + benchmark::Int32FromEnv(#name, default_val) #define BM_DEFINE_double(name, default_val) \ - double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val) + BENCHMARK_EXPORT double FLAG(name) = \ + benchmark::DoubleFromEnv(#name, default_val) #define BM_DEFINE_string(name, default_val) \ - std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val) -#define BM_DEFINE_kvpairs(name, default_val) \ - std::map FLAG(name) = \ + BENCHMARK_EXPORT std::string FLAG(name) = \ + benchmark::StringFromEnv(#name, default_val) +#define BM_DEFINE_kvpairs(name, default_val) \ + BENCHMARK_EXPORT std::map FLAG(name) = \ benchmark::KvPairsFromEnv(#name, default_val) namespace benchmark { @@ -35,6 +40,7 @@ // // If the variable exists, returns IsTruthyFlagValue() value; if not, // returns the given default value. +BENCHMARK_EXPORT bool BoolFromEnv(const char* flag, bool default_val); // Parses an Int32 from the environment variable corresponding to the given @@ -42,6 +48,7 @@ // // If the variable exists, returns ParseInt32() value; if not, returns // the given default value. +BENCHMARK_EXPORT int32_t Int32FromEnv(const char* flag, int32_t default_val); // Parses an Double from the environment variable corresponding to the given @@ -49,6 +56,7 @@ // // If the variable exists, returns ParseDouble(); if not, returns // the given default value. 
+BENCHMARK_EXPORT double DoubleFromEnv(const char* flag, double default_val); // Parses a string from the environment variable corresponding to the given @@ -56,6 +64,7 @@ // // If variable exists, returns its value; if not, returns // the given default value. +BENCHMARK_EXPORT const char* StringFromEnv(const char* flag, const char* default_val); // Parses a set of kvpairs from the environment variable corresponding to the @@ -63,6 +72,7 @@ // // If variable exists, returns its value; if not, returns // the given default value. +BENCHMARK_EXPORT std::map KvPairsFromEnv( const char* flag, std::map default_val); @@ -75,40 +85,47 @@ // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseBoolFlag(const char* str, const char* flag, bool* value); // Parses a string for an Int32 flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); // Parses a string for a Double flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseDoubleFlag(const char* str, const char* flag, double* value); // Parses a string for a string flag, in the form of "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. +BENCHMARK_EXPORT bool ParseStringFlag(const char* str, const char* flag, std::string* value); // Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" // // On success, stores the value of the flag in *value and returns true. On // failure returns false, though *value may have been mutated. +BENCHMARK_EXPORT bool ParseKeyValueFlag(const char* str, const char* flag, std::map* value); // Returns true if the string matches the flag. +BENCHMARK_EXPORT bool IsFlag(const char* str, const char* flag); // Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or // some non-alphanumeric character. Also returns false if the value matches // one of 'no', 'false', 'off' (case-insensitive). As a special case, also // returns true if value is the empty string. +BENCHMARK_EXPORT bool IsTruthyFlagValue(const std::string& value); } // end namespace benchmark diff -Nru benchmark-1.6.1/src/console_reporter.cc benchmark-1.7.1/src/console_reporter.cc --- benchmark-1.6.1/src/console_reporter.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/console_reporter.cc 2022-11-11 14:01:03.000000000 +0000 @@ -33,6 +33,7 @@ namespace benchmark { +BENCHMARK_EXPORT bool ConsoleReporter::ReportContext(const Context& context) { name_field_width_ = context.name_field_width; printed_header_ = false; @@ -52,6 +53,7 @@ return true; } +BENCHMARK_EXPORT void ConsoleReporter::PrintHeader(const Run& run) { std::string str = FormatString("%-*s %13s %15s %12s", static_cast(name_field_width_), @@ -69,6 +71,7 @@ GetOutputStream() << line << "\n" << str << "\n" << line << "\n"; } +BENCHMARK_EXPORT void ConsoleReporter::ReportRuns(const std::vector& reports) { for (const auto& run : reports) { // print the header: @@ -99,6 +102,9 @@ } static std::string FormatTime(double time) { + // For the time columns of the console printer 13 digits are reserved. 
One of + // them is a space and max two of them are the time unit (e.g ns). That puts + // us at 10 digits usable for the number. // Align decimal places... if (time < 1.0) { return FormatString("%10.3f", time); @@ -109,9 +115,15 @@ if (time < 100.0) { return FormatString("%10.1f", time); } + // Assuming the time ist at max 9.9999e+99 and we have 10 digits for the + // number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print. + if (time > 9999999999 /*max 10 digit number*/) { + return FormatString("%1.4e", time); + } return FormatString("%10.0f", time); } +BENCHMARK_EXPORT void ConsoleReporter::PrintRunData(const Run& result) { typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...); auto& Out = GetOutputStream(); diff -Nru benchmark-1.6.1/src/csv_reporter.cc benchmark-1.7.1/src/csv_reporter.cc --- benchmark-1.6.1/src/csv_reporter.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/csv_reporter.cc 2022-11-11 14:01:03.000000000 +0000 @@ -52,11 +52,13 @@ return '"' + tmp + '"'; } +BENCHMARK_EXPORT bool CSVReporter::ReportContext(const Context& context) { PrintBasicContext(&GetErrorStream(), context); return true; } +BENCHMARK_EXPORT void CSVReporter::ReportRuns(const std::vector& reports) { std::ostream& Out = GetOutputStream(); @@ -103,6 +105,7 @@ } } +BENCHMARK_EXPORT void CSVReporter::PrintRunData(const Run& run) { std::ostream& Out = GetOutputStream(); Out << CsvEscape(run.benchmark_name()) << ","; diff -Nru benchmark-1.6.1/src/cycleclock.h benchmark-1.7.1/src/cycleclock.h --- benchmark-1.6.1/src/cycleclock.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/cycleclock.h 2022-11-11 14:01:03.000000000 +0000 @@ -132,7 +132,7 @@ // Native Client does not provide any API to access cycle counter. // Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday - // because is provides nanosecond resolution (which is noticable at + // because is provides nanosecond resolution (which is noticeable at // least for PNaCl modules running on x86 Mac & Linux). // Initialize to always return 0 if clock_gettime fails. struct timespec ts = {0, 0}; @@ -173,7 +173,7 @@ struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -#elif defined(__loongarch__) +#elif defined(__loongarch__) || defined(__csky__) struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; @@ -212,6 +212,10 @@ struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__hexagon__) + uint64_t pcycle; + asm volatile("%0 = C15:14" : "=r"(pcycle)); + return static_cast(pcycle); #else // The soft failover to a generic implementation is automatic only for ARM. 
// For other platforms the developer is expected to make an attempt to create diff -Nru benchmark-1.6.1/src/internal_macros.h benchmark-1.7.1/src/internal_macros.h --- benchmark-1.6.1/src/internal_macros.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/internal_macros.h 2022-11-11 14:01:03.000000000 +0000 @@ -1,8 +1,6 @@ #ifndef BENCHMARK_INTERNAL_MACROS_H_ #define BENCHMARK_INTERNAL_MACROS_H_ -#include "benchmark/benchmark.h" - /* Needed to detect STL */ #include @@ -44,6 +42,13 @@ #define BENCHMARK_OS_CYGWIN 1 #elif defined(_WIN32) #define BENCHMARK_OS_WINDOWS 1 + #if defined(WINAPI_FAMILY_PARTITION) + #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + #define BENCHMARK_OS_WINDOWS_WIN32 1 + #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) + #define BENCHMARK_OS_WINDOWS_RT 1 + #endif + #endif #if defined(__MINGW32__) #define BENCHMARK_OS_MINGW 1 #endif @@ -80,6 +85,8 @@ #define BENCHMARK_OS_QNX 1 #elif defined(__MVS__) #define BENCHMARK_OS_ZOS 1 +#elif defined(__hexagon__) +#define BENCHMARK_OS_QURT 1 #endif #if defined(__ANDROID__) && defined(__GLIBCXX__) diff -Nru benchmark-1.6.1/src/json_reporter.cc benchmark-1.7.1/src/json_reporter.cc --- benchmark-1.6.1/src/json_reporter.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/json_reporter.cc 2022-11-11 14:01:03.000000000 +0000 @@ -28,10 +28,6 @@ #include "timers.h" namespace benchmark { -namespace internal { -extern std::map* global_context; -} - namespace { std::string StrEscape(const std::string& s) { @@ -89,12 +85,6 @@ return ss.str(); } -std::string FormatKV(std::string const& key, IterationCount value) { - std::stringstream ss; - ss << '"' << StrEscape(key) << "\": " << value; - return ss.str(); -} - std::string FormatKV(std::string const& key, double value) { std::stringstream ss; ss << '"' << StrEscape(key) << "\": "; @@ -184,8 +174,11 @@ #endif out << indent << FormatKV("library_build_type", build_type); - if (internal::global_context != nullptr) { - for (const auto& kv : *internal::global_context) { + std::map* global_context = + internal::GetGlobalContext(); + + if (global_context != nullptr) { + for (const auto& kv : *global_context) { out << ",\n"; out << indent << FormatKV(kv.first, kv.second); } diff -Nru benchmark-1.6.1/src/log.h benchmark-1.7.1/src/log.h --- benchmark-1.6.1/src/log.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/log.h 2022-11-11 14:01:03.000000000 +0000 @@ -4,7 +4,12 @@ #include #include -#include "benchmark/benchmark.h" +// NOTE: this is also defined in benchmark.h but we're trying to avoid a +// dependency. +// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer. +#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +#define BENCHMARK_HAS_CXX11 +#endif namespace benchmark { namespace internal { @@ -23,7 +28,16 @@ private: LogType(std::ostream* out) : out_(out) {} std::ostream* out_; - BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType); + + // NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have + // a dependency on benchmark.h from here. 
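The reporters above now read the user-supplied key/value context through internal::GetGlobalContext() rather than an extern pointer. From user code nothing changes; context is still added through the public API, for example (sketch with a made-up key/value):

    #include <benchmark/benchmark.h>

    static void BM_Noop(benchmark::State& state) {
      for (auto _ : state) {
      }
    }
    BENCHMARK(BM_Noop);

    int main(int argc, char** argv) {
      benchmark::Initialize(&argc, argv);
      // Key/value pairs end up in the context block printed by the reporters.
      benchmark::AddCustomContext("build_sha", "deadbeef");  // hypothetical key/value
      benchmark::RunSpecifiedBenchmarks();
      benchmark::Shutdown();
      return 0;
    }
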
+#ifndef BENCHMARK_HAS_CXX11 + LogType(const LogType&); + LogType& operator=(const LogType&); +#else + LogType(const LogType&) = delete; + LogType& operator=(const LogType&) = delete; +#endif }; template @@ -47,13 +61,13 @@ } inline LogType& GetNullLogInstance() { - static LogType log(nullptr); - return log; + static LogType null_log((std::ostream*)nullptr); + return null_log; } inline LogType& GetErrorLogInstance() { - static LogType log(&std::clog); - return log; + static LogType error_log(&std::clog); + return error_log; } inline LogType& GetLogInstanceForLevel(int level) { diff -Nru benchmark-1.6.1/src/perf_counters.cc benchmark-1.7.1/src/perf_counters.cc --- benchmark-1.6.1/src/perf_counters.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/perf_counters.cc 2022-11-11 14:01:03.000000000 +0000 @@ -15,6 +15,7 @@ #include "perf_counters.h" #include +#include #include #if defined HAVE_LIBPFM @@ -104,7 +105,7 @@ return PerfCounters(counter_names, std::move(counter_ids)); } -PerfCounters::~PerfCounters() { +void PerfCounters::CloseCounters() const { if (counter_ids_.empty()) { return; } @@ -126,7 +127,44 @@ return NoCounters(); } -PerfCounters::~PerfCounters() = default; +void PerfCounters::CloseCounters() const {} #endif // defined HAVE_LIBPFM + +Mutex PerfCountersMeasurement::mutex_; +int PerfCountersMeasurement::ref_count_ = 0; +PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters(); + +PerfCountersMeasurement::PerfCountersMeasurement( + const std::vector& counter_names) + : start_values_(counter_names.size()), end_values_(counter_names.size()) { + MutexLock l(mutex_); + if (ref_count_ == 0) { + counters_ = PerfCounters::Create(counter_names); + } + // We chose to increment it even if `counters_` ends up invalid, + // so that we don't keep trying to create, and also since the dtor + // will decrement regardless of `counters_`'s validity + ++ref_count_; + + BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names); +} + +PerfCountersMeasurement::~PerfCountersMeasurement() { + MutexLock l(mutex_); + --ref_count_; + if (ref_count_ == 0) { + counters_ = PerfCounters::NoCounters(); + } +} + +PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept { + if (this != &other) { + CloseCounters(); + + counter_ids_ = std::move(other.counter_ids_); + counter_names_ = std::move(other.counter_names_); + } + return *this; +} } // namespace internal } // namespace benchmark diff -Nru benchmark-1.6.1/src/perf_counters.h benchmark-1.7.1/src/perf_counters.h --- benchmark-1.6.1/src/perf_counters.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/perf_counters.h 2022-11-11 14:01:03.000000000 +0000 @@ -17,16 +17,24 @@ #include #include +#include #include #include "benchmark/benchmark.h" #include "check.h" #include "log.h" +#include "mutex.h" #ifndef BENCHMARK_OS_WINDOWS #include #endif +#if defined(_MSC_VER) +#pragma warning(push) +// C4251: needs to have dll-interface to be used by clients of class +#pragma warning(disable : 4251) +#endif + namespace benchmark { namespace internal { @@ -66,17 +74,19 @@ // Collect PMU counters. The object, once constructed, is ready to be used by // calling read(). PMU counter collection is enabled from the time create() is // called, to obtain the object, until the object's destructor is called. -class PerfCounters final { +class BENCHMARK_EXPORT PerfCounters final { public: // True iff this platform supports performance counters. 
static const bool kSupported; - bool IsValid() const { return is_valid_; } + bool IsValid() const { return !counter_names_.empty(); } static PerfCounters NoCounters() { return PerfCounters(); } - ~PerfCounters(); + ~PerfCounters() { CloseCounters(); } PerfCounters(PerfCounters&&) = default; PerfCounters(const PerfCounters&) = delete; + PerfCounters& operator=(PerfCounters&&) noexcept; + PerfCounters& operator=(const PerfCounters&) = delete; // Platform-specific implementations may choose to do some library // initialization here. @@ -111,55 +121,66 @@ private: PerfCounters(const std::vector& counter_names, std::vector&& counter_ids) - : counter_ids_(std::move(counter_ids)), - counter_names_(counter_names), - is_valid_(true) {} - PerfCounters() : is_valid_(false) {} + : counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {} + PerfCounters() = default; + + void CloseCounters() const; std::vector counter_ids_; - const std::vector counter_names_; - const bool is_valid_; + std::vector counter_names_; }; // Typical usage of the above primitives. -class PerfCountersMeasurement final { +class BENCHMARK_EXPORT PerfCountersMeasurement final { public: - PerfCountersMeasurement(PerfCounters&& c) - : counters_(std::move(c)), - start_values_(counters_.IsValid() ? counters_.names().size() : 0), - end_values_(counters_.IsValid() ? counters_.names().size() : 0) {} + PerfCountersMeasurement(const std::vector& counter_names); + ~PerfCountersMeasurement(); - bool IsValid() const { return counters_.IsValid(); } + // The only way to get to `counters_` is after ctor-ing a + // `PerfCountersMeasurement`, which means that `counters_`'s state is, here, + // decided (either invalid or valid) and won't change again even if a ctor is + // concurrently running with this. This is preferring efficiency to + // maintainability, because the address of the static can be known at compile + // time. + bool IsValid() const { + MutexLock l(mutex_); + return counters_.IsValid(); + } BENCHMARK_ALWAYS_INLINE void Start() { assert(IsValid()); + MutexLock l(mutex_); // Tell the compiler to not move instructions above/below where we take // the snapshot. ClobberMemory(); - counters_.Snapshot(&start_values_); + valid_read_ &= counters_.Snapshot(&start_values_); ClobberMemory(); } - BENCHMARK_ALWAYS_INLINE std::vector> - StopAndGetMeasurements() { + BENCHMARK_ALWAYS_INLINE bool Stop( + std::vector>& measurements) { assert(IsValid()); + MutexLock l(mutex_); // Tell the compiler to not move instructions above/below where we take // the snapshot. 
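With this rework, PerfCountersMeasurement is built directly from counter names and shares one process-wide PerfCounters instance behind a mutex; Stop() now fills a caller-provided vector and returns whether every snapshot succeeded. A rough usage sketch of the internal API, based on the perf_counters_gtest.cc changes further down (the header path, the "CYCLES" counter name and the exact pair type are assumptions; libpfm support is required):

    #include <string>
    #include <utility>
    #include <vector>
    #include "src/perf_counters.h"   // internal header, path assumed

    using benchmark::internal::PerfCounters;
    using benchmark::internal::PerfCountersMeasurement;

    void MeasureOnce() {
      if (!PerfCounters::kSupported) return;     // needs libpfm
      if (!PerfCounters::Initialize()) return;
      PerfCountersMeasurement pcm(std::vector<std::string>{"CYCLES"});
      if (!pcm.IsValid()) return;
      std::vector<std::pair<std::string, double>> deltas;
      pcm.Start();
      // ... code under measurement ...
      bool ok = pcm.Stop(deltas);   // false if any snapshot failed
      (void)ok;
    }
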
ClobberMemory(); - counters_.Snapshot(&end_values_); + valid_read_ &= counters_.Snapshot(&end_values_); ClobberMemory(); - std::vector> ret; for (size_t i = 0; i < counters_.names().size(); ++i) { double measurement = static_cast(end_values_[i]) - static_cast(start_values_[i]); - ret.push_back({counters_.names()[i], measurement}); + measurements.push_back({counters_.names()[i], measurement}); } - return ret; + + return valid_read_; } private: - PerfCounters counters_; + static Mutex mutex_; + GUARDED_BY(mutex_) static int ref_count_; + GUARDED_BY(mutex_) static PerfCounters counters_; + bool valid_read_ = true; PerfCounterValues start_values_; PerfCounterValues end_values_; }; @@ -169,4 +190,8 @@ } // namespace internal } // namespace benchmark +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #endif // BENCHMARK_PERF_COUNTERS_H diff -Nru benchmark-1.6.1/src/reporter.cc benchmark-1.7.1/src/reporter.cc --- benchmark-1.6.1/src/reporter.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/reporter.cc 2022-11-11 14:01:03.000000000 +0000 @@ -25,9 +25,6 @@ #include "timers.h" namespace benchmark { -namespace internal { -extern std::map *global_context; -} BenchmarkReporter::BenchmarkReporter() : output_stream_(&std::cout), error_stream_(&std::cerr) {} @@ -39,7 +36,11 @@ BM_CHECK(out) << "cannot be null"; auto &Out = *out; +#ifndef BENCHMARK_OS_QURT + // Date/time information is not available on QuRT. + // Attempting to get it via this call cause the binary to crash. Out << LocalDateTimeString() << "\n"; +#endif if (context.executable_name) Out << "Running " << context.executable_name << "\n"; @@ -67,8 +68,11 @@ Out << "\n"; } - if (internal::global_context != nullptr) { - for (const auto &kv : *internal::global_context) { + std::map *global_context = + internal::GetGlobalContext(); + + if (global_context != nullptr) { + for (const auto &kv : *global_context) { Out << kv.first << ": " << kv.second << "\n"; } } diff -Nru benchmark-1.6.1/src/statistics.cc benchmark-1.7.1/src/statistics.cc --- benchmark-1.6.1/src/statistics.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/statistics.cc 2022-11-11 14:01:03.000000000 +0000 @@ -118,11 +118,13 @@ for (auto const& cnt : r.counters) { auto it = counter_stats.find(cnt.first); if (it == counter_stats.end()) { - counter_stats.insert({cnt.first, {cnt.second, std::vector{}}}); - it = counter_stats.find(cnt.first); + it = counter_stats + .emplace(cnt.first, + CounterStat{cnt.second, std::vector{}}) + .first; it->second.s.reserve(reports.size()); } else { - BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); + BM_CHECK_EQ(it->second.c.flags, cnt.second.flags); } } } diff -Nru benchmark-1.6.1/src/statistics.h benchmark-1.7.1/src/statistics.h --- benchmark-1.6.1/src/statistics.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/statistics.h 2022-11-11 14:01:03.000000000 +0000 @@ -25,12 +25,17 @@ // Return a vector containing the mean, median and standard devation information // (and any user-specified info) for the specified list of reports. 
If 'reports' // contains less than two non-errored runs an empty vector is returned +BENCHMARK_EXPORT std::vector ComputeStats( const std::vector& reports); +BENCHMARK_EXPORT double StatisticsMean(const std::vector& v); +BENCHMARK_EXPORT double StatisticsMedian(const std::vector& v); +BENCHMARK_EXPORT double StatisticsStdDev(const std::vector& v); +BENCHMARK_EXPORT double StatisticsCV(const std::vector& v); } // end namespace benchmark diff -Nru benchmark-1.6.1/src/string_util.cc benchmark-1.7.1/src/string_util.cc --- benchmark-1.6.1/src/string_util.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/string_util.cc 2022-11-11 14:01:03.000000000 +0000 @@ -133,21 +133,21 @@ // TODO(ericwf): use std::array for first attempt to avoid one memory // allocation guess what the size might be std::array local_buff; - std::size_t size = local_buff.size(); + // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk - auto ret = vsnprintf(local_buff.data(), size, msg, args_cp); + auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp); va_end(args_cp); // handle empty expansion if (ret == 0) return std::string{}; - if (static_cast(ret) < size) + if (static_cast(ret) < local_buff.size()) return std::string(local_buff.data()); // we did not provide a long enough buffer on our first attempt. // add 1 to size to account for null-byte in size cast to prevent overflow - size = static_cast(ret) + 1; + std::size_t size = static_cast(ret) + 1; auto buff_ptr = std::unique_ptr(new char[size]); // 2015-10-08: vsnprintf is used instead of snd::vsnprintf due to a limitation // in the android-ndk diff -Nru benchmark-1.6.1/src/string_util.h benchmark-1.7.1/src/string_util.h --- benchmark-1.6.1/src/string_util.h 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/string_util.h 2022-11-11 14:01:03.000000000 +0000 @@ -4,7 +4,10 @@ #include #include #include +#include +#include "benchmark/export.h" +#include "check.h" #include "internal_macros.h" namespace benchmark { @@ -13,6 +16,7 @@ std::string HumanReadableNumber(double n, double one_k = 1024.0); +BENCHMARK_EXPORT #if defined(__MINGW32__) __attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2))) #elif defined(__GNUC__) @@ -38,6 +42,7 @@ return ss.str(); } +BENCHMARK_EXPORT std::vector StrSplit(const std::string& str, char delim); // Disable lint checking for this block since it re-implements C functions. diff -Nru benchmark-1.6.1/src/sysinfo.cc benchmark-1.7.1/src/sysinfo.cc --- benchmark-1.6.1/src/sysinfo.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/sysinfo.cc 2022-11-11 14:01:03.000000000 +0000 @@ -23,7 +23,7 @@ #include #else #include -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -38,10 +38,14 @@ #endif #if defined(BENCHMARK_OS_SOLARIS) #include +#include #endif #if defined(BENCHMARK_OS_QNX) #include #endif +#if defined(BENCHMARK_OS_QURT) +#include +#endif #include #include @@ -91,67 +95,59 @@ /// `sysctl` with the result type it's to be interpreted as. struct ValueUnion { union DataT { - uint32_t uint32_value; - uint64_t uint64_value; + int32_t int32_value; + int64_t int64_value; // For correct aliasing of union members from bytes. char bytes[8]; }; using DataPtr = std::unique_ptr; // The size of the data union member + its trailing array size. 
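The statistics helpers above gain export annotations mainly so the unit tests can link against them; what they compute is unchanged. A small worked example, assuming the usual sample standard deviation (n-1 denominator):

    #include <vector>
    #include "src/statistics.h"   // internal header, path assumed

    void StatisticsExample() {
      const std::vector<double> v{2.0, 4.0, 6.0};
      double mean = benchmark::StatisticsMean(v);    // 4.0
      double sd   = benchmark::StatisticsStdDev(v);  // 2.0, assuming the n-1 form
      double cv   = benchmark::StatisticsCV(v);      // 0.5 = sd / mean
      (void)mean; (void)sd; (void)cv;
    }
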
- size_t Size; - DataPtr Buff; + std::size_t size; + DataPtr buff; public: - ValueUnion() : Size(0), Buff(nullptr, &std::free) {} + ValueUnion() : size(0), buff(nullptr, &std::free) {} - explicit ValueUnion(size_t BuffSize) - : Size(sizeof(DataT) + BuffSize), - Buff(::new (std::malloc(Size)) DataT(), &std::free) {} + explicit ValueUnion(std::size_t buff_size) + : size(sizeof(DataT) + buff_size), + buff(::new (std::malloc(size)) DataT(), &std::free) {} ValueUnion(ValueUnion&& other) = default; - explicit operator bool() const { return bool(Buff); } + explicit operator bool() const { return bool(buff); } - char* data() const { return Buff->bytes; } + char* data() const { return buff->bytes; } std::string GetAsString() const { return std::string(data()); } int64_t GetAsInteger() const { - if (Size == sizeof(Buff->uint32_value)) - return static_cast(Buff->uint32_value); - else if (Size == sizeof(Buff->uint64_value)) - return static_cast(Buff->uint64_value); - BENCHMARK_UNREACHABLE(); - } - - uint64_t GetAsUnsigned() const { - if (Size == sizeof(Buff->uint32_value)) - return Buff->uint32_value; - else if (Size == sizeof(Buff->uint64_value)) - return Buff->uint64_value; + if (size == sizeof(buff->int32_value)) + return buff->int32_value; + else if (size == sizeof(buff->int64_value)) + return buff->int64_value; BENCHMARK_UNREACHABLE(); } template std::array GetAsArray() { - const int ArrSize = sizeof(T) * N; - BM_CHECK_LE(ArrSize, Size); - std::array Arr; - std::memcpy(Arr.data(), data(), ArrSize); - return Arr; + const int arr_size = sizeof(T) * N; + BM_CHECK_LE(arr_size, size); + std::array arr; + std::memcpy(arr.data(), data(), arr_size); + return arr; } }; -ValueUnion GetSysctlImp(std::string const& Name) { +ValueUnion GetSysctlImp(std::string const& name) { #if defined BENCHMARK_OS_OPENBSD int mib[2]; mib[0] = CTL_HW; - if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")) { + if ((name == "hw.ncpu") || (name == "hw.cpuspeed")) { ValueUnion buff(sizeof(int)); - if (Name == "hw.ncpu") { + if (name == "hw.ncpu") { mib[1] = HW_NCPU; } else { mib[1] = HW_CPUSPEED; @@ -164,41 +160,41 @@ } return ValueUnion(); #else - size_t CurBuffSize = 0; - if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1) + std::size_t cur_buff_size = 0; + if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1) return ValueUnion(); - ValueUnion buff(CurBuffSize); - if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) + ValueUnion buff(cur_buff_size); + if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0) return buff; return ValueUnion(); #endif } BENCHMARK_MAYBE_UNUSED -bool GetSysctl(std::string const& Name, std::string* Out) { - Out->clear(); - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - Out->assign(Buff.data()); +bool GetSysctl(std::string const& name, std::string* out) { + out->clear(); + auto buff = GetSysctlImp(name); + if (!buff) return false; + out->assign(buff.data()); return true; } template ::value>::type> -bool GetSysctl(std::string const& Name, Tp* Out) { - *Out = 0; - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = static_cast(Buff.GetAsUnsigned()); +bool GetSysctl(std::string const& name, Tp* out) { + *out = 0; + auto buff = GetSysctlImp(name); + if (!buff) return false; + *out = static_cast(buff.GetAsInteger()); return true; } template -bool GetSysctl(std::string const& Name, std::array* Out) { - auto Buff = GetSysctlImp(Name); - if (!Buff) return false; - *Out = Buff.GetAsArray(); +bool 
GetSysctl(std::string const& name, std::array* out) { + auto buff = GetSysctlImp(name); + if (!buff) return false; + *out = buff.GetAsArray(); return true; } #endif @@ -234,21 +230,21 @@ #endif } -int CountSetBitsInCPUMap(std::string Val) { - auto CountBits = [](std::string Part) { +int CountSetBitsInCPUMap(std::string val) { + auto CountBits = [](std::string part) { using CPUMask = std::bitset; - Part = "0x" + Part; - CPUMask Mask(benchmark::stoul(Part, nullptr, 16)); - return static_cast(Mask.count()); + part = "0x" + part; + CPUMask mask(benchmark::stoul(part, nullptr, 16)); + return static_cast(mask.count()); }; - size_t Pos; + std::size_t pos; int total = 0; - while ((Pos = Val.find(',')) != std::string::npos) { - total += CountBits(Val.substr(0, Pos)); - Val = Val.substr(Pos + 1); + while ((pos = val.find(',')) != std::string::npos) { + total += CountBits(val.substr(0, pos)); + val = val.substr(pos + 1); } - if (!Val.empty()) { - total += CountBits(Val); + if (!val.empty()) { + total += CountBits(val); } return total; } @@ -257,16 +253,16 @@ std::vector GetCacheSizesFromKVFS() { std::vector res; std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; - int Idx = 0; + int idx = 0; while (true) { CPUInfo::CacheInfo info; - std::string FPath = StrCat(dir, "index", Idx++, "/"); - std::ifstream f(StrCat(FPath, "size").c_str()); + std::string fpath = StrCat(dir, "index", idx++, "/"); + std::ifstream f(StrCat(fpath, "size").c_str()); if (!f.is_open()) break; std::string suffix; f >> info.size; if (f.fail()) - PrintErrorAndDie("Failed while reading file '", FPath, "size'"); + PrintErrorAndDie("Failed while reading file '", fpath, "size'"); if (f.good()) { f >> suffix; if (f.bad()) @@ -277,13 +273,13 @@ else if (suffix == "K") info.size *= 1024; } - if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) - PrintErrorAndDie("Failed to read from file ", FPath, "type"); - if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) - PrintErrorAndDie("Failed to read from file ", FPath, "level"); + if (!ReadFromFile(StrCat(fpath, "type"), &info.type)) + PrintErrorAndDie("Failed to read from file ", fpath, "type"); + if (!ReadFromFile(StrCat(fpath, "level"), &info.level)) + PrintErrorAndDie("Failed to read from file ", fpath, "level"); std::string map_str; - if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str)) - PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map"); + if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str)) + PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map"); info.num_sharing = CountSetBitsInCPUMap(map_str); res.push_back(info); } @@ -294,26 +290,26 @@ #ifdef BENCHMARK_OS_MACOSX std::vector GetCacheSizesMacOSX() { std::vector res; - std::array CacheCounts{{0, 0, 0, 0}}; - GetSysctl("hw.cacheconfig", &CacheCounts); + std::array cache_counts{{0, 0, 0, 0}}; + GetSysctl("hw.cacheconfig", &cache_counts); struct { std::string name; std::string type; int level; - uint64_t num_sharing; - } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]}, - {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]}, - {"hw.l2cachesize", "Unified", 2, CacheCounts[2]}, - {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}}; - for (auto& C : Cases) { + int num_sharing; + } cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]}, + {"hw.l1icachesize", "Instruction", 1, cache_counts[1]}, + {"hw.l2cachesize", "Unified", 2, cache_counts[2]}, + {"hw.l3cachesize", "Unified", 3, cache_counts[3]}}; + for (auto& c : cases) { int val; - if (!GetSysctl(C.name, &val)) 
continue; + if (!GetSysctl(c.name, &val)) continue; CPUInfo::CacheInfo info; - info.type = C.type; - info.level = C.level; + info.type = c.type; + info.level = c.level; info.size = val; - info.num_sharing = static_cast(C.num_sharing); + info.num_sharing = c.num_sharing; res.push_back(std::move(info)); } return res; @@ -338,16 +334,16 @@ for (; it != end; ++it) { if (it->Relationship != RelationCache) continue; using BitSet = std::bitset; - BitSet B(it->ProcessorMask); + BitSet b(it->ProcessorMask); // To prevent duplicates, only consider caches where CPU 0 is specified - if (!B.test(0)) continue; - CInfo* Cache = &it->Cache; + if (!b.test(0)) continue; + const CInfo& cache = it->Cache; CPUInfo::CacheInfo C; - C.num_sharing = static_cast(B.count()); - C.level = Cache->Level; - C.size = Cache->Size; + C.num_sharing = static_cast(b.count()); + C.level = cache.Level; + C.size = cache.Size; C.type = "Unknown"; - switch (Cache->Type) { + switch (cache.Type) { case CacheUnified: C.type = "Unified"; break; @@ -410,6 +406,8 @@ return GetCacheSizesWindows(); #elif defined(BENCHMARK_OS_QNX) return GetCacheSizesQNX(); +#elif defined(BENCHMARK_OS_QURT) + return std::vector(); #else return GetCacheSizesFromKVFS(); #endif @@ -418,21 +416,37 @@ std::string GetSystemName() { #if defined(BENCHMARK_OS_WINDOWS) std::string str; - const unsigned COUNT = MAX_COMPUTERNAME_LENGTH + 1; + static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1; TCHAR hostname[COUNT] = {'\0'}; DWORD DWCOUNT = COUNT; if (!GetComputerName(hostname, &DWCOUNT)) return std::string(""); #ifndef UNICODE str = std::string(hostname, DWCOUNT); #else - // Using wstring_convert, Is deprecated in C++17 - using convert_type = std::codecvt_utf8; - std::wstring_convert converter; - std::wstring wStr(hostname, DWCOUNT); - str = converter.to_bytes(wStr); + std::vector converted; + // Find the length first. + int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname, + DWCOUNT, converted.begin(), 0); + // TODO: Report error from GetLastError()? + if (len == 0) return std::string(""); + converted.reserve(len + 1); + + len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, hostname, DWCOUNT, + converted.begin(), converted.size()); + // TODO: Report error from GetLastError()? + if (len == 0) return std::string(""); + str = std::string(converted.data()); #endif return str; -#else // defined(BENCHMARK_OS_WINDOWS) +#elif defined(BENCHMARK_OS_QURT) + std::string str = "Hexagon DSP"; + qurt_arch_version_t arch_version_struct; + if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) { + str += " v"; + str += std::to_string(arch_version_struct.arch_version); + } + return str; +#else #ifndef HOST_NAME_MAX #ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined #define HOST_NAME_MAX 64 @@ -442,6 +456,8 @@ #define HOST_NAME_MAX 154 #elif defined(BENCHMARK_OS_RTEMS) #define HOST_NAME_MAX 256 +#elif defined(BENCHMARK_OS_SOLARIS) +#define HOST_NAME_MAX MAXHOSTNAMELEN #else #pragma message("HOST_NAME_MAX not defined. using 64") #define HOST_NAME_MAX 64 @@ -456,8 +472,8 @@ int GetNumCPUs() { #ifdef BENCHMARK_HAS_SYSCTL - int NumCPU = -1; - if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; + int num_cpu = -1; + if (GetSysctl("hw.ncpu", &num_cpu)) return num_cpu; fprintf(stderr, "Err: %s\n", strerror(errno)); std::exit(EXIT_FAILURE); #elif defined(BENCHMARK_OS_WINDOWS) @@ -471,17 +487,23 @@ // group #elif defined(BENCHMARK_OS_SOLARIS) // Returns -1 in case of a failure. 
- int NumCPU = sysconf(_SC_NPROCESSORS_ONLN); - if (NumCPU < 0) { + long num_cpu = sysconf(_SC_NPROCESSORS_ONLN); + if (num_cpu < 0) { fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", strerror(errno)); } - return NumCPU; + return (int)num_cpu; #elif defined(BENCHMARK_OS_QNX) return static_cast(_syspage_ptr->num_cpu); +#elif defined(BENCHMARK_OS_QURT) + qurt_sysenv_max_hthreads_t hardware_threads; + if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) { + hardware_threads.max_hthreads = 1; + } + return hardware_threads.max_hthreads; #else - int NumCPUs = 0; - int MaxID = -1; + int num_cpus = 0; + int max_id = -1; std::ifstream f("/proc/cpuinfo"); if (!f.is_open()) { std::cerr << "failed to open /proc/cpuinfo\n"; @@ -491,21 +513,21 @@ std::string ln; while (std::getline(f, ln)) { if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); + std::size_t split_idx = ln.find(':'); std::string value; #if defined(__s390__) // s390 has another format in /proc/cpuinfo // it needs to be parsed differently - if (SplitIdx != std::string::npos) - value = ln.substr(Key.size() + 1, SplitIdx - Key.size() - 1); + if (split_idx != std::string::npos) + value = ln.substr(Key.size() + 1, split_idx - Key.size() - 1); #else - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (split_idx != std::string::npos) value = ln.substr(split_idx + 1); #endif if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { - NumCPUs++; + num_cpus++; if (!value.empty()) { - int CurID = benchmark::stoi(value); - MaxID = std::max(CurID, MaxID); + const int cur_id = benchmark::stoi(value); + max_id = std::max(cur_id, max_id); } } } @@ -519,12 +541,12 @@ } f.close(); - if ((MaxID + 1) != NumCPUs) { + if ((max_id + 1) != num_cpus) { fprintf(stderr, "CPU ID assignments in /proc/cpuinfo seem messed up." " This is usually caused by a bad BIOS.\n"); } - return NumCPUs; + return num_cpus; #endif BENCHMARK_UNREACHABLE(); } @@ -569,7 +591,7 @@ return error_value; } - auto startsWithKey = [](std::string const& Value, std::string const& Key) { + auto StartsWithKey = [](std::string const& Value, std::string const& Key) { if (Key.size() > Value.size()) return false; auto Cmp = [&](char X, char Y) { return std::tolower(X) == std::tolower(Y); @@ -580,18 +602,18 @@ std::string ln; while (std::getline(f, ln)) { if (ln.empty()) continue; - size_t SplitIdx = ln.find(':'); + std::size_t split_idx = ln.find(':'); std::string value; - if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (split_idx != std::string::npos) value = ln.substr(split_idx + 1); // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // accept positive values. Some environments (virtual machines) report zero, // which would cause infinite looping in WallTime_Init. 
- if (startsWithKey(ln, "cpu MHz")) { + if (StartsWithKey(ln, "cpu MHz")) { if (!value.empty()) { double cycles_per_second = benchmark::stod(value) * 1000000.0; if (cycles_per_second > 0) return cycles_per_second; } - } else if (startsWithKey(ln, "bogomips")) { + } else if (StartsWithKey(ln, "bogomips")) { if (!value.empty()) { bogo_clock = benchmark::stod(value) * 1000000.0; if (bogo_clock < 0.0) bogo_clock = error_value; @@ -613,7 +635,7 @@ if (bogo_clock >= 0.0) return bogo_clock; #elif defined BENCHMARK_HAS_SYSCTL - constexpr auto* FreqStr = + constexpr auto* freqStr = #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) "machdep.tsc_freq"; #elif defined BENCHMARK_OS_OPENBSD @@ -625,14 +647,17 @@ #endif unsigned long long hz = 0; #if defined BENCHMARK_OS_OPENBSD - if (GetSysctl(FreqStr, &hz)) return hz * 1000000; + if (GetSysctl(freqStr, &hz)) return hz * 1000000; #else - if (GetSysctl(FreqStr, &hz)) return hz; + if (GetSysctl(freqStr, &hz)) return hz; #endif fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", - FreqStr, strerror(errno)); + freqStr, strerror(errno)); + fprintf(stderr, + "This does not affect benchmark measurements, only the " + "metadata output.\n"); -#elif defined BENCHMARK_OS_WINDOWS +#elif defined BENCHMARK_OS_WINDOWS_WIN32 // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. DWORD data, data_size = sizeof(data); @@ -649,7 +674,8 @@ std::cerr << "failed to open /dev/kstat\n"; return -1; } - kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0"); + kstat_t* ksp = kstat_lookup(kc, const_cast("cpu_info"), -1, + const_cast("cpu_info0")); if (!ksp) { std::cerr << "failed to lookup in /dev/kstat\n"; return -1; @@ -658,8 +684,8 @@ std::cerr << "failed to read from /dev/kstat\n"; return -1; } - kstat_named_t* knp = - (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz"); + kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup( + ksp, const_cast("current_clock_Hz")); if (!knp) { std::cerr << "failed to lookup data in /dev/kstat\n"; return -1; @@ -675,9 +701,12 @@ #elif defined(BENCHMARK_OS_QNX) return static_cast((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * (int64_t)(1000 * 1000)); +#elif defined(BENCHMARK_OS_QURT) + // QuRT doesn't provide any API to query Hexagon frequency. + return 1000000000; #endif // If we've fallen through, attempt to roughly estimate the CPU clock rate. 
- const int estimate_time_ms = 1000; + static constexpr int estimate_time_ms = 1000; const auto start_ticks = cycleclock::Now(); SleepForMilliseconds(estimate_time_ms); return static_cast(cycleclock::Now() - start_ticks); @@ -688,7 +717,7 @@ defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ !defined(__ANDROID__) - constexpr int kMaxSamples = 3; + static constexpr int kMaxSamples = 3; std::vector res(kMaxSamples, 0.0); const int nelem = getloadavg(res.data(), kMaxSamples); if (nelem < 1) { diff -Nru benchmark-1.6.1/src/timers.cc benchmark-1.7.1/src/timers.cc --- benchmark-1.6.1/src/timers.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/src/timers.cc 2022-11-11 14:01:03.000000000 +0000 @@ -23,7 +23,7 @@ #include #else #include -#ifndef BENCHMARK_OS_FUCHSIA +#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) #include #endif #include @@ -38,6 +38,9 @@ #include #include #endif +#if defined(BENCHMARK_OS_QURT) +#include +#endif #endif #ifdef BENCHMARK_OS_EMSCRIPTEN @@ -79,7 +82,7 @@ static_cast(user.QuadPart)) * 1e-7; } -#elif !defined(BENCHMARK_OS_FUCHSIA) +#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) double MakeTime(struct rusage const& ru) { return (static_cast(ru.ru_utime.tv_sec) + static_cast(ru.ru_utime.tv_usec) * 1e-6 + @@ -119,11 +122,15 @@ &user_time)) return MakeTime(kernel_time, user_time); DiagnoseAndExit("GetProccessTimes() failed"); +#elif defined(BENCHMARK_OS_QURT) + return static_cast( + qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + 1.0e-6; #elif defined(BENCHMARK_OS_EMSCRIPTEN) // clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten. // Use Emscripten-specific API. Reported CPU time would be exactly the // same as total time, but this is ok because there aren't long-latency - // syncronous system calls in Emscripten. + // synchronous system calls in Emscripten. return emscripten_get_now() * 1e-3; #elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX) // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. @@ -149,6 +156,10 @@ GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time, &user_time); return MakeTime(kernel_time, user_time); +#elif defined(BENCHMARK_OS_QURT) + return static_cast( + qurt_timer_timetick_to_us(qurt_timer_get_ticks())) * + 1.0e-6; #elif defined(BENCHMARK_OS_MACOSX) // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. // See https://github.com/google/benchmark/pull/292 diff -Nru benchmark-1.6.1/test/AssemblyTests.cmake benchmark-1.7.1/test/AssemblyTests.cmake --- benchmark-1.6.1/test/AssemblyTests.cmake 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/AssemblyTests.cmake 2022-11-11 14:01:03.000000000 +0000 @@ -1,3 +1,23 @@ +set(CLANG_SUPPORTED_VERSION "5.0.0") +set(GCC_SUPPORTED_VERSION "5.5.0") + +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${CLANG_SUPPORTED_VERSION}) + message (WARNING + "Unsupported Clang version " ${CMAKE_CXX_COMPILER_VERSION} + ". Expected is " ${CLANG_SUPPORTED_VERSION} + ". Assembly tests may be broken.") + endif() +elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${GCC_SUPPORTED_VERSION}) + message (WARNING + "Unsupported GCC version " ${CMAKE_CXX_COMPILER_VERSION} + ". Expected is " ${GCC_SUPPORTED_VERSION} + ". 
Assembly tests may be broken.") + endif() +else() + message (WARNING "Unsupported compiler. Assembly tests may be broken.") +endif() include(split_list) @@ -23,6 +43,7 @@ macro(add_filecheck_test name) cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV}) add_library(${name} OBJECT ${name}.cc) + target_link_libraries(${name} PRIVATE benchmark::benchmark) set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}") set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s") add_custom_target(copy_${name} ALL diff -Nru benchmark-1.6.1/test/basic_test.cc benchmark-1.7.1/test/basic_test.cc --- benchmark-1.6.1/test/basic_test.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/basic_test.cc 2022-11-11 14:01:03.000000000 +0000 @@ -147,7 +147,7 @@ auto arg = state.range(0); T sum = 0; for (auto _ : state) { - sum += arg; + sum += static_cast(arg); } } BENCHMARK(BM_OneTemplateFunc)->Arg(1); @@ -159,8 +159,8 @@ A sum = 0; B prod = 1; for (auto _ : state) { - sum += arg; - prod *= arg; + sum += static_cast(arg); + prod *= static_cast(arg); } } BENCHMARK(BM_TwoTemplateFunc)->Arg(1); diff -Nru benchmark-1.6.1/test/benchmark_gtest.cc benchmark-1.7.1/test/benchmark_gtest.cc --- benchmark-1.6.1/test/benchmark_gtest.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/benchmark_gtest.cc 2022-11-11 14:01:03.000000000 +0000 @@ -3,12 +3,12 @@ #include #include "../src/benchmark_register.h" +#include "benchmark/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace benchmark { namespace internal { -extern std::map* global_context; namespace { @@ -38,8 +38,9 @@ TEST(AddRangeTest, FullRange8) { std::vector dst; - AddRange(&dst, int8_t{1}, std::numeric_limits::max(), 8); - EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127)); + AddRange(&dst, int8_t{1}, std::numeric_limits::max(), int8_t{8}); + EXPECT_THAT( + dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127})); } TEST(AddRangeTest, FullRange64) { @@ -129,11 +130,13 @@ TEST(AddRangeTest, Simple8) { std::vector dst; - AddRange(&dst, 1, 8, 2); - EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8)); + AddRange(&dst, int8_t{1}, int8_t{8}, int8_t{2}); + EXPECT_THAT(dst, + testing::ElementsAre(int8_t{1}, int8_t{2}, int8_t{4}, int8_t{8})); } TEST(AddCustomContext, Simple) { + std::map *&global_context = GetGlobalContext(); EXPECT_THAT(global_context, nullptr); AddCustomContext("foo", "bar"); @@ -148,6 +151,7 @@ } TEST(AddCustomContext, DuplicateKey) { + std::map *&global_context = GetGlobalContext(); EXPECT_THAT(global_context, nullptr); AddCustomContext("foo", "bar"); diff -Nru benchmark-1.6.1/test/benchmark_name_gtest.cc benchmark-1.7.1/test/benchmark_name_gtest.cc --- benchmark-1.6.1/test/benchmark_name_gtest.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/benchmark_name_gtest.cc 2022-11-11 14:01:03.000000000 +0000 @@ -32,6 +32,14 @@ EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s"); } +TEST(BenchmarkNameTest, MinWarmUpTime) { + auto name = BenchmarkName(); + name.function_name = "function_name"; + name.args = "some_args:3/4"; + name.min_warmup_time = "min_warmup_time:3.5s"; + EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_warmup_time:3.5s"); +} + TEST(BenchmarkNameTest, Iterations) { auto name = BenchmarkName(); name.function_name = "function_name"; diff -Nru benchmark-1.6.1/test/benchmark_random_interleaving_gtest.cc benchmark-1.7.1/test/benchmark_random_interleaving_gtest.cc --- benchmark-1.6.1/test/benchmark_random_interleaving_gtest.cc 
2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/benchmark_random_interleaving_gtest.cc 2022-11-11 14:01:03.000000000 +0000 @@ -51,10 +51,9 @@ void Execute(const std::string& pattern) { queue->Clear(); - BenchmarkReporter* reporter = new NullReporter; + std::unique_ptr reporter(new NullReporter()); FLAGS_benchmark_filter = pattern; - RunSpecifiedBenchmarks(reporter); - delete reporter; + RunSpecifiedBenchmarks(reporter.get()); queue->Put("DONE"); // End marker } diff -Nru benchmark-1.6.1/test/benchmark_setup_teardown_test.cc benchmark-1.7.1/test/benchmark_setup_teardown_test.cc --- benchmark-1.6.1/test/benchmark_setup_teardown_test.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/benchmark_setup_teardown_test.cc 2022-11-11 14:01:03.000000000 +0000 @@ -10,19 +10,19 @@ // Test that Setup() and Teardown() are called exactly once // for each benchmark run (single-threaded). -namespace single { +namespace singlethreaded { static int setup_call = 0; static int teardown_call = 0; -} // namespace single +} // namespace singlethreaded static void DoSetup1(const benchmark::State& state) { - ++single::setup_call; + ++singlethreaded::setup_call; // Setup/Teardown should never be called with any thread_idx != 0. assert(state.thread_index() == 0); } static void DoTeardown1(const benchmark::State& state) { - ++single::teardown_call; + ++singlethreaded::teardown_call; assert(state.thread_index() == 0); } @@ -134,8 +134,8 @@ assert(ret > 0); // Setup/Teardown is called once for each arg group (1,3,5,7). - assert(single::setup_call == 4); - assert(single::teardown_call == 4); + assert(singlethreaded::setup_call == 4); + assert(singlethreaded::teardown_call == 4); // 3 group of threads calling this function (3,5,10). assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3); diff -Nru benchmark-1.6.1/test/benchmark_test.cc benchmark-1.7.1/test/benchmark_test.cc --- benchmark-1.6.1/test/benchmark_test.cc 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/benchmark_test.cc 2022-11-11 14:01:03.000000000 +0000 @@ -26,7 +26,7 @@ namespace { -int BENCHMARK_NOINLINE Factorial(uint32_t n) { +int BENCHMARK_NOINLINE Factorial(int n) { return (n == 1) ? 1 : n * Factorial(n - 1); } @@ -90,7 +90,8 @@ for (int j = 0; j < state.range(1); ++j) data.insert(rand()); } state.SetItemsProcessed(state.iterations() * state.range(1)); - state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int)); + state.SetBytesProcessed(state.iterations() * state.range(1) * + static_cast(sizeof(int))); } // Test many inserts at once to reduce the total iterations needed. Otherwise, @@ -108,7 +109,7 @@ } const int64_t items_processed = state.iterations() * state.range(0); state.SetItemsProcessed(items_processed); - state.SetBytesProcessed(items_processed * sizeof(v)); + state.SetBytesProcessed(items_processed * static_cast(sizeof(v))); } BENCHMARK_TEMPLATE2(BM_Sequential, std::vector, int) ->Range(1 << 0, 1 << 10); @@ -169,7 +170,7 @@ for (int i = from; i < to; i++) { // No need to lock test_vector_mu as ranges // do not overlap between threads. 
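Most of the test edits in this area exist to satisfy the -Wconversion flag added to the Bazel TEST_COPTS just below: sizeof() yields an unsigned size_t, so multiplying it into the signed 64-bit iteration counts converts implicitly. Making the conversion explicit is the whole fix; a self-contained sketch of the pattern:

    #include <vector>
    #include <benchmark/benchmark.h>

    static void BM_Copy(benchmark::State& state) {
      std::vector<int> src(static_cast<size_t>(state.range(0)), 1);
      for (auto _ : state) {
        std::vector<int> dst(src);
        benchmark::DoNotOptimize(dst.data());
      }
      // Cast the unsigned sizeof() so the whole product stays int64_t.
      state.SetBytesProcessed(state.iterations() * state.range(0) *
                              static_cast<int64_t>(sizeof(int)));
    }
    BENCHMARK(BM_Copy)->Arg(1 << 10);
    BENCHMARK_MAIN();
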
- benchmark::DoNotOptimize(test_vector->at(i) = 1); + benchmark::DoNotOptimize(test_vector->at(static_cast(i)) = 1); } } diff -Nru benchmark-1.6.1/test/BUILD benchmark-1.7.1/test/BUILD --- benchmark-1.6.1/test/BUILD 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/BUILD 2022-11-11 14:01:03.000000000 +0000 @@ -1,8 +1,18 @@ +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + +platform( + name = "windows", + constraint_values = [ + "@platforms//os:windows", + ], +) + TEST_COPTS = [ "-pedantic", "-pedantic-errors", "-std=c++11", "-Wall", + "-Wconversion", "-Wextra", "-Wshadow", # "-Wshorten-64-to-32", @@ -10,11 +20,10 @@ "-fstrict-aliasing", ] -PER_SRC_COPTS = ({ - "cxx03_test.cc": ["-std=c++03"], - # Some of the issues with DoNotOptimize only occur when optimization is enabled +# Some of the issues with DoNotOptimize only occur when optimization is enabled +PER_SRC_COPTS = { "donotoptimize_test.cc": ["-O3"], -}) +} TEST_ARGS = ["--benchmark_min_time=0.01"] @@ -22,16 +31,18 @@ "user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"], "repetitions_test.cc": [" --benchmark_repetitions=3"], "spec_arg_test.cc" : ["--benchmark_filter=BM_NotChosen"], + "spec_arg_verbosity_test.cc" : ["--v=42"], }) -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") - cc_library( name = "output_test_helper", testonly = 1, srcs = ["output_test_helper.cc"], hdrs = ["output_test.h"], - copts = TEST_COPTS, + copts = select({ + "//:windows": [], + "//conditions:default": TEST_COPTS, + }), deps = [ "//:benchmark", "//:benchmark_internal_headers", @@ -44,15 +55,17 @@ size = "small", srcs = [test_src], args = TEST_ARGS + PER_SRC_TEST_ARGS.get(test_src, []), - copts = TEST_COPTS + PER_SRC_COPTS.get(test_src, []), + copts = select({ + "//:windows": [], + "//conditions:default": TEST_COPTS, + }) + PER_SRC_COPTS.get(test_src, []) , deps = [ ":output_test_helper", "//:benchmark", "//:benchmark_internal_headers", "@com_google_googletest//:gtest", - ] + ( - ["@com_google_googletest//:gtest_main"] if (test_src[-len("gtest.cc"):] == "gtest.cc") else [] - ), + "@com_google_googletest//:gtest_main", + ] # FIXME: Add support for assembly tests to bazel. 
# See Issue #556 # https://github.com/google/benchmark/issues/556 @@ -61,15 +74,37 @@ ["*test.cc"], exclude = [ "*_assembly_test.cc", + "cxx03_test.cc", "link_main_test.cc", ], ) ] cc_test( + name = "cxx03_test", + size = "small", + srcs = ["cxx03_test.cc"], + copts = TEST_COPTS + ["-std=c++03"], + deps = [ + ":output_test_helper", + "//:benchmark", + "//:benchmark_internal_headers", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], + target_compatible_with = select({ + "//:windows": ["@platforms//:incompatible"], + "//conditions:default": [], + }) +) + +cc_test( name = "link_main_test", size = "small", srcs = ["link_main_test.cc"], - copts = TEST_COPTS, + copts = select({ + "//:windows": [], + "//conditions:default": TEST_COPTS, + }), deps = ["//:benchmark_main"], ) diff -Nru benchmark-1.6.1/test/CMakeLists.txt benchmark-1.7.1/test/CMakeLists.txt --- benchmark-1.6.1/test/CMakeLists.txt 2022-01-10 19:14:27.000000000 +0000 +++ benchmark-1.7.1/test/CMakeLists.txt 2022-11-11 14:01:03.000000000 +0000 @@ -1,5 +1,7 @@ # Enable the tests +set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) include(CheckCXXCompilerFlag) @@ -22,6 +24,10 @@ endforeach() endif() +if (NOT BUILD_SHARED_LIBS) + add_definitions(-DBENCHMARK_STATIC_DEFINE) +endif() + check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) set(BENCHMARK_O3_FLAG "") if (BENCHMARK_HAS_O3_FLAG) @@ -35,10 +41,11 @@ endif() add_library(output_test_helper STATIC output_test_helper.cc output_test.h) +target_link_libraries(output_test_helper PRIVATE benchmark::benchmark) macro(compile_benchmark_test name) add_executable(${name} "${name}.cc") - target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(${name} benchmark::benchmark_main ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_benchmark_test) macro(compile_benchmark_test_with_main name) @@ -48,7 +55,7 @@ macro(compile_output_test name) add_executable(${name} "${name}.cc" output_test.h) - target_link_libraries(${name} output_test_helper benchmark::benchmark + target_link_libraries(${name} output_test_helper benchmark::benchmark_main ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_output_test) @@ -59,6 +66,9 @@ compile_benchmark_test(spec_arg_test) add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen) +compile_benchmark_test(spec_arg_verbosity_test) +add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42) + compile_benchmark_test(benchmark_setup_teardown_test) add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) @@ -158,8 +168,8 @@ compile_output_test(memory_manager_test) add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01) -check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG) -if (BENCHMARK_HAS_CXX03_FLAG) +# MSVC does not allow to set the language standard to C++98/03. +if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") compile_benchmark_test(cxx03_test) set_target_properties(cxx03_test PROPERTIES @@ -170,11 +180,17 @@ # causing the test to fail to compile. To prevent this we explicitly disable # the warning. 
 check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR)
- if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR)
- set_target_properties(cxx03_test
- PROPERTIES
- LINK_FLAGS "-Wno-odr")
+ check_cxx_compiler_flag(-Wno-lto-type-mismatch BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
+ # Cannot set_target_properties multiple times here because the warnings will
+ # be overwritten on each call
+ set (DISABLE_LTO_WARNINGS "")
+ if (BENCHMARK_HAS_WNO_ODR)
+ set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-odr")
+ endif()
+ if (BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
+ set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch")
 endif()
+ set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}")
 add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01)
 endif()
@@ -210,6 +226,7 @@
 add_gtest(statistics_gtest)
 add_gtest(string_util_gtest)
 add_gtest(perf_counters_gtest)
+ add_gtest(time_unit_gtest)
 endif(BENCHMARK_ENABLE_GTEST_TESTS)
 ###############################################################################
diff -Nru benchmark-1.6.1/test/complexity_test.cc benchmark-1.7.1/test/complexity_test.cc
--- benchmark-1.6.1/test/complexity_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/complexity_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -109,7 +109,7 @@
 std::vector<int> ConstructRandomVector(int64_t size) {
 std::vector<int> v;
- v.reserve(static_cast(size));
+ v.reserve(static_cast<size_t>(size));
 for (int i = 0; i < size; ++i) {
 v.push_back(static_cast<int>(std::rand() % size));
 }
@@ -174,7 +174,7 @@
 ->RangeMultiplier(2)
 ->Range(1 << 10, 1 << 16)
 ->Complexity([](benchmark::IterationCount n) {
- return kLog2E * n * log(static_cast<double>(n));
+ return kLog2E * static_cast<double>(n) * log(static_cast<double>(n));
 });
 BENCHMARK(BM_Complexity_O_N_log_N)
 ->RangeMultiplier(2)
diff -Nru benchmark-1.6.1/test/donotoptimize_assembly_test.cc benchmark-1.7.1/test/donotoptimize_assembly_test.cc
--- benchmark-1.6.1/test/donotoptimize_assembly_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/donotoptimize_assembly_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -9,6 +9,9 @@
 extern int ExternInt;
 extern int ExternInt2;
 extern int ExternInt3;
+extern int BigArray[2049];
+
+const int ConstBigArray[2049]{};
 inline int Add42(int x) { return x + 42; }
@@ -23,7 +26,15 @@
 int value;
 int data[2];
 };
+
+struct ExtraLarge {
+ int arr[2049];
+};
 }
+
+extern ExtraLarge ExtraLargeObj;
+const ExtraLarge ConstExtraLargeObj{};
+
 // CHECK-LABEL: test_with_rvalue:
 extern "C" void test_with_rvalue() {
 benchmark::DoNotOptimize(Add42(0));
@@ -68,6 +79,22 @@
 // CHECK: ret
 }
+
+// CHECK-LABEL: test_with_extra_large_lvalue_with_op:
+extern "C" void test_with_extra_large_lvalue_with_op() {
+ ExtraLargeObj.arr[16] = 42;
+ benchmark::DoNotOptimize(ExtraLargeObj);
+ // CHECK: movl $42, ExtraLargeObj+64(%rip)
+ // CHECK: ret
+}
+
+// CHECK-LABEL: test_with_big_array_with_op
+extern "C" void test_with_big_array_with_op() {
+ BigArray[16] = 42;
+ benchmark::DoNotOptimize(BigArray);
+ // CHECK: movl $42, BigArray+64(%rip)
+ // CHECK: ret
+}
+
 // CHECK-LABEL: test_with_non_trivial_lvalue:
 extern "C" void test_with_non_trivial_lvalue() {
 NotTriviallyCopyable NTC(ExternInt);
@@ -96,6 +123,18 @@
 // CHECK: ret
 }
+
+// CHECK-LABEL: test_with_const_extra_large_obj:
+extern "C" void test_with_const_extra_large_obj() {
+ benchmark::DoNotOptimize(ConstExtraLargeObj);
+ // CHECK: ret
+}
+
+// CHECK-LABEL: test_with_const_big_array
+extern "C" void test_with_const_big_array() {
+ benchmark::DoNotOptimize(ConstBigArray);
+ // CHECK: ret
+}
+
 // CHECK-LABEL: test_with_non_trivial_const_lvalue:
 extern "C" void test_with_non_trivial_const_lvalue() {
 const NotTriviallyCopyable Obj(ExternInt);
diff -Nru benchmark-1.6.1/test/donotoptimize_test.cc benchmark-1.7.1/test/donotoptimize_test.cc
--- benchmark-1.6.1/test/donotoptimize_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/donotoptimize_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -4,9 +4,9 @@
 namespace {
 #if defined(__GNUC__)
-std::uint64_t double_up(const std::uint64_t x) __attribute__((const));
+std::int64_t double_up(const std::int64_t x) __attribute__((const));
 #endif
-std::uint64_t double_up(const std::uint64_t x) { return x * 2; }
+std::int64_t double_up(const std::int64_t x) { return x * 2; }
 } // namespace
 // Using DoNotOptimize on types like BitRef seem to cause a lot of problems
@@ -29,6 +29,15 @@
 int main(int, char*[]) {
 // this test verifies compilation of DoNotOptimize() for some types
+ char buffer1[1] = "";
+ benchmark::DoNotOptimize(buffer1);
+
+ char buffer2[2] = "";
+ benchmark::DoNotOptimize(buffer2);
+
+ char buffer3[3] = "";
+ benchmark::DoNotOptimize(buffer3);
+
 char buffer8[8] = "";
 benchmark::DoNotOptimize(buffer8);
@@ -39,6 +48,25 @@
 benchmark::DoNotOptimize(buffer1024);
 benchmark::DoNotOptimize(&buffer1024[0]);
+ const char const_buffer1[1] = "";
+ benchmark::DoNotOptimize(const_buffer1);
+
+ const char const_buffer2[2] = "";
+ benchmark::DoNotOptimize(const_buffer2);
+
+ const char const_buffer3[3] = "";
+ benchmark::DoNotOptimize(const_buffer3);
+
+ const char const_buffer8[8] = "";
+ benchmark::DoNotOptimize(const_buffer8);
+
+ const char const_buffer20[20] = "";
+ benchmark::DoNotOptimize(const_buffer20);
+
+ const char const_buffer1024[1024] = "";
+ benchmark::DoNotOptimize(const_buffer1024);
+ benchmark::DoNotOptimize(&const_buffer1024[0]);
+
 int x = 123;
 benchmark::DoNotOptimize(x);
 benchmark::DoNotOptimize(&x);
diff -Nru benchmark-1.6.1/test/filter_test.cc benchmark-1.7.1/test/filter_test.cc
--- benchmark-1.6.1/test/filter_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/filter_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -20,8 +20,7 @@
 virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
 ++count_;
- max_family_index_ =
- std::max(max_family_index_, report[0].family_index);
+ max_family_index_ = std::max(max_family_index_, report[0].family_index);
 ConsoleReporter::ReportRuns(report);
 };
@@ -29,13 +28,13 @@
 virtual ~TestReporter() {}
- size_t GetCount() const { return count_; }
+ int GetCount() const { return count_; }
- size_t GetMaxFamilyIndex() const { return max_family_index_; }
+ int64_t GetMaxFamilyIndex() const { return max_family_index_; }
 private:
- mutable size_t count_;
- mutable size_t max_family_index_;
+ mutable int count_;
+ mutable int64_t max_family_index_;
 };
 } // end namespace
@@ -79,13 +78,13 @@
 benchmark::Initialize(&argc, argv);
 TestReporter test_reporter;
- const size_t returned_count =
- benchmark::RunSpecifiedBenchmarks(&test_reporter);
+ const int64_t returned_count =
+ static_cast<int64_t>(benchmark::RunSpecifiedBenchmarks(&test_reporter));
 if (argc == 2) {
 // Make sure we ran all of the tests
 std::stringstream ss(argv[1]);
- size_t expected_return;
+ int64_t expected_return;
 ss >> expected_return;
 if (returned_count != expected_return) {
@@ -95,8 +94,8 @@
 return -1;
 }
- const size_t expected_reports = list_only ? 0 : expected_return;
- const size_t reports_count = test_reporter.GetCount();
+ const int64_t expected_reports = list_only ? 0 : expected_return;
+ const int64_t reports_count = test_reporter.GetCount();
 if (reports_count != expected_reports) {
 std::cerr << "ERROR: Expected " << expected_reports
 << " tests to be run but reported_count = " << reports_count
@@ -104,8 +103,8 @@
 return -1;
 }
- const size_t max_family_index = test_reporter.GetMaxFamilyIndex();
- const size_t num_families = reports_count == 0 ? 0 : 1 + max_family_index;
+ const int64_t max_family_index = test_reporter.GetMaxFamilyIndex();
+ const int64_t num_families = reports_count == 0 ? 0 : 1 + max_family_index;
 if (num_families != expected_reports) {
 std::cerr << "ERROR: Expected " << expected_reports
 << " test families to be run but num_families = "
diff -Nru benchmark-1.6.1/test/options_test.cc benchmark-1.7.1/test/options_test.cc
--- benchmark-1.6.1/test/options_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/options_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -33,6 +33,8 @@
 BENCHMARK(BM_basic)->Args({42, 42});
 BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}});
 BENCHMARK(BM_basic)->MinTime(0.7);
+BENCHMARK(BM_basic)->MinWarmUpTime(0.8);
+BENCHMARK(BM_basic)->MinTime(0.1)->MinWarmUpTime(0.2);
 BENCHMARK(BM_basic)->UseRealTime();
 BENCHMARK(BM_basic)->ThreadRange(2, 4);
 BENCHMARK(BM_basic)->ThreadPerCpu();
diff -Nru benchmark-1.6.1/test/perf_counters_gtest.cc benchmark-1.7.1/test/perf_counters_gtest.cc
--- benchmark-1.6.1/test/perf_counters_gtest.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/perf_counters_gtest.cc 2022-11-11 14:01:03.000000000 +0000
@@ -11,6 +11,7 @@
 #endif
 using benchmark::internal::PerfCounters;
+using benchmark::internal::PerfCountersMeasurement;
 using benchmark::internal::PerfCounterValues;
 namespace {
@@ -95,6 +96,53 @@
 EXPECT_GT(values2[1], 0);
 }
+
+TEST(PerfCountersTest, ReopenExistingCounters) {
+ // The test works (i.e. causes read to fail) for the assumptions
+ // about hardware capabilities (i.e. small number (3-4) hardware
+ // counters) at this date.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ std::vector<PerfCounters> counters;
+ counters.reserve(6);
+ for (int i = 0; i < 6; i++)
+ counters.push_back(PerfCounters::Create({kGenericPerfEvent1}));
+ PerfCounterValues values(1);
+ EXPECT_TRUE(counters[0].Snapshot(&values));
+ EXPECT_FALSE(counters[4].Snapshot(&values));
+ EXPECT_FALSE(counters[5].Snapshot(&values));
+}
+
+TEST(PerfCountersTest, CreateExistingMeasurements) {
+ // The test works (i.e. causes read to fail) for the assumptions
+ // about hardware capabilities (i.e. small number (3-4) hardware
+ // counters) at this date,
+ // the same as previous test ReopenExistingCounters.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ std::vector<PerfCountersMeasurement> perf_counter_measurements;
+ std::vector<std::pair<std::string, double>> measurements;
+
+ perf_counter_measurements.reserve(10);
+ for (int i = 0; i < 10; i++)
+ perf_counter_measurements.emplace_back(
+ std::vector<std::string>{kGenericPerfEvent1});
+
+ perf_counter_measurements[0].Start();
+ EXPECT_TRUE(perf_counter_measurements[0].Stop(measurements));
+
+ measurements.clear();
+ perf_counter_measurements[8].Start();
+ EXPECT_FALSE(perf_counter_measurements[8].Stop(measurements));
+
+ measurements.clear();
+ perf_counter_measurements[9].Start();
+ EXPECT_FALSE(perf_counter_measurements[9].Stop(measurements));
+}
+
 size_t do_work() {
 size_t res = 0;
 for (size_t i = 0; i < 100000000; ++i) res += i * i;
diff -Nru benchmark-1.6.1/test/register_benchmark_test.cc benchmark-1.7.1/test/register_benchmark_test.cc
--- benchmark-1.6.1/test/register_benchmark_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/register_benchmark_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -96,6 +96,18 @@
 #endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
 //----------------------------------------------------------------------------//
+// Test RegisterBenchmark with DISABLED_ benchmark
+//----------------------------------------------------------------------------//
+void DISABLED_BM_function(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(DISABLED_BM_function);
+ReturnVal dummy3 = benchmark::RegisterBenchmark("DISABLED_BM_function_manual",
+ DISABLED_BM_function);
+// No need to add cases because we don't expect them to run.
+
+//----------------------------------------------------------------------------//
 // Test RegisterBenchmark with different callable types
 //----------------------------------------------------------------------------//
diff -Nru benchmark-1.6.1/test/reporter_output_test.cc benchmark-1.7.1/test/reporter_output_test.cc
--- benchmark-1.6.1/test/reporter_output_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/reporter_output_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -17,7 +17,7 @@
 AddCases(TC_ConsoleErr,
 {
 {"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default},
- {"Running .*/reporter_output_test(\\.exe)?$", MR_Next},
+ {"Running .*(/|\\\\)reporter_output_test(\\.exe)?$", MR_Next},
 {"Run on \\(%int X %float MHz CPU s?\\)", MR_Next},
 });
 AddCases(TC_JSONOut,
@@ -318,7 +318,7 @@
 ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}});
 // ========================================================================= //
-// ------------------------ Testing Arg Name Output ----------------------- //
+// ------------------------ Testing Arg Name Output ------------------------ //
 // ========================================================================= //
 void BM_arg_name(benchmark::State& state) {
diff -Nru benchmark-1.6.1/test/spec_arg_test.cc benchmark-1.7.1/test/spec_arg_test.cc
--- benchmark-1.6.1/test/spec_arg_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/spec_arg_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -91,5 +91,15 @@
 << matched_functions.front() << "]\n";
 return 2;
 }
+
+ // Test that SetBenchmarkFilter works.
+ const std::string golden_value = "golden_value";
+ benchmark::SetBenchmarkFilter(golden_value);
+ std::string current_value = benchmark::GetBenchmarkFilter();
+ if (golden_value != current_value) {
+ std::cerr << "Expected [" << golden_value
+ << "] for --benchmark_filter but got [" << current_value << "]\n";
+ return 3;
+ }
 return 0;
 }
diff -Nru benchmark-1.6.1/test/spec_arg_verbosity_test.cc benchmark-1.7.1/test/spec_arg_verbosity_test.cc
--- benchmark-1.6.1/test/spec_arg_verbosity_test.cc 1970-01-01 00:00:00.000000000 +0000
+++ benchmark-1.7.1/test/spec_arg_verbosity_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -0,0 +1,43 @@
+#include <string.h>
+
+#include <iostream>
+
+#include "benchmark/benchmark.h"
+
+// Tests that the user specified verbosity level can be get.
+static void BM_Verbosity(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_Verbosity);
+
+int main(int argc, char** argv) {
+ const int32_t flagv = 42;
+
+ // Verify that argv specify --v=42.
+ bool found = false;
+ for (int i = 0; i < argc; ++i) {
+ if (strcmp("--v=42", argv[i]) == 0) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ std::cerr << "This test requires '--v=42' to be passed as a command-line "
+ << "argument.\n";
+ return 1;
+ }
+
+ benchmark::Initialize(&argc, argv);
+
+ // Check that the current flag value is reported accurately via the
+ // GetBenchmarkVerbosity() function.
+ if (flagv != benchmark::GetBenchmarkVerbosity()) {
+ std::cerr
+ << "Seeing different value for flags. GetBenchmarkVerbosity() returns ["
+ << benchmark::GetBenchmarkVerbosity() << "] expected flag=[" << flagv
+ << "]\n";
+ return 1;
+ }
+ return 0;
+}
diff -Nru benchmark-1.6.1/test/time_unit_gtest.cc benchmark-1.7.1/test/time_unit_gtest.cc
--- benchmark-1.6.1/test/time_unit_gtest.cc 1970-01-01 00:00:00.000000000 +0000
+++ benchmark-1.7.1/test/time_unit_gtest.cc 2022-11-11 14:01:03.000000000 +0000
@@ -0,0 +1,37 @@
+#include "../include/benchmark/benchmark.h"
+#include "gtest/gtest.h"
+
+namespace benchmark {
+namespace internal {
+
+namespace {
+
+class DummyBenchmark : public Benchmark {
+ public:
+ DummyBenchmark() : Benchmark("dummy") {}
+ virtual void Run(State&) override {}
+};
+
+TEST(DefaultTimeUnitTest, TimeUnitIsNotSet) {
+ DummyBenchmark benchmark;
+ EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
+}
+
+TEST(DefaultTimeUnitTest, DefaultIsSet) {
+ DummyBenchmark benchmark;
+ EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
+ SetDefaultTimeUnit(kMillisecond);
+ EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
+}
+
+TEST(DefaultTimeUnitTest, DefaultAndExplicitUnitIsSet) {
+ DummyBenchmark benchmark;
+ benchmark.Unit(kMillisecond);
+ SetDefaultTimeUnit(kMicrosecond);
+
+ EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
+}
+
+} // namespace
+} // namespace internal
+} // namespace benchmark
diff -Nru benchmark-1.6.1/test/user_counters_test.cc benchmark-1.7.1/test/user_counters_test.cc
--- benchmark-1.6.1/test/user_counters_test.cc 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/test/user_counters_test.cc 2022-11-11 14:01:03.000000000 +0000
@@ -195,8 +195,7 @@
 CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert);
 // ========================================================================= //
-// ------------------------- InvertedRate Counters Output
-// -------------------------- //
+// --------------------- InvertedRate Counters Output ---------------------- //
 // ========================================================================= //
 void BM_Counters_InvertedRate(benchmark::State& state) {
@@ -460,7 +459,7 @@
 &CheckIsIterationInvariantRate);
 // ========================================================================= //
-// ------------------- AvgIterations Counters Output ------------------ //
+// --------------------- AvgIterations Counters Output --------------------- //
 // ========================================================================= //
 void BM_Counters_AvgIterations(benchmark::State& state) {
@@ -502,7 +501,7 @@
 CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations);
 // ========================================================================= //
-// ----------------- AvgIterationsRate Counters Output ---------------- //
+// ------------------- AvgIterationsRate Counters Output ------------------- //
 // ========================================================================= //
 void BM_Counters_kAvgIterationsRate(benchmark::State& state) {
diff -Nru benchmark-1.6.1/tools/compare.py benchmark-1.7.1/tools/compare.py
--- benchmark-1.6.1/tools/compare.py 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/tools/compare.py 2022-11-11 14:01:03.000000000 +0000
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import unittest
 """
diff -Nru benchmark-1.6.1/tools/gbench/Inputs/test1_run1.json benchmark-1.7.1/tools/gbench/Inputs/test1_run1.json
--- benchmark-1.6.1/tools/gbench/Inputs/test1_run1.json 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/tools/gbench/Inputs/test1_run1.json 2022-11-11 14:01:03.000000000 +0000
@@ -114,6 +114,14 @@
 "real_time": 1,
 "cpu_time": 1,
 "time_unit": "s"
+ },
+ {
+ "name": "BM_hasLabel",
+ "label": "a label",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "s"
 }
 ]
 }
diff -Nru benchmark-1.6.1/tools/gbench/Inputs/test1_run2.json benchmark-1.7.1/tools/gbench/Inputs/test1_run2.json
--- benchmark-1.6.1/tools/gbench/Inputs/test1_run2.json 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/tools/gbench/Inputs/test1_run2.json 2022-11-11 14:01:03.000000000 +0000
@@ -114,6 +114,14 @@
 "real_time": 1,
 "cpu_time": 1,
 "time_unit": "ns"
+ },
+ {
+ "name": "BM_hasLabel",
+ "label": "a label",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "s"
 }
 ]
 }
diff -Nru benchmark-1.6.1/tools/gbench/report.py benchmark-1.7.1/tools/gbench/report.py
--- benchmark-1.6.1/tools/gbench/report.py 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/tools/gbench/report.py 2022-11-11 14:01:03.000000000 +0000
@@ -9,7 +9,6 @@
 from scipy.stats import mannwhitneyu, gmean
 from numpy import array
-from pandas import Timedelta
 class BenchmarkColor(object):
@@ -43,6 +42,13 @@
 UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better.
 UTEST_COL_NAME = "_pvalue"
+_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
+ "s": 1.0,
+ "ms": 1e-3,
+ "us": 1e-6,
+ "ns": 1e-9,
+}
+
 def color_format(use_color, fmt_str, *args, **kwargs):
 """
@@ -157,9 +163,9 @@
 Get value of field_name field of benchmark, which is time with time unit
 time_unit, as time in seconds.
 """
- time_unit = benchmark['time_unit'] if 'time_unit' in benchmark else 's'
- dt = Timedelta(benchmark[field_name], time_unit)
- return dt / Timedelta(1, 's')
+ timedelta = benchmark[field_name]
+ time_unit = benchmark.get('time_unit', 's')
+ return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
 def calculate_geomean(json):
@@ -249,6 +255,7 @@
 partitions = partition_benchmarks(json1, json2)
 for partition in partitions:
 benchmark_name = partition[0][0]['name']
+ label = partition[0][0]['label'] if 'label' in partition[0][0] else ''
 time_unit = partition[0][0]['time_unit']
 measurements = []
 utest_results = {}
@@ -289,6 +296,7 @@
 aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else ''
 diff_report.append({
 'name': benchmark_name,
+ 'label': label,
 'measurements': measurements,
 'time_unit': time_unit,
 'run_type': run_type,
@@ -301,6 +309,7 @@
 if lhs_gmean.any() and rhs_gmean.any():
 diff_report.append({
 'name': 'OVERALL_GEOMEAN',
+ 'label': '',
 'measurements': [{
 'real_time': lhs_gmean[0],
 'cpu_time': lhs_gmean[1],
@@ -450,7 +459,8 @@
 '-0.1000', '100', '110', '100', '90'],
 ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
 ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
- ['OVERALL_GEOMEAN', '-0.8344', '-0.8026', '0', '0', '0', '0']
+ ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'],
+ ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0']
 ]
 output_lines_with_header = print_difference_report(
 self.json_diff_report, use_color=False)
@@ -467,81 +477,127 @@
 expected_output = [
 {
 'name': 'BM_SameTimes',
- 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
+ 'label': '',
+ 'measurements': [{'time': 0.0000, 'cpu': 0.0000,
+ 'real_time': 10, 'real_time_other': 10,
+ 'cpu_time': 10, 'cpu_time_other': 10}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_2xFaster',
- 'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
+ 'label': '',
+ 'measurements': [{'time': -0.5000, 'cpu': -0.5000,
+ 'real_time': 50, 'real_time_other': 25,
+ 'cpu_time': 50, 'cpu_time_other': 25}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_2xSlower',
- 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
+ 'label': '',
+ 'measurements': [{'time': 1.0000, 'cpu': 1.0000,
+ 'real_time': 50, 'real_time_other': 100,
+ 'cpu_time': 50, 'cpu_time_other': 100}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_1PercentFaster',
- 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
+ 'label': '',
+ 'measurements': [{'time': -0.0100, 'cpu': -0.0100,
+ 'real_time': 100, 'real_time_other': 98.9999999,
+ 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_1PercentSlower',
- 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
+ 'label': '',
+ 'measurements': [{'time': 0.0100, 'cpu': 0.0100,
+ 'real_time': 100, 'real_time_other': 101,
+ 'cpu_time': 100, 'cpu_time_other': 101}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_10PercentFaster',
- 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
+ 'label': '',
+ 'measurements': [{'time': -0.1000, 'cpu': -0.1000,
+ 'real_time': 100, 'real_time_other': 90,
+ 'cpu_time': 100, 'cpu_time_other': 90}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_10PercentSlower',
- 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
+ 'label': '',
+ 'measurements': [{'time': 0.1000, 'cpu': 0.1000,
+ 'real_time': 100, 'real_time_other': 110,
+ 'cpu_time': 100, 'cpu_time_other': 110}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_100xSlower',
- 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
+ 'label': '',
+ 'measurements': [{'time': 99.0000, 'cpu': 99.0000,
+ 'real_time': 100, 'real_time_other': 10000,
+ 'cpu_time': 100, 'cpu_time_other': 10000}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_100xFaster',
- 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
+ 'label': '',
+ 'measurements': [{'time': -0.9900, 'cpu': -0.9900,
+ 'real_time': 10000, 'real_time_other': 100,
+ 'cpu_time': 10000, 'cpu_time_other': 100}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_10PercentCPUToTime',
- 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
+ 'label': '',
+ 'measurements': [{'time': 0.1000, 'cpu': -0.1000,
+ 'real_time': 100, 'real_time_other': 110,
+ 'cpu_time': 100, 'cpu_time_other': 90}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_ThirdFaster',
- 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
+ 'label': '',
+ 'measurements': [{'time': -0.3333, 'cpu': -0.3334,
+ 'real_time': 100, 'real_time_other': 67,
+ 'cpu_time': 100, 'cpu_time_other': 67}],
 'time_unit': 'ns',
 'utest': {}
 },
 {
 'name': 'BM_NotBadTimeUnit',
- 'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
+ 'label': '',
+ 'measurements': [{'time': -0.9000, 'cpu': 0.2000,
+ 'real_time': 0.4, 'real_time_other': 0.04,
+ 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
+ 'time_unit': 's',
+ 'utest': {}
+ },
+ {
+ 'name': 'BM_hasLabel',
+ 'label': 'a label',
+ 'measurements': [{'time': 0.0000, 'cpu': 0.0000,
+ 'real_time': 1, 'real_time_other': 1,
+ 'cpu_time': 1, 'cpu_time_other': 1}],
 'time_unit': 's',
 'utest': {}
 },
 {
 'name': 'OVERALL_GEOMEAN',
- 'measurements': [{'real_time': 1.193776641714438e-06, 'cpu_time': 1.2144445585302297e-06,
+ 'label': '',
+ 'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06,
 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07,
- 'time': -0.834399601997324, 'cpu': -0.8025889499549471}],
+ 'time': -0.8112976497120911, 'cpu': -0.7778551721181174}],
 'time_unit': 's',
 'run_type': 'aggregate',
 'aggregate_name': 'geomean',
 'utest': {}
@@ -551,6 +607,7 @@
 for out, expected in zip(
 self.json_diff_report, expected_output):
 self.assertEqual(out['name'], expected['name'])
+ self.assertEqual(out['label'], expected['label'])
 self.assertEqual(out['time_unit'], expected['time_unit'])
 assert_utest(self, out, expected)
 assert_measurements(self, out, expected)
diff -Nru benchmark-1.6.1/tools/libpfm.BUILD.bazel benchmark-1.7.1/tools/libpfm.BUILD.bazel
--- benchmark-1.6.1/tools/libpfm.BUILD.bazel 1970-01-01 00:00:00.000000000 +0000
+++ benchmark-1.7.1/tools/libpfm.BUILD.bazel 2022-11-11 14:01:03.000000000 +0000
@@ -0,0 +1,21 @@
+# Build rule for libpfm, which is required to collect performance counters for
+# BENCHMARK_ENABLE_LIBPFM builds.
+
+load("@rules_foreign_cc//foreign_cc:defs.bzl", "make")
+
+filegroup(
+ name = "pfm_srcs",
+ srcs = glob(["**"]),
+)
+
+make(
+ name = "libpfm",
+ lib_source = ":pfm_srcs",
+ lib_name = "libpfm",
+ copts = [
+ "-Wno-format-truncation",
+ ],
+ visibility = [
+ "//visibility:public",
+ ],
+)
diff -Nru benchmark-1.6.1/tools/strip_asm.py benchmark-1.7.1/tools/strip_asm.py
--- benchmark-1.6.1/tools/strip_asm.py 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/tools/strip_asm.py 2022-11-11 14:01:03.000000000 +0000
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 """
 strip_asm.py - Cleanup ASM output for the specified file
diff -Nru benchmark-1.6.1/WORKSPACE benchmark-1.7.1/WORKSPACE
--- benchmark-1.6.1/WORKSPACE 2022-01-10 19:14:27.000000000 +0000
+++ benchmark-1.7.1/WORKSPACE 2022-11-11 14:01:03.000000000 +0000
@@ -3,6 +3,27 @@
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
+
+http_archive(
+ name = "bazel_skylib",
+ urls = [
+ "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
+ "https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
+ ],
+ sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
+)
+
+# https://github.com/bazelbuild/rules_foreign_cc/
+http_archive(
+ name = "rules_foreign_cc",
+ sha256 = "bcd0c5f46a49b85b384906daae41d277b3dc0ff27c7c752cc51e43048a58ec83",
+ strip_prefix = "rules_foreign_cc-0.7.1",
+ url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.1.tar.gz",
+)
+
+load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies")
+rules_foreign_cc_dependencies()
+
 http_archive(
 name = "com_google_absl",
 sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
@@ -16,12 +37,22 @@
 tag = "release-1.11.0",
 )
+
+# Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
+http_archive(
+ name = "libpfm",
+ build_file = "//tools:libpfm.BUILD.bazel",
+ sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
+ type = "tar.gz",
+ strip_prefix = "libpfm-4.11.0",
+ urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
+)
+
 http_archive(
 name = "pybind11",
 build_file = "@//bindings/python:pybind11.BUILD",
- sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d",
- strip_prefix = "pybind11-2.4.3",
- urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"],
+ sha256 = "eacf582fa8f696227988d08cfc46121770823839fe9e301a20fbce67e7cd70ec",
+ strip_prefix = "pybind11-2.10.0",
+ urls = ["https://github.com/pybind/pybind11/archive/v2.10.0.tar.gz"],
 )
 new_local_repository(