Lumiera  0.pre.03
»edit your freedom«
microbenchmark.hpp
Go to the documentation of this file.
1 /*
2  MICROBENCHMARK.hpp - multithreaded timing measurement
3 
4  Copyright (C) Lumiera.org
5  2018, Hermann Vosseler <Ichthyostega@web.de>
6 
7  This program is free software; you can redistribute it and/or
8  modify it under the terms of the GNU General Public License as
9  published by the Free Software Foundation; either version 2 of
10  the License, or (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21 */
22 
23 
53 #ifndef LIB_TEST_MICROBENCHMARK_H
54 #define LIB_TEST_MICROBENCHMARK_H
55 
56 
57 #include "lib/meta/function.hpp"
59 #include "lib/sync-barrier.hpp"
60 #include "lib/thread.hpp"
61 
63 
64 #include <chrono>
65 
66 
67 
68 namespace lib {
69 namespace test{
70 
namespace { // defaults shared by all benchmark helpers in this header

  /** scale of all reported timings: std::micro, i.e. results are in µ-sec */
  using CLOCK_SCALE = std::micro;

  /** number of invocations used whenever the caller passes no explicit repeat count */
  constexpr size_t DEFAULT_RUNS{10'000'000};
}
75 
76 
83  template<class FUN>
84  inline double
85  benchmarkTime (FUN const& invokeTestCode, const size_t repeatCnt =1)
86  {
87  using std::chrono::steady_clock;
88  using Dur = std::chrono::duration<double, CLOCK_SCALE>;
89 
90  auto start = steady_clock::now();
91  invokeTestCode();
92  Dur duration = steady_clock::now () - start;
93  return duration.count() / repeatCnt;
94  };
95 
96 
102  template<class FUN>
103  inline size_t
104  benchmarkLoop (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
105  {
106  // the test subject gets the current loop-index and returns a checksum value
107  auto subject4benchmark = microbenchmark::adapted4benchmark (testSubject);
108 
109  size_t checksum{0};
110  for (size_t i=0; i<repeatCnt; ++i)
111  checksum += subject4benchmark(i);
112  return checksum;
113  }
114 
115 
125  template<class FUN>
126  inline auto
127  microBenchmark (FUN const& testSubject, const size_t repeatCnt = DEFAULT_RUNS)
128  {
129  volatile size_t checksum{0};
130  auto invokeTestLoop = [&]{ checksum = benchmarkLoop (testSubject, repeatCnt); };
131  double micros = benchmarkTime (invokeTestLoop, repeatCnt);
132  return std::make_pair (micros, checksum);
133  }
134 
135 
136 
137 
154  template<size_t nThreads, class FUN>
155  inline auto
156  threadBenchmark(FUN const& subject, const size_t repeatCnt = DEFAULT_RUNS)
157  {
158  using std::chrono::steady_clock;
159  using Dur = std::chrono::duration<double, CLOCK_SCALE>;
160 
161  // the test subject gets the current loop-index and returns a checksum value
162  auto subject4benchmark = microbenchmark::adapted4benchmark (subject);
163  using Subject = decltype(subject4benchmark);
164 
165  struct Thread
167  {
168  Thread(Subject const& testSubject, size_t loopCnt, SyncBarrier& testStart)
169  : ThreadJoinable{"Micro-Benchmark"
170  ,[=, &testStart]() // local copy of the test-subject-Functor
171  {
172  testStart.sync(); // block until all threads are ready
173  auto start = steady_clock::now();
174  for (size_t i=0; i < loopCnt; ++i)
175  checksum += testSubject(i);
176  duration = steady_clock::now () - start;
177  }}
178  { }
179  // Note: barrier at begin and join at end both ensure data synchronisation
180  Dur duration{}; // measured time within thread
181  size_t checksum{0}; // collected checksum
182  };
183 
184  SyncBarrier testStart{nThreads + 1}; // coordinated start of timing measurement
185  lib::ScopedCollection<Thread> threads(nThreads);
186  for (size_t n=0; n<nThreads; ++n) // create test threads
187  threads.emplace (subject4benchmark, repeatCnt, testStart);
188 
189  testStart.sync(); // barrier until all threads are ready
190 
191  size_t checksum{0};
192  Dur sumDuration{0.0};
193  for (auto& thread : threads)
194  {
195  thread.join(); // block on measurement end (fence)
196  sumDuration += thread.duration;
197  checksum += thread.checksum;
198  }
199 
200  double micros = sumDuration.count() / (nThreads * repeatCnt);
201  return std::make_tuple (micros, checksum);
202  }
203 
204 
205 
206 }} // namespace lib::test
207 #endif /*LIB_TEST_MICROBENCHMARK_H*/
Variant of the standard case, which requires the caller to wait for and join() this thread on termination...
Definition: thread.hpp:676
auto threadBenchmark(FUN const &subject, const size_t repeatCnt=DEFAULT_RUNS)
perform a multithreaded microbenchmark.
A fixed collection of non-copyable polymorphic objects.
Definition: run.hpp:49
TY & emplace(ARGS &&...args)
push new entry at the end of this container and build object of type TY in place there ...
Implementation namespace for support and library code.
Helpers and wrappers to simplify usage of microbenchmark.hpp.
auto microBenchmark(FUN const &testSubject, const size_t repeatCnt=DEFAULT_RUNS)
perform a simple looped microbenchmark.
Managing a collection of non-copyable polymorphic objects in compact storage.
double benchmarkTime(FUN const &invokeTestCode, const size_t repeatCnt=1)
Helper to invoke a functor or λ to observe its running time.
Metaprogramming tools for transforming functor types.
Convenience front-end to simplify and codify basic thread handling.
A one-time N-fold mutual synchronisation barrier.
A thin convenience wrapper to simplify thread-handling.
Definition: thread.hpp:656
size_t benchmarkLoop(FUN const &testSubject, const size_t repeatCnt=DEFAULT_RUNS)
Benchmark building block to invoke a functor or λ in a tight loop, passing the current loop index and...
An N-fold synchronisation latch using yield-wait until fulfilment.