//  Lumiera  0.pre.03  —  »edit your freedom«
//  scheduler-stress-test.cpp
1 /*
2  SchedulerStress(Test) - verify scheduler performance characteristics
3 
4  Copyright (C)
5  2024, Hermann Vosseler <Ichthyostega@web.de>
6 
7   **Lumiera** is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by the
9   Free Software Foundation; either version 2 of the License, or (at your
10   option) any later version. See the file COPYING for further details.
11 
12 * *****************************************************************/
13 
19 #include "lib/test/run.hpp"
20 #include "test-chain-load.hpp"
21 #include "stress-test-rig.hpp"
22 #include "lib/test/test-helper.hpp"
23 #include "vault/gear/scheduler.hpp"
24 #include "lib/time/timevalue.hpp"
25 #include "lib/format-string.hpp"
26 #include "lib/format-cout.hpp"
27 #include "lib/util.hpp"
28 
29 using test::Test;
30 
31 
32 namespace vault{
33 namespace gear {
34 namespace test {
35 
36  using util::_Fmt;
37  using util::isLimited;
38 
39 
40 
41  /***************************************************************************/
 /** @test verify performance characteristics of the Scheduler service,
  *        using synthetic calculation-load graphs (TestChainLoad) executed
  *        both single-threaded (as reference) and through the Scheduler.
  * NOTE(review): the original documentation comment (orig. lines 42-54)
  *               is missing from this extraction — TODO restore from VCS.
  */
 55  class SchedulerStress_test : public Test
 56  {
 57 
 // Test entry point: re-seed the default random generator, run the basic
 // smoke test, and stop early when invoked with the "quick" argument token.
 // NOTE(review): orig. lines 66-70 (the invocations of the remaining,
 //               long-running test cases) are missing from this extraction —
 //               TODO confirm against the repository version.
 58  virtual void
 59  run (Arg arg)
 60  {
 61  seedRand();
 62  smokeTest();
 63  if ("quick" == firstTok (arg))
 64  return;
 65 
 71  }
72 
73 
 // Smoke test: build a 1024-node synthetic calculation graph, evaluate it
 // synchronously (single-threaded) to establish both the expected result
 // hash and a runtime reference, then schedule the same graph through the
 // Scheduler and verify the reproduced node hashes match.
 // NOTE(review): the function name line (orig. 77) and two statements
 //               (orig. 79 and 81 — presumably MARK_TEST_FUN and the
 //               topology-shape configuration preceding .buildTopology())
 //               are missing from this extraction — TODO confirm.
 76  void
 78  {
 80  TestChainLoad testLoad{1024};
 82  .buildTopology()
 83 // .printTopologyDOT()
 84  ;
 85 
 // print structural statistics of the generated graph (node/level counts,
 // fork and join nodes) for diagnostic purposes
 86  auto stats = testLoad.computeGraphStatistics();
 87  cout << _Fmt{"Test-Load: Nodes: %d Levels: %d ∅Node/Level: %3.1f Forks: %d Joins: %d"}
 88  % stats.nodes
 89  % stats.levels
 90  % stats.indicators[STAT_NODE].pL
 91  % stats.indicators[STAT_FORK].cnt
 92  % stats.indicators[STAT_JOIN].cnt
 93  << endl;
 94 
 95  // while building the calculation-plan graph
 96  // node hashes were computed, observing dependencies
 97  size_t expectedHash = testLoad.getHash();
 98 
 99  // some jobs/nodes are marked with a weight-step
 100  // these can be instructed to spend some CPU time
 101  auto LOAD_BASE = 500us;
 102  testLoad.performGraphSynchronously(LOAD_BASE);
 103  CHECK (testLoad.getHash() == expectedHash);
 104 
 // single-threaded wall-clock time serves as baseline for comparison
 105  double referenceTime = testLoad.calcRuntimeReference(LOAD_BASE);
 106  cout << "refTime(singleThr): "<<referenceTime/1000<<"ms"<<endl;
 107 
 108 
 109  // Perform through Scheduler----------
 110  BlockFlowAlloc bFlow;
 111  EngineObserver watch;
 112  Scheduler scheduler{bFlow, watch};
 113 
 114  double performanceTime =
 115  testLoad.setupSchedule(scheduler)
 116  .withLoadTimeBase(LOAD_BASE)
 117  .withJobDeadline (150ms) // ◁─────────────── rather tight (and below overall run time)
 118  .withPlanningStep(300us)
 119  .withChunkSize(20)
 120  .launch_and_wait();
 121 
 122  cout << "runTime(Scheduler): "<<performanceTime/1000<<"ms"<<endl;
 123 
 124  // invocation through Scheduler has reproduced all node hashes
 125  CHECK (testLoad.getHash() == expectedHash);
 126  }
127 
128 
129 
 // Verify computation and adaptation of the schedule timing sequence:
 // calibrate a ~500µs computational load, derive per-level step factors
 // for a 64-node graph, and check the resulting start times first for an
 // upfront 1ms-grid plan, then for schedules adapted to (stress=1.0,
 // concurrency=4) and (stress=0.3, concurrency=6); finally launch the
 // low-stress schedule and require the actual runtime to stay within
 // 5000µs of the pre-computed expected end time.
 // NOTE(review): the function name line (orig. 136) and two statements
 //               (orig. 138 and 140 — presumably MARK_TEST_FUN and the
 //               topology-shape configuration preceding .buildTopology())
 //               are missing from this extraction — TODO confirm.
 135  void
 137  {
 139  TestChainLoad testLoad{64};
 141  .buildTopology()
 142 // .printTopologyDOT()
 143 // .printTopologyStatistics()
 144  ;
 145 
 // calibrate the artificial CPU load and verify one invocation indeed
 // takes close to the configured 500µs time base
 146  auto LOAD_BASE = 500us;
 147  ComputationalLoad cpuLoad;
 148  cpuLoad.timeBase = LOAD_BASE;
 149  cpuLoad.calibrate();
 150 
 151  double micros = cpuLoad.invoke();
 152  CHECK (micros < 550);
 153  CHECK (micros > 450);
 154 
 155  // build a schedule sequence based on
 156  // summing up weight factors, with example concurrency ≔ 4
 157  uint concurrency = 4;
 158  auto stepFactors = testLoad.levelScheduleSequence(concurrency).effuse();
 159  CHECK (stepFactors.size() == 1+testLoad.topLevel());
 160  CHECK (stepFactors.size() == 26);
 161 
 162 
 163  // Build-Performance-test-setup--------
 164  BlockFlowAlloc bFlow;
 165  EngineObserver watch;
 166  Scheduler scheduler{bFlow, watch};
 167 
 168  auto testSetup =
 169  testLoad.setupSchedule(scheduler)
 170  .withLoadTimeBase(LOAD_BASE)
 171  .withJobDeadline(50ms)
 172  .withUpfrontPlanning();
 173 
 // with upfront planning the default schedule places one level per 1ms
 174  auto schedule = testSetup.getScheduleSeq().effuse();
 175  CHECK (schedule.size() == testLoad.topLevel() + 2);
 176  CHECK (schedule[ 0] == _uTicks(0ms));
 177  CHECK (schedule[ 1] == _uTicks(1ms));
 178  CHECK (schedule[ 2] == _uTicks(2ms));
 179  // ....
 180  CHECK (schedule[24] == _uTicks(24ms));
 181  CHECK (schedule[25] == _uTicks(25ms));
 182  CHECK (schedule[26] == _uTicks(26ms));
 183 
 184  // Adapted Schedule----------
 185  double stressFac = 1.0;
 186  testSetup.withAdaptedSchedule (stressFac, concurrency);
 187  schedule = testSetup.getScheduleSeq().effuse();
 188  CHECK (schedule.size() == testLoad.topLevel() + 2);
 189  CHECK (schedule[ 0] == _uTicks(0ms));
 190  CHECK (schedule[ 1] == _uTicks(0ms));
 191 
 192  // verify the numbers in detail....
 // helper: render level index, accumulated step factor and the scheduled
 // start time (in ms) into one comparable string per schedule slot
 193  _Fmt stepFmt{"lev:%-2d stepFac:%-6.3f schedule:%6.3f"};
 194  auto stepStr = [&](uint i){ return string{stepFmt % i % stepFactors[i>0?i-1:0] % (_raw(schedule[i])/1000.0)}; };
 195 
 // at stress 1.0 the schedule time equals stepFac · LOAD_BASE (0.5ms)
 196  CHECK (stepStr( 0) == "lev:0 stepFac:0.000 schedule: 0.000"_expect);
 197  CHECK (stepStr( 1) == "lev:1 stepFac:0.000 schedule: 0.000"_expect);
 198  CHECK (stepStr( 2) == "lev:2 stepFac:0.000 schedule: 0.000"_expect);
 199  CHECK (stepStr( 3) == "lev:3 stepFac:2.000 schedule: 1.000"_expect);
 200  CHECK (stepStr( 4) == "lev:4 stepFac:2.000 schedule: 1.000"_expect);
 201  CHECK (stepStr( 5) == "lev:5 stepFac:2.000 schedule: 1.000"_expect);
 202  CHECK (stepStr( 6) == "lev:6 stepFac:2.000 schedule: 1.000"_expect);
 203  CHECK (stepStr( 7) == "lev:7 stepFac:3.000 schedule: 1.500"_expect);
 204  CHECK (stepStr( 8) == "lev:8 stepFac:5.000 schedule: 2.500"_expect);
 205  CHECK (stepStr( 9) == "lev:9 stepFac:7.000 schedule: 3.500"_expect);
 206  CHECK (stepStr(10) == "lev:10 stepFac:8.000 schedule: 4.000"_expect);
 207  CHECK (stepStr(11) == "lev:11 stepFac:8.000 schedule: 4.000"_expect);
 208  CHECK (stepStr(12) == "lev:12 stepFac:8.000 schedule: 4.000"_expect);
 209  CHECK (stepStr(13) == "lev:13 stepFac:9.000 schedule: 4.500"_expect);
 210  CHECK (stepStr(14) == "lev:14 stepFac:10.000 schedule: 5.000"_expect);
 211  CHECK (stepStr(15) == "lev:15 stepFac:12.000 schedule: 6.000"_expect);
 212  CHECK (stepStr(16) == "lev:16 stepFac:12.000 schedule: 6.000"_expect);
 213  CHECK (stepStr(17) == "lev:17 stepFac:13.000 schedule: 6.500"_expect);
 214  CHECK (stepStr(18) == "lev:18 stepFac:16.000 schedule: 8.000"_expect);
 215  CHECK (stepStr(19) == "lev:19 stepFac:16.000 schedule: 8.000"_expect);
 216  CHECK (stepStr(20) == "lev:20 stepFac:20.000 schedule:10.000"_expect);
 217  CHECK (stepStr(21) == "lev:21 stepFac:22.500 schedule:11.250"_expect);
 218  CHECK (stepStr(22) == "lev:22 stepFac:24.167 schedule:12.083"_expect);
 219  CHECK (stepStr(23) == "lev:23 stepFac:26.167 schedule:13.083"_expect);
 220  CHECK (stepStr(24) == "lev:24 stepFac:28.167 schedule:14.083"_expect);
 221  CHECK (stepStr(25) == "lev:25 stepFac:30.867 schedule:15.433"_expect);
 222  CHECK (stepStr(26) == "lev:26 stepFac:32.200 schedule:16.100"_expect);
 223 
 224 
 225  // Adapted Schedule with lower stress level and higher concurrency....
 226  stressFac = 0.3;
 227  concurrency = 6;
 228  stepFactors = testLoad.levelScheduleSequence(concurrency).effuse();
 229 
 230  testSetup.withAdaptedSchedule (stressFac, concurrency);
 231  schedule = testSetup.getScheduleSeq().effuse();
 232 
 // lower stress stretches the schedule: time = stepFac · 0.5ms / 0.3
 233  CHECK (stepStr( 0) == "lev:0 stepFac:0.000 schedule: 0.000"_expect);
 234  CHECK (stepStr( 1) == "lev:1 stepFac:0.000 schedule: 0.000"_expect);
 235  CHECK (stepStr( 2) == "lev:2 stepFac:0.000 schedule: 0.000"_expect);
 236  CHECK (stepStr( 3) == "lev:3 stepFac:2.000 schedule: 3.333"_expect);
 237  CHECK (stepStr( 4) == "lev:4 stepFac:2.000 schedule: 3.333"_expect);
 238  CHECK (stepStr( 5) == "lev:5 stepFac:2.000 schedule: 3.333"_expect);
 239  CHECK (stepStr( 6) == "lev:6 stepFac:2.000 schedule: 3.333"_expect);
 240  CHECK (stepStr( 7) == "lev:7 stepFac:3.000 schedule: 5.000"_expect);
 241  CHECK (stepStr( 8) == "lev:8 stepFac:5.000 schedule: 8.333"_expect);
 242  CHECK (stepStr( 9) == "lev:9 stepFac:7.000 schedule:11.666"_expect);
 243  CHECK (stepStr(10) == "lev:10 stepFac:8.000 schedule:13.333"_expect);
 244  CHECK (stepStr(11) == "lev:11 stepFac:8.000 schedule:13.333"_expect);
 245  CHECK (stepStr(12) == "lev:12 stepFac:8.000 schedule:13.333"_expect);
 246  CHECK (stepStr(13) == "lev:13 stepFac:9.000 schedule:15.000"_expect);
 247  CHECK (stepStr(14) == "lev:14 stepFac:10.000 schedule:16.666"_expect);
 248  CHECK (stepStr(15) == "lev:15 stepFac:12.000 schedule:20.000"_expect);
 249  CHECK (stepStr(16) == "lev:16 stepFac:12.000 schedule:20.000"_expect);
 250  CHECK (stepStr(17) == "lev:17 stepFac:13.000 schedule:21.666"_expect);
 251  CHECK (stepStr(18) == "lev:18 stepFac:16.000 schedule:26.666"_expect);
 252  CHECK (stepStr(19) == "lev:19 stepFac:16.000 schedule:26.666"_expect);
 253  CHECK (stepStr(20) == "lev:20 stepFac:18.000 schedule:30.000"_expect); // note: here the higher concurrency allows to process all 5 concurrent nodes at once
 254  CHECK (stepStr(21) == "lev:21 stepFac:20.500 schedule:34.166"_expect);
 255  CHECK (stepStr(22) == "lev:22 stepFac:22.167 schedule:36.944"_expect);
 256  CHECK (stepStr(23) == "lev:23 stepFac:23.167 schedule:38.611"_expect);
 257  CHECK (stepStr(24) == "lev:24 stepFac:24.167 schedule:40.277"_expect);
 258  CHECK (stepStr(25) == "lev:25 stepFac:25.967 schedule:43.277"_expect);
 259  CHECK (stepStr(26) == "lev:26 stepFac:27.300 schedule:45.500"_expect);
 260 
 261  // perform a Test with this low stress level (0.3)
 262  double runTime = testSetup.launch_and_wait();
 263  double expected = testSetup.getExpectedEndTime();
 264  CHECK (fabs (runTime-expected) < 5000);
 265  } // Scheduler should be able to follow the expected schedule
266 
267 
268 
269 
 // Observe parallelisation behaviour: run 20 jobs of ~5ms each through the
 // Scheduler with an instrumentation probe wrapped around every invocation,
 // then verify the collected statistics (per-node active time, activation
 // count, average concurrency close to the available worker cores).
 // NOTE(review): the function name line (orig. 274) and one statement
 //               (orig. 276, presumably MARK_TEST_FUN) are missing from
 //               this extraction — TODO confirm.
 273  void
 275  {
 277  const size_t NODES = 20;
 278  const size_t CORES = work::Config::COMPUTATION_CAPACITY;
 279  auto LOAD_BASE = 5ms;
 280 
 // all nodes carry weight 1 and thus perform the same 5ms load
 281  TestChainLoad testLoad{NODES};
 282 
 283  BlockFlowAlloc bFlow;
 284  EngineObserver watch;
 285  Scheduler scheduler{bFlow, watch};
 286 
 287  auto testSetup =
 288  testLoad.setWeight(1)
 289  .setupSchedule(scheduler)
 290  .withLoadTimeBase(LOAD_BASE)
 291  .withJobDeadline(50ms)
 292  .withInstrumentation() // activate an instrumentation bracket around each job invocation
 293  ;
 294  double runTime = testSetup.launch_and_wait();
 295 
 296  auto stat = testSetup.getInvocationStatistic(); // retrieve observed invocation statistics
 297 
 // summed-up active time exceeds wall-clock time due to concurrency,
 // while the covered time span stays below the overall run time
 298  CHECK (runTime < stat.activeTime);
 299  CHECK (isLimited (4900, stat.activeTime/NODES, 8000)); // should be close to 5000
 300  CHECK (stat.coveredTime < runTime);
 301  CHECK (NODES == stat.activationCnt); // each node activated once
 302  CHECK (isLimited (CORES/2, stat.avgConcurrency, CORES)); // should ideally come close to hardware concurrency
 303  CHECK (0 == stat.timeAtConc(0));
 304  CHECK (0 == stat.timeAtConc(CORES+1));
 305  CHECK (runTime/2 < stat.timeAtConc(CORES-1)+stat.timeAtConc(CORES));
 306  } // should ideally spend most of the time at highest concurrency levels
307 
308 
309 
310 
312 
 // Determine the »breaking point« of the Scheduler: using the stress-test
 // rig with a 64-node »load bursts« topology at 500µs base load and
 // concurrency 4, perform a binary search for the stress factor where the
 // schedule can no longer be followed; expect that point close to 1.0.
 // NOTE(review): the function name line (orig. 332) and orig. 334
 //               (presumably MARK_TEST_FUN and/or the `using StressRig =
 //               StressTestRig<...>` alias referenced below) are missing
 //               from this extraction — TODO confirm.
 331  void
 333  {
 335 
 // Setup: extension points of the StressRig configured for this test
 336  struct Setup : StressRig
 337  {
 338  uint CONCURRENCY = 4;
 339  bool showRuns = true;
 340 
 341  auto testLoad()
 342  { return TestLoad{64}.configureShape_chain_loadBursts(); }
 343 
 344  auto testSetup (TestLoad& testLoad)
 345  {
 346  return StressRig::testSetup(testLoad)
 347  .withLoadTimeBase(500us);
 348  }
 349 
 350  };
 351 
 // perform the breaking-point search and verify: a clear change in
 // behaviour (delta) and a breaking point at stress factor ≈ 1.0
 352  auto [stress,delta,time] = StressRig::with<Setup>()
 353  .perform<bench::BreakingPoint>();
 354  CHECK (delta > 2.5);
 355  CHECK (1.15 > stress and stress > 0.85);
 356  }
357 
358 
359 
 // Investigate scaling behaviour: run a series of measurements with
 // isolated (independent) nodes over a parameter range of 33..128 nodes
 // at 2ms base load, fit a linear model over (node count → run time) and
 // verify correlation, gradient, fixed overhead and worker utilisation.
 // NOTE(review): the function name line (orig. 371), orig. 373
 //               (presumably MARK_TEST_FUN / a `using StressRig = ...`
 //               alias) and orig. 376 (presumably the `: StressRig` base
 //               clause of Setup, which uses Param/TestLoad/StressRig
 //               members) are missing from this extraction — TODO confirm.
 370  void
 372  {
 374 
 375  struct Setup
 377  {
 378  uint CONCURRENCY = 4;
 379  uint REPETITIONS = 50;
 380 
 // parametrised load: `nodes` isolated jobs without interdependencies
 381  auto testLoad(Param nodes)
 382  {
 383  TestLoad testLoad{nodes};
 384  return testLoad.configure_isolated_nodes();
 385  }
 386 
 387  auto testSetup (TestLoad& testLoad)
 388  {
 389  return StressRig::testSetup(testLoad)
 390  .withLoadTimeBase(2ms);
 391  }
 392  };
 393 
 394  auto results = StressRig::with<Setup>()
 395  .perform<bench::ParameterRange> (33,128);
 396 
 // fit runTime(param) = gradient·param + socket and retrieve fit quality
 397  auto [socket,gradient,v1,v2,corr,maxDelta,stdev] = bench::linearRegression (results.param, results.time);
 398  double avgConc = Setup::avgConcurrency (results);
 399 
 400 // cout << "───═══───═══───═══───═══───═══───═══───═══───═══───═══───═══───"<<endl;
 401 // cout << Setup::renderGnuplot (results) <<endl;
 402  cout << "───═══───═══───═══───═══───═══───═══───═══───═══───═══───═══───"<<endl;
 403  cout << _Fmt{"Model: %3.2f·p + %3.2f corr=%4.2f Δmax=%4.2f σ=%4.2f ∅concurrency: %3.1f"}
 404  % gradient % socket % corr % maxDelta % stdev % avgConc
 405  << endl;
 406 
 407  CHECK (corr > 0.80); // clearly a linearly correlated behaviour
 408  CHECK (isLimited (0.4, gradient, 0.7)); // should be slightly above 0.5 (2ms and 4 threads => 0.5ms / Job)
 409  CHECK (isLimited (3, socket, 9 )); // we have a spin-up and a shut-down both ~ 2ms plus some further overhead
 410 
 411  CHECK (avgConc > 3); // should be able to utilise 4 workers (minus the spin-up/shut-down phase)
 412  }
413 
414 
415 
 // Long-term stability: first determine the breaking point for a 256-node
 // pattern of interleaved linear chains (5ms load per node), then run the
 // same pattern extended to 1024 nodes with planning interleaved into the
 // execution (chunks of 32) at stress factor 1.0 on 4 workers, and verify
 // result hash, activation count, sustained concurrency and total runtime.
 // NOTE(review): the function name line (orig. 426) plus orig. 428 and
 //               473 (presumably MARK_TEST_FUN and a comment/blank line)
 //               are missing from this extraction — TODO confirm.
 425  void
 427  {
 429  using StressRig = StressTestRig<8>;
 430 
 // Setup: breaking-point search configuration for the 256-node pattern
 431  struct Setup : StressRig
 432  {
 433  uint CONCURRENCY = 4;
 434  bool showRuns = true;
 435 
 436  auto
 437  testLoad()
 438  {
 439  TestLoad testLoad{256}; // use a pattern of 4-step interleaved linear chains
 440  testLoad.seedingRule(testLoad.rule().probability(0.6).maxVal(2))
 441  .pruningRule(testLoad.rule().probability(0.44))
 442  .weightRule(testLoad.value(1))
 443  .setSeed(60);
 444  return testLoad;
 445  }
 446 
 447  auto testSetup (TestLoad& testLoad)
 448  {
 449  return StressRig::testSetup(testLoad)
 450  .withLoadTimeBase(5ms);// ◁─────────────── Load 5ms on each Node
 451  }
 452  };
 453  auto [stress,delta,time] = StressRig::with<Setup>()
 454  .perform<bench::BreakingPoint>();
 455  cout << "Time for 256 Nodes: "<<time<<"ms with stressFactor="<<stress<<endl;
 456 
 457 
 458  /* ========== verify extended stable operation ============== */
 459 
 460  // Use the same pattern, but extended to 4 times the length;
 461  // moreover, this time planning and execution will be interleaved.
 462  TestChainLoad<8> testLoad{1024};
 463  testLoad.seedingRule(testLoad.rule().probability(0.6).maxVal(2))
 464  .pruningRule(testLoad.rule().probability(0.44))
 465  .weightRule(testLoad.value(1))
 466  .setSeed(60)
 467  .buildTopology()
 468 // .printTopologyDOT()
 469 // .printTopologyStatistics()
 470  ;
 471  size_t expectedHash = testLoad.getHash();
 472 
 474  BlockFlowAlloc bFlow;
 475  EngineObserver watch;
 476  Scheduler scheduler{bFlow, watch};
 477 
 478  auto testSetup =
 479  testLoad.setupSchedule(scheduler)
 480  .withLoadTimeBase(5ms)
 481  .withJobDeadline(50ms) // ◁───────────────────── deadline is way shorter than overall run time
 482  .withChunkSize(32) // ◁───────────────────── planning of the next 32 nodes interleaved with performance
 483  .withInstrumentation()
 484  .withAdaptedSchedule (1.0, 4); // ◁───────────────────── stress factor 1.0 and 4 workers
 485  double runTime = testSetup.launch_and_wait();
 486  auto stat = testSetup.getInvocationStatistic();
 487  cout << "Extended Scheduler Run: "<<runTime/1e6<<"sec concurrency:"<<stat.avgConcurrency<<endl;
 488 
 // all 1024 nodes were activated, results match, concurrency sustained,
 // and the 4× longer graph finished in well below 5× the 256-node time
 489  CHECK (stat.activationCnt == 1024);
 490  CHECK (expectedHash == testLoad.getHash());
 491  CHECK (3.2 < stat.avgConcurrency);
 492  CHECK (stat.coveredTime < 5 * time*1000);
 493  }
494  };
495 
496 
498  LAUNCHER (SchedulerStress_test, "unit engine");
499 
500 
501 
502 }}} // namespace vault::gear::test
/* ───── Doxygen cross-reference residue (extraction artifact — tooltip text
 *       from the generated documentation, not part of the original source):
 *
 * const StatKey STAT_NODE
 * all nodes
 * Automatically use custom string conversion in C++ stream output.
 * #define TRANSIENTLY(_OO_)
 * Macro to simplify capturing assignments.
 * auto testSetup(TestLoad &testLoad)
 * (optional) extension point: base configuration of the test ScheduleCtx
 * Definition: Setup.py:1
 * Definition: run.hpp:40
 * Front-end for printf-style string template interpolation.
 * size_t getHash() const
 * global hash is the combination of all exit node hashes != 0
 * Configurable template framework for running Scheduler Stress tests Use to build a custom setup class...
 * double invoke(uint scaleStep=1)
 * cause a delay by computational load
 * TestChainLoad && buildTopology()
 * Use current configuration and seed to (re)build Node connectivity.
 * Generate synthetic computation load for Scheduler performance tests.
 * A test bench to conduct performance measurement series.
 * A Generator for synthetic Render Jobs for Scheduler load testing.
 * A front-end for using printf-style formatting.
 * Abstract Base Class for all testcases.
 * Definition: run.hpp:53
 * »Scheduler-Service« : coordinate render activities.
 * Definition: scheduler.hpp:213
 * Service for coordination and dispatch of render activities.
 * #define MARK_TEST_FUN
 * Macro to mark the current test function in STDOUT.
 * Simplistic test class runner.
 * void seedRand()
 * draw a new random seed from a common nucleus, and re-seed the default-Gen.
 * Definition: suite.cpp:211
 * static string firstTok(Arg)
 * conveniently pick the first token from the argument line
 * Definition: suite.cpp:233
 * Tiny helper functions and shortcuts to be used everywhere Consider this header to be effectively incl...
 * A collection of frequently used helper functions to support unit testing.
 * TestChainLoad && configureShape_chain_loadBursts()
 * preconfigured topology: single graph with massive »load bursts«
 * const StatKey STAT_JOIN
 * joining node
 * Statistic computeGraphStatistics()
 * Operator on TestChainLoad to evaluate current graph connectivity.
 * Mix-in for setup of a #ParameterRange evaluation to watch the processing of a single load peak...
 * const StatKey STAT_FORK
 * forking node
 * static size_t COMPUTATION_CAPACITY
 * Nominal »full size« of a pool of concurrent workers.
 * Definition: work-force.hpp:106
 * a family of time value like entities and their relationships.
 * Vault-Layer implementation namespace root.
 * Collector and aggregator for performance data.
 * A calibratable CPU load to be invoked from a node job functor.
 */