timing.h
Go to the documentation of this file.
1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_ARCH_TIMING_H
25 #define PXR_BASE_ARCH_TIMING_H
26 
30 
31 #include "pxr/pxr.h"
32 #include "pxr/base/arch/api.h"
33 #include "pxr/base/arch/defines.h"
34 #include "pxr/base/arch/inttypes.h"
35 
38 
39 #if defined(ARCH_OS_LINUX) && defined(ARCH_CPU_INTEL)
40 #include <x86intrin.h>
41 #elif defined(ARCH_OS_DARWIN)
42 #include <mach/mach_time.h>
43 #elif defined(ARCH_OS_WINDOWS)
44 #include <intrin.h>
45 #endif
46 
47 #include <algorithm>
48 #include <atomic>
49 #include <iterator>
50 #include <numeric>
51 
52 PXR_NAMESPACE_OPEN_SCOPE
53 
61 inline uint64_t
63 {
64 #if defined(ARCH_OS_DARWIN)
65  // On Darwin we'll use mach_absolute_time().
66  return mach_absolute_time();
67 #elif defined(ARCH_CPU_INTEL)
68  // On Intel we'll use the rdtsc instruction.
69  return __rdtsc();
70 #elif defined (ARCH_CPU_ARM)
71  uint64_t result;
72  __asm __volatile("mrs %0, CNTVCT_EL0" : "=&r" (result));
73  return result;
74 #else
75 #error Unknown architecture.
76 #endif
77 }
78 
79 
84 inline uint64_t
86 {
87  uint64_t t;
88 #if defined (ARCH_OS_DARWIN)
89  return ArchGetTickTime();
90 #elif defined (ARCH_CPU_ARM)
91  std::atomic_signal_fence(std::memory_order_seq_cst);
92  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
93  std::atomic_signal_fence(std::memory_order_seq_cst);
94 #elif defined (ARCH_COMPILER_MSVC)
95  _mm_lfence();
96  std::atomic_signal_fence(std::memory_order_seq_cst);
97  t = __rdtsc();
98  _mm_lfence();
99  std::atomic_signal_fence(std::memory_order_seq_cst);
100 #elif defined(ARCH_CPU_INTEL) && \
101  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
102  // Prevent reorders by the compiler.
103  std::atomic_signal_fence(std::memory_order_seq_cst);
104  asm volatile(
105  "lfence\n\t"
106  "rdtsc\n\t"
107  "shl $32, %%rdx\n\t"
108  "or %%rdx, %0\n\t"
109  "lfence"
110  : "=a"(t)
111  :
112  // rdtsc writes rdx
113  // shl modifies cc flags
114  : "rdx", "cc");
115 #else
116 #error "Unsupported architecture."
117 #endif
118  return t;
119 }
120 
125 inline uint64_t
127 {
128  uint64_t t;
129 #if defined (ARCH_OS_DARWIN)
130  return ArchGetTickTime();
131 #elif defined (ARCH_CPU_ARM)
132  std::atomic_signal_fence(std::memory_order_seq_cst);
133  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
134  std::atomic_signal_fence(std::memory_order_seq_cst);
135 #elif defined (ARCH_COMPILER_MSVC)
136  std::atomic_signal_fence(std::memory_order_seq_cst);
137  unsigned aux;
138  t = __rdtscp(&aux);
139  _mm_lfence();
140  std::atomic_signal_fence(std::memory_order_seq_cst);
141 #elif defined(ARCH_CPU_INTEL) && \
142  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC))
143  std::atomic_signal_fence(std::memory_order_seq_cst);
144  asm volatile(
145  "rdtscp\n\t"
146  "shl $32, %%rdx\n\t"
147  "or %%rdx, %0\n\t"
148  "lfence"
149  : "=a"(t)
150  :
151  // rdtscp writes rcx & rdx
152  // shl modifies cc flags
153  : "rcx", "rdx", "cc");
154 #else
155 #error "Unsupported architecture."
156 #endif
157  return t;
158 }
159 
160 #if defined (doxygen) || \
161  (!defined(ARCH_OS_DARWIN) && defined(ARCH_CPU_INTEL) && \
162  (defined(ARCH_COMPILER_CLANG) || defined(ARCH_COMPILER_GCC)))
163 
167 {
169  explicit ArchIntervalTimer(bool start=true)
170  : _started(start) {
171  if (_started) {
172  Start();
173  }
174  }
175 
177  void Start() {
178  _started = true;
179  std::atomic_signal_fence(std::memory_order_seq_cst);
180  asm volatile(
181  "lfence\n\t"
182  "rdtsc\n\t"
183  "lfence"
184  : "=a"(_startLow), "=d"(_startHigh) :: );
185  }
186 
188  bool IsStarted() const {
189  return _started;
190  }
191 
193  uint64_t GetStartTicks() const {
194  return (uint64_t(_startHigh) << 32) + _startLow;
195  }
196 
198  uint64_t GetCurrentTicks() {
199  return ArchGetStopTickTime();
200  }
201 
204  uint64_t GetElapsedTicks() {
205  if (!_started) {
206  return 0;
207  }
208  uint32_t stopLow, stopHigh;
209  std::atomic_signal_fence(std::memory_order_seq_cst);
210  asm volatile(
211  "rdtscp\n\t"
212  "lfence"
213  : "=a"(stopLow), "=d"(stopHigh)
214  :
215  // rdtscp writes rcx
216  : "rcx");
217  return (uint64_t(stopHigh - _startHigh) << 32) + (stopLow - _startLow);
218  }
219 private:
220  bool _started = false;
221  uint32_t _startLow = 0, _startHigh = 0;
222 };
223 
224 #else
225 
226 struct ArchIntervalTimer
227 {
228  explicit ArchIntervalTimer(bool start=true)
229  : _started(start) {
230  if (_started) {
231  _startTicks = ArchGetStartTickTime();
232  }
233  }
234 
235  void Start() {
236  _started = true;
237  _startTicks = ArchGetStartTickTime();
238  }
239 
240  bool IsStarted() const {
241  return _started;
242  }
243 
244  uint64_t GetStartTicks() const {
245  return _startTicks;
246  }
247 
248  uint64_t GetCurrentTicks() {
249  return ArchGetStopTickTime();
250  }
251 
252  uint64_t GetElapsedTicks() {
253  if (!_started) {
254  return 0;
255  }
256  return ArchGetStopTickTime() - _startTicks;
257  }
258 private:
259  bool _started = false;
260  uint64_t _startTicks;
261 };
262 
263 #endif
264 
275 ARCH_API
276 uint64_t ArchGetTickQuantum();
277 
280 ARCH_API
282 
283 
296 ARCH_API
297 int64_t ArchTicksToNanoseconds(uint64_t nTicks);
298 
301 ARCH_API
302 double ArchTicksToSeconds(uint64_t nTicks);
303 
306 ARCH_API
307 uint64_t ArchSecondsToTicks(double seconds);
308 
311 ARCH_API
313 
314 ARCH_API
315 uint64_t
316 Arch_MeasureExecutionTime(uint64_t maxMicroseconds, bool *reachedConsensus,
317  void const *m, uint64_t (*callM)(void const *, int));
318 
328 template <class Fn>
329 uint64_t
331  Fn const &fn,
332  uint64_t maxMicroSeconds = 10000, /* 10 msec */
333  bool *reachedConsensus = nullptr)
334 {
335  auto measureN = [&fn](int nTimes) -> uint64_t {
336  ArchIntervalTimer iTimer;
337  for (int i = nTimes; i--; ) {
338  std::atomic_signal_fence(std::memory_order_seq_cst);
339  (void)fn();
340  std::atomic_signal_fence(std::memory_order_seq_cst);
341  }
342  return iTimer.GetElapsedTicks();
343  };
344 
345  using MeasureNType = decltype(measureN);
346 
347  return Arch_MeasureExecutionTime(
348  maxMicroSeconds, reachedConsensus,
349  static_cast<void const *>(&measureN),
350  [](void const *mN, int nTimes) {
351  return (*static_cast<MeasureNType const *>(mN))(nTimes);
352  });
353 }
354 
356 
357 PXR_NAMESPACE_CLOSE_SCOPE
358 
359 #endif // PXR_BASE_ARCH_TIMING_H
ARCH_API double ArchTicksToSeconds(uint64_t nTicks)
Convert a duration measured in "ticks", as returned by ArchGetTickTime(), to seconds.
uint64_t ArchGetTickTime()
Return the current time in system-dependent units.
Definition: timing.h:62
ARCH_API double ArchGetNanosecondsPerTick()
Get nanoseconds per tick.
ARCH_API int64_t ArchTicksToNanoseconds(uint64_t nTicks)
Convert a duration measured in "ticks", as returned by ArchGetTickTime(), to nanoseconds.
ARCH_API uint64_t ArchGetIntervalTimerTickOverhead()
Return the ticks taken to record an interval of time with ArchIntervalTimer, as measured at startup time.
uint64_t ArchMeasureExecutionTime(Fn const &fn, uint64_t maxMicroSeconds=10000, bool *reachedConsensus=nullptr)
Run fn repeatedly attempting to determine a consensus fastest execution time with low noise,...
Definition: timing.h:330
uint64_t GetCurrentTicks()
Read and return the current time.
Definition: timing.h:198
bool IsStarted() const
Return true if this timer is started.
Definition: timing.h:188
void Start()
Start the timer, or reset the start time if it has already been started.
Definition: timing.h:177
uint64_t GetStartTicks() const
Return this timer's start time, or 0 if it hasn't been started.
Definition: timing.h:193
ARCH_API uint64_t ArchSecondsToTicks(double seconds)
Convert a duration in seconds to "ticks", as returned by ArchGetTickTime().
A simple timer class for measuring an interval of time using the ArchTickTimer facilities.
Definition: timing.h:166
Define integral types.
uint64_t ArchGetStartTickTime()
Get a "start" tick time for measuring an interval of time, followed by a later call to ArchGetStopTickTime().
Definition: timing.h:85
uint64_t GetElapsedTicks()
Read the current time and return the difference between it and the start time.
Definition: timing.h:204
uint64_t ArchGetStopTickTime()
Get a "stop" tick time for measuring an interval of time.
Definition: timing.h:126
ArchIntervalTimer(bool start=true)
Construct a timer and start timing if start is true.
Definition: timing.h:169
ARCH_API uint64_t ArchGetTickQuantum()
Return the tick time resolution.